[PATCH v2 0/2] MR11025: libs: Import upstream code from SymCrypt 103.11.0.
-- v2: rsaenh: Use SymCrypt for encryption and hashing. libs: Import upstream code from SymCrypt 103.11.0. https://gitlab.winehq.org/wine/wine/-/merge_requests/11025
From: Alexandre Julliard <julliard@winehq.org> --- configure | 32 +- configure.ac | 2 + libs/symcrypt/LICENSE | 21 + libs/symcrypt/Makefile.in | 129 + libs/symcrypt/inc/C_asm_shared.inc | 74 + libs/symcrypt/inc/buildInfo.h | 8 + libs/symcrypt/inc/symcrypt.h | 10814 ++++++++++++++++ libs/symcrypt/inc/symcrypt_internal.h | 3768 ++++++ .../symcrypt/inc/symcrypt_internal_shared.inc | 33 + libs/symcrypt/inc/symcrypt_low_level.h | 3137 +++++ libs/symcrypt/lib/3des.c | 831 ++ libs/symcrypt/lib/AesTables.c | 899 ++ libs/symcrypt/lib/DesTables.c | 280 + libs/symcrypt/lib/FatalIntercept.c | 23 + libs/symcrypt/lib/IEEE802_11SaeCustom.c | 1585 +++ libs/symcrypt/lib/ScsTable.c | 388 + libs/symcrypt/lib/a_dispatch.c | 1028 ++ libs/symcrypt/lib/aes-asm.c | 46 + libs/symcrypt/lib/aes-c.c | 468 + libs/symcrypt/lib/aes-default-bc.c | 92 + libs/symcrypt/lib/aes-default.c | 872 ++ libs/symcrypt/lib/aes-key.c | 437 + libs/symcrypt/lib/aes-neon.c | 1889 +++ libs/symcrypt/lib/aes-pattern.c | 348 + libs/symcrypt/lib/aes-xmm.c | 1792 +++ libs/symcrypt/lib/aes-ymm.c | 793 ++ libs/symcrypt/lib/aesCtrDrbg.c | 986 ++ libs/symcrypt/lib/aescmac.c | 258 + libs/symcrypt/lib/aeskw.c | 457 + libs/symcrypt/lib/blockciphermodes.c | 470 + libs/symcrypt/lib/ccm.c | 634 + libs/symcrypt/lib/chacha20.c | 267 + libs/symcrypt/lib/chacha20_poly1305.c | 257 + libs/symcrypt/lib/cpuid.c | 419 + libs/symcrypt/lib/cpuid_um.c | 131 + libs/symcrypt/lib/crt.c | 215 + libs/symcrypt/lib/cshake_pattern.c | 152 + libs/symcrypt/lib/desx.c | 131 + libs/symcrypt/lib/dh.c | 141 + libs/symcrypt/lib/dl_internal_groups.c | 922 ++ libs/symcrypt/lib/dlgroup.c | 2016 +++ libs/symcrypt/lib/dlkey.c | 921 ++ libs/symcrypt/lib/dsa.c | 695 + libs/symcrypt/lib/ec_dh.c | 157 + libs/symcrypt/lib/ec_dispatch.c | 300 + libs/symcrypt/lib/ec_dsa.c | 694 + libs/symcrypt/lib/ec_internal_curve_params.c | 597 + libs/symcrypt/lib/ec_internal_curves.c | 79 + libs/symcrypt/lib/ec_montgomery.c | 443 + libs/symcrypt/lib/ec_mul.c | 571 + 
libs/symcrypt/lib/ec_short_weierstrass.c | 935 ++ libs/symcrypt/lib/ec_twisted_edwards.c | 575 + libs/symcrypt/lib/eckey.c | 996 ++ libs/symcrypt/lib/ecpoint.c | 785 ++ libs/symcrypt/lib/ecurve.c | 771 ++ libs/symcrypt/lib/env_windowsUserModeWin8_1.c | 187 + libs/symcrypt/lib/equal.c | 48 + libs/symcrypt/lib/fdef_general.c | 1550 +++ libs/symcrypt/lib/fdef_int.c | 1321 ++ libs/symcrypt/lib/fdef_mod.c | 1731 +++ libs/symcrypt/lib/gcm.c | 902 ++ libs/symcrypt/lib/gen_int.c | 368 + libs/symcrypt/lib/ghash.c | 951 ++ libs/symcrypt/lib/ghash_definitions.h | 472 + libs/symcrypt/lib/hash.c | 216 + libs/symcrypt/lib/hash_buffer_pattern.c | 75 + libs/symcrypt/lib/hash_pattern.c | 39 + libs/symcrypt/lib/hkdf.c | 229 + libs/symcrypt/lib/hmac.c | 195 + libs/symcrypt/lib/hmac_pattern.c | 197 + libs/symcrypt/lib/hmacmd5.c | 56 + libs/symcrypt/lib/hmacsha1.c | 65 + libs/symcrypt/lib/hmacsha224.c | 62 + libs/symcrypt/lib/hmacsha256.c | 60 + libs/symcrypt/lib/hmacsha384.c | 59 + libs/symcrypt/lib/hmacsha3_224.c | 122 + libs/symcrypt/lib/hmacsha3_256.c | 122 + libs/symcrypt/lib/hmacsha3_384.c | 124 + libs/symcrypt/lib/hmacsha3_512.c | 126 + libs/symcrypt/lib/hmacsha512.c | 59 + libs/symcrypt/lib/hmacsha512_224.c | 62 + libs/symcrypt/lib/hmacsha512_256.c | 62 + libs/symcrypt/lib/kmac.c | 123 + libs/symcrypt/lib/kmac_pattern.c | 218 + libs/symcrypt/lib/libmain.c | 539 + libs/symcrypt/lib/lms.c | 1162 ++ libs/symcrypt/lib/marvin32.c | 331 + libs/symcrypt/lib/md2.c | 307 + libs/symcrypt/lib/md4.c | 425 + libs/symcrypt/lib/md5.c | 503 + libs/symcrypt/lib/mldsa.c | 1096 ++ libs/symcrypt/lib/mldsa_primitives.c | 2410 ++++ libs/symcrypt/lib/mlkem.c | 1164 ++ libs/symcrypt/lib/mlkem_primitives.c | 1442 +++ libs/symcrypt/lib/modexp.c | 510 + libs/symcrypt/lib/paddingPkcs7.c | 167 + libs/symcrypt/lib/parhash.c | 517 + libs/symcrypt/lib/pbkdf2.c | 126 + libs/symcrypt/lib/pbkdf2_hmacsha1.c | 41 + libs/symcrypt/lib/pbkdf2_hmacsha256.c | 41 + libs/symcrypt/lib/poly1305.c | 468 + 
libs/symcrypt/lib/precomp.h | 26 + libs/symcrypt/lib/primes.c | 306 + libs/symcrypt/lib/rc2.c | 438 + libs/symcrypt/lib/rc4.c | 156 + libs/symcrypt/lib/rdrand.c | 172 + libs/symcrypt/lib/rdseed.c | 135 + libs/symcrypt/lib/recoding.c | 209 + libs/symcrypt/lib/rsa_enc.c | 1531 +++ libs/symcrypt/lib/rsa_padding.c | 1218 ++ libs/symcrypt/lib/rsakey.c | 1631 +++ libs/symcrypt/lib/sc_lib.h | 5161 ++++++++ libs/symcrypt/lib/sc_lib_mldsa.h | 1081 ++ libs/symcrypt/lib/sc_lib_mlkem.h | 468 + libs/symcrypt/lib/scsTools.c | 367 + libs/symcrypt/lib/selftest.c | 17 + libs/symcrypt/lib/session.c | 377 + libs/symcrypt/lib/sha1.c | 472 + libs/symcrypt/lib/sha256-xmm.c | 354 + libs/symcrypt/lib/sha256-ymm.c | 441 + libs/symcrypt/lib/sha256.c | 1884 +++ libs/symcrypt/lib/sha256Par-ymm.c | 269 + libs/symcrypt/lib/sha256Par.c | 1243 ++ libs/symcrypt/lib/sha3.c | 619 + libs/symcrypt/lib/sha3_224.c | 141 + libs/symcrypt/lib/sha3_256.c | 141 + libs/symcrypt/lib/sha3_384.c | 143 + libs/symcrypt/lib/sha3_512.c | 144 + libs/symcrypt/lib/sha512-ymm.c | 801 ++ libs/symcrypt/lib/sha512.c | 1715 +++ libs/symcrypt/lib/sha512Par-ymm.c | 243 + libs/symcrypt/lib/sha512Par.c | 798 ++ libs/symcrypt/lib/shake.c | 337 + libs/symcrypt/lib/shake_pattern.c | 111 + libs/symcrypt/lib/smallPrimes32.h | 29 + libs/symcrypt/lib/sp800_108.c | 143 + libs/symcrypt/lib/sp800_108_hmacsha1.c | 39 + libs/symcrypt/lib/sp800_108_hmacsha256.c | 39 + libs/symcrypt/lib/sp800_108_hmacsha512.c | 66 + libs/symcrypt/lib/srtp_kdf.c | 175 + libs/symcrypt/lib/ssh_kdf.c | 122 + libs/symcrypt/lib/ssh_kdf_sha256.c | 65 + libs/symcrypt/lib/ssh_kdf_sha512.c | 70 + libs/symcrypt/lib/sskdf.c | 266 + libs/symcrypt/lib/tlsCbcVerify.c | 458 + libs/symcrypt/lib/tlsprf.c | 569 + libs/symcrypt/lib/xmss.c | 2129 +++ libs/symcrypt/lib/xtsaes.c | 727 ++ libs/symcrypt/lib/xtsaes_definitions.h | 176 + libs/symcrypt/lib/xtsaes_pattern.c | 90 + tools/make_makefiles | 5 + tools/makedep.c | 1 + 152 files changed, 95131 insertions(+), 1 deletion(-) 
create mode 100644 libs/symcrypt/LICENSE create mode 100644 libs/symcrypt/Makefile.in create mode 100644 libs/symcrypt/inc/C_asm_shared.inc create mode 100644 libs/symcrypt/inc/buildInfo.h create mode 100644 libs/symcrypt/inc/symcrypt.h create mode 100644 libs/symcrypt/inc/symcrypt_internal.h create mode 100644 libs/symcrypt/inc/symcrypt_internal_shared.inc create mode 100644 libs/symcrypt/inc/symcrypt_low_level.h create mode 100644 libs/symcrypt/lib/3des.c create mode 100644 libs/symcrypt/lib/AesTables.c create mode 100644 libs/symcrypt/lib/DesTables.c create mode 100644 libs/symcrypt/lib/FatalIntercept.c create mode 100644 libs/symcrypt/lib/IEEE802_11SaeCustom.c create mode 100644 libs/symcrypt/lib/ScsTable.c create mode 100644 libs/symcrypt/lib/a_dispatch.c create mode 100644 libs/symcrypt/lib/aes-asm.c create mode 100644 libs/symcrypt/lib/aes-c.c create mode 100644 libs/symcrypt/lib/aes-default-bc.c create mode 100644 libs/symcrypt/lib/aes-default.c create mode 100644 libs/symcrypt/lib/aes-key.c create mode 100644 libs/symcrypt/lib/aes-neon.c create mode 100644 libs/symcrypt/lib/aes-pattern.c create mode 100644 libs/symcrypt/lib/aes-xmm.c create mode 100644 libs/symcrypt/lib/aes-ymm.c create mode 100644 libs/symcrypt/lib/aesCtrDrbg.c create mode 100644 libs/symcrypt/lib/aescmac.c create mode 100644 libs/symcrypt/lib/aeskw.c create mode 100644 libs/symcrypt/lib/blockciphermodes.c create mode 100644 libs/symcrypt/lib/ccm.c create mode 100644 libs/symcrypt/lib/chacha20.c create mode 100644 libs/symcrypt/lib/chacha20_poly1305.c create mode 100644 libs/symcrypt/lib/cpuid.c create mode 100644 libs/symcrypt/lib/cpuid_um.c create mode 100644 libs/symcrypt/lib/crt.c create mode 100644 libs/symcrypt/lib/cshake_pattern.c create mode 100644 libs/symcrypt/lib/desx.c create mode 100644 libs/symcrypt/lib/dh.c create mode 100644 libs/symcrypt/lib/dl_internal_groups.c create mode 100644 libs/symcrypt/lib/dlgroup.c create mode 100644 libs/symcrypt/lib/dlkey.c create mode 100644 
libs/symcrypt/lib/dsa.c create mode 100644 libs/symcrypt/lib/ec_dh.c create mode 100644 libs/symcrypt/lib/ec_dispatch.c create mode 100644 libs/symcrypt/lib/ec_dsa.c create mode 100644 libs/symcrypt/lib/ec_internal_curve_params.c create mode 100644 libs/symcrypt/lib/ec_internal_curves.c create mode 100644 libs/symcrypt/lib/ec_montgomery.c create mode 100644 libs/symcrypt/lib/ec_mul.c create mode 100644 libs/symcrypt/lib/ec_short_weierstrass.c create mode 100644 libs/symcrypt/lib/ec_twisted_edwards.c create mode 100644 libs/symcrypt/lib/eckey.c create mode 100644 libs/symcrypt/lib/ecpoint.c create mode 100644 libs/symcrypt/lib/ecurve.c create mode 100644 libs/symcrypt/lib/env_windowsUserModeWin8_1.c create mode 100644 libs/symcrypt/lib/equal.c create mode 100644 libs/symcrypt/lib/fdef_general.c create mode 100644 libs/symcrypt/lib/fdef_int.c create mode 100644 libs/symcrypt/lib/fdef_mod.c create mode 100644 libs/symcrypt/lib/gcm.c create mode 100644 libs/symcrypt/lib/gen_int.c create mode 100644 libs/symcrypt/lib/ghash.c create mode 100644 libs/symcrypt/lib/ghash_definitions.h create mode 100644 libs/symcrypt/lib/hash.c create mode 100644 libs/symcrypt/lib/hash_buffer_pattern.c create mode 100644 libs/symcrypt/lib/hash_pattern.c create mode 100644 libs/symcrypt/lib/hkdf.c create mode 100644 libs/symcrypt/lib/hmac.c create mode 100644 libs/symcrypt/lib/hmac_pattern.c create mode 100644 libs/symcrypt/lib/hmacmd5.c create mode 100644 libs/symcrypt/lib/hmacsha1.c create mode 100644 libs/symcrypt/lib/hmacsha224.c create mode 100644 libs/symcrypt/lib/hmacsha256.c create mode 100644 libs/symcrypt/lib/hmacsha384.c create mode 100644 libs/symcrypt/lib/hmacsha3_224.c create mode 100644 libs/symcrypt/lib/hmacsha3_256.c create mode 100644 libs/symcrypt/lib/hmacsha3_384.c create mode 100644 libs/symcrypt/lib/hmacsha3_512.c create mode 100644 libs/symcrypt/lib/hmacsha512.c create mode 100644 libs/symcrypt/lib/hmacsha512_224.c create mode 100644 libs/symcrypt/lib/hmacsha512_256.c 
create mode 100644 libs/symcrypt/lib/kmac.c create mode 100644 libs/symcrypt/lib/kmac_pattern.c create mode 100644 libs/symcrypt/lib/libmain.c create mode 100644 libs/symcrypt/lib/lms.c create mode 100644 libs/symcrypt/lib/marvin32.c create mode 100644 libs/symcrypt/lib/md2.c create mode 100644 libs/symcrypt/lib/md4.c create mode 100644 libs/symcrypt/lib/md5.c create mode 100644 libs/symcrypt/lib/mldsa.c create mode 100644 libs/symcrypt/lib/mldsa_primitives.c create mode 100644 libs/symcrypt/lib/mlkem.c create mode 100644 libs/symcrypt/lib/mlkem_primitives.c create mode 100644 libs/symcrypt/lib/modexp.c create mode 100644 libs/symcrypt/lib/paddingPkcs7.c create mode 100644 libs/symcrypt/lib/parhash.c create mode 100644 libs/symcrypt/lib/pbkdf2.c create mode 100644 libs/symcrypt/lib/pbkdf2_hmacsha1.c create mode 100644 libs/symcrypt/lib/pbkdf2_hmacsha256.c create mode 100644 libs/symcrypt/lib/poly1305.c create mode 100644 libs/symcrypt/lib/precomp.h create mode 100644 libs/symcrypt/lib/primes.c create mode 100644 libs/symcrypt/lib/rc2.c create mode 100644 libs/symcrypt/lib/rc4.c create mode 100644 libs/symcrypt/lib/rdrand.c create mode 100644 libs/symcrypt/lib/rdseed.c create mode 100644 libs/symcrypt/lib/recoding.c create mode 100644 libs/symcrypt/lib/rsa_enc.c create mode 100644 libs/symcrypt/lib/rsa_padding.c create mode 100644 libs/symcrypt/lib/rsakey.c create mode 100644 libs/symcrypt/lib/sc_lib.h create mode 100644 libs/symcrypt/lib/sc_lib_mldsa.h create mode 100644 libs/symcrypt/lib/sc_lib_mlkem.h create mode 100644 libs/symcrypt/lib/scsTools.c create mode 100644 libs/symcrypt/lib/selftest.c create mode 100644 libs/symcrypt/lib/session.c create mode 100644 libs/symcrypt/lib/sha1.c create mode 100644 libs/symcrypt/lib/sha256-xmm.c create mode 100644 libs/symcrypt/lib/sha256-ymm.c create mode 100644 libs/symcrypt/lib/sha256.c create mode 100644 libs/symcrypt/lib/sha256Par-ymm.c create mode 100644 libs/symcrypt/lib/sha256Par.c create mode 100644 
libs/symcrypt/lib/sha3.c create mode 100644 libs/symcrypt/lib/sha3_224.c create mode 100644 libs/symcrypt/lib/sha3_256.c create mode 100644 libs/symcrypt/lib/sha3_384.c create mode 100644 libs/symcrypt/lib/sha3_512.c create mode 100644 libs/symcrypt/lib/sha512-ymm.c create mode 100644 libs/symcrypt/lib/sha512.c create mode 100644 libs/symcrypt/lib/sha512Par-ymm.c create mode 100644 libs/symcrypt/lib/sha512Par.c create mode 100644 libs/symcrypt/lib/shake.c create mode 100644 libs/symcrypt/lib/shake_pattern.c create mode 100644 libs/symcrypt/lib/smallPrimes32.h create mode 100644 libs/symcrypt/lib/sp800_108.c create mode 100644 libs/symcrypt/lib/sp800_108_hmacsha1.c create mode 100644 libs/symcrypt/lib/sp800_108_hmacsha256.c create mode 100644 libs/symcrypt/lib/sp800_108_hmacsha512.c create mode 100644 libs/symcrypt/lib/srtp_kdf.c create mode 100644 libs/symcrypt/lib/ssh_kdf.c create mode 100644 libs/symcrypt/lib/ssh_kdf_sha256.c create mode 100644 libs/symcrypt/lib/ssh_kdf_sha512.c create mode 100644 libs/symcrypt/lib/sskdf.c create mode 100644 libs/symcrypt/lib/tlsCbcVerify.c create mode 100644 libs/symcrypt/lib/tlsprf.c create mode 100644 libs/symcrypt/lib/xmss.c create mode 100644 libs/symcrypt/lib/xtsaes.c create mode 100644 libs/symcrypt/lib/xtsaes_definitions.h create mode 100644 libs/symcrypt/lib/xtsaes_pattern.c diff --git a/configure b/configure index 9ba2a3fa264..a47cc261172 100755 --- a/configure +++ b/configure @@ -728,6 +728,8 @@ TOMCRYPT_PE_LIBS TOMCRYPT_PE_CFLAGS TIFF_PE_LIBS TIFF_PE_CFLAGS +SYMCRYPT_PE_LIBS +SYMCRYPT_PE_CFLAGS SQLITE3_PE_LIBS SQLITE3_PE_CFLAGS PNG_PE_LIBS @@ -1707,6 +1709,7 @@ enable_sqlite3 enable_strmbase enable_strmiids enable_strsafe +enable_symcrypt enable_tiff enable_tomcrypt enable_unwind @@ -1891,6 +1894,8 @@ PNG_PE_CFLAGS PNG_PE_LIBS SQLITE3_PE_CFLAGS SQLITE3_PE_LIBS +SYMCRYPT_PE_CFLAGS +SYMCRYPT_PE_LIBS TIFF_PE_CFLAGS TIFF_PE_LIBS TOMCRYPT_PE_CFLAGS @@ -2739,6 +2744,11 @@ Some influential environment variables: version 
SQLITE3_PE_LIBS Linker flags for the PE sqlite3, overriding the bundled version + SYMCRYPT_PE_CFLAGS + C compiler flags for the PE symcrypt, overriding the bundled + version + SYMCRYPT_PE_LIBS + Linker flags for the PE symcrypt, overriding the bundled version TIFF_PE_CFLAGS C compiler flags for the PE tiff, overriding the bundled version TIFF_PE_LIBS @@ -14883,6 +14893,23 @@ fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: sqlite3 cflags: $SQLITE3_PE_CFLAGS" >&5 printf "%s\n" "$as_me:${as_lineno-$LINENO}: sqlite3 libs: $SQLITE3_PE_LIBS" >&5 +if ${SYMCRYPT_PE_LIBS:+false} : +then : + SYMCRYPT_PE_LIBS=symcrypt + if ${SYMCRYPT_PE_CFLAGS:+false} : +then : + SYMCRYPT_PE_CFLAGS="-I\$(top_srcdir)/libs/symcrypt/inc" +else case e in #( + e) enable_symcrypt=no ;; +esac +fi +else case e in #( + e) enable_symcrypt=no ;; +esac +fi +printf "%s\n" "$as_me:${as_lineno-$LINENO}: symcrypt cflags: $SYMCRYPT_PE_CFLAGS" >&5 +printf "%s\n" "$as_me:${as_lineno-$LINENO}: symcrypt libs: $SYMCRYPT_PE_LIBS" >&5 + if ${TIFF_PE_LIBS:+false} : then : TIFF_PE_LIBS="tiff \$(ZLIB_PE_LIBS)" @@ -24034,6 +24061,7 @@ wine_fn_config_makefile libs/sqlite3 enable_sqlite3 wine_fn_config_makefile libs/strmbase enable_strmbase wine_fn_config_makefile libs/strmiids enable_strmiids wine_fn_config_makefile libs/strsafe enable_strsafe +wine_fn_config_makefile libs/symcrypt enable_symcrypt wine_fn_config_makefile libs/tiff enable_tiff wine_fn_config_makefile libs/tomcrypt enable_tomcrypt wine_fn_config_makefile libs/unwind enable_unwind @@ -24262,7 +24290,7 @@ dlls/wineandroid.drv/wine-debug.apk: dlls/wineandroid.drv/build.gradle ${wine_sr mv dlls/wineandroid.drv/build/outputs/apk/debug/wine-debug.apk \$@" -EXTERNAL_SUBDIRS="libs/capstone libs/c++ libs/c++abi libs/faudio libs/fluidsynth libs/gsm libs/icucommon libs/icui18n libs/jpeg libs/jxr libs/lcms2 libs/ldap libs/mpg123 libs/musl libs/png libs/sqlite3 libs/tiff libs/tomcrypt libs/unwind libs/vkd3d libs/xml2 libs/xslt libs/zlib libs/compiler-rt" 
+EXTERNAL_SUBDIRS="libs/capstone libs/c++ libs/c++abi libs/faudio libs/fluidsynth libs/gsm libs/icucommon libs/icui18n libs/jpeg libs/jxr libs/lcms2 libs/ldap libs/mpg123 libs/musl libs/png libs/sqlite3 libs/symcrypt libs/tiff libs/tomcrypt libs/unwind libs/vkd3d libs/xml2 libs/xslt libs/zlib libs/compiler-rt" TAGSFLAGS="--langmap='c:+.idl.l.rh,make:(Make*.in)'" @@ -25185,6 +25213,8 @@ PNG_PE_CFLAGS = $PNG_PE_CFLAGS PNG_PE_LIBS = $PNG_PE_LIBS SQLITE3_PE_CFLAGS = $SQLITE3_PE_CFLAGS SQLITE3_PE_LIBS = $SQLITE3_PE_LIBS +SYMCRYPT_PE_CFLAGS = $SYMCRYPT_PE_CFLAGS +SYMCRYPT_PE_LIBS = $SYMCRYPT_PE_LIBS TIFF_PE_CFLAGS = $TIFF_PE_CFLAGS TIFF_PE_LIBS = $TIFF_PE_LIBS TOMCRYPT_PE_CFLAGS = $TOMCRYPT_PE_CFLAGS diff --git a/configure.ac b/configure.ac index 60262f26f68..f4dc1eba3eb 100644 --- a/configure.ac +++ b/configure.ac @@ -1246,6 +1246,7 @@ WINE_EXTLIB_FLAGS(MPG123, mpg123, mpg123, "-I\$(top_srcdir)/libs/mpg123/src/incl WINE_EXTLIB_FLAGS(MUSL, musl, musl) WINE_EXTLIB_FLAGS(PNG, png, "png \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/png") WINE_EXTLIB_FLAGS(SQLITE3, sqlite3, sqlite3) +WINE_EXTLIB_FLAGS(SYMCRYPT, symcrypt, symcrypt, "-I\$(top_srcdir)/libs/symcrypt/inc") WINE_EXTLIB_FLAGS(TIFF, tiff, "tiff \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/tiff/libtiff") WINE_EXTLIB_FLAGS(TOMCRYPT, tomcrypt, tomcrypt, "-I\$(top_srcdir)/libs/tomcrypt/src/headers -DLTC_NO_PROTOTYPES -DLTC_SOURCE") WINE_EXTLIB_FLAGS(UNWIND, unwind, unwind, "-I\$(top_srcdir)/libs/unwind/include") @@ -3541,6 +3542,7 @@ WINE_CONFIG_MAKEFILE(libs/sqlite3) WINE_CONFIG_MAKEFILE(libs/strmbase) WINE_CONFIG_MAKEFILE(libs/strmiids) WINE_CONFIG_MAKEFILE(libs/strsafe) +WINE_CONFIG_MAKEFILE(libs/symcrypt) WINE_CONFIG_MAKEFILE(libs/tiff) WINE_CONFIG_MAKEFILE(libs/tomcrypt) WINE_CONFIG_MAKEFILE(libs/unwind) diff --git a/libs/symcrypt/LICENSE b/libs/symcrypt/LICENSE new file mode 100644 index 00000000000..8cb179cdb69 --- /dev/null +++ b/libs/symcrypt/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) Microsoft Corporation. 
All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/libs/symcrypt/Makefile.in b/libs/symcrypt/Makefile.in new file mode 100644 index 00000000000..59150791669 --- /dev/null +++ b/libs/symcrypt/Makefile.in @@ -0,0 +1,129 @@ +STATICLIB = libsymcrypt.a +EXTRAINCL = $(SYMCRYPT_PE_CFLAGS) + +SOURCES = \ + lib/3des.c \ + lib/AesTables.c \ + lib/DesTables.c \ + lib/FatalIntercept.c \ + lib/IEEE802_11SaeCustom.c \ + lib/ScsTable.c \ + lib/a_dispatch.c \ + lib/aes-asm.c \ + lib/aes-c.c \ + lib/aes-default-bc.c \ + lib/aes-default.c \ + lib/aes-key.c \ + lib/aes-neon.c \ + lib/aes-xmm.c \ + lib/aes-ymm.c \ + lib/aesCtrDrbg.c \ + lib/aescmac.c \ + lib/aeskw.c \ + lib/blockciphermodes.c \ + lib/ccm.c \ + lib/chacha20.c \ + lib/chacha20_poly1305.c \ + lib/cpuid.c \ + lib/cpuid_um.c \ + lib/crt.c \ + lib/desx.c \ + lib/dh.c \ + lib/dl_internal_groups.c \ + lib/dlgroup.c \ + lib/dlkey.c \ + lib/dsa.c \ + lib/ec_dh.c \ + lib/ec_dispatch.c \ + lib/ec_dsa.c \ + lib/ec_internal_curve_params.c \ + lib/ec_internal_curves.c \ + lib/ec_montgomery.c \ + lib/ec_mul.c \ + lib/ec_short_weierstrass.c \ + lib/ec_twisted_edwards.c \ + lib/eckey.c \ + lib/ecpoint.c \ + lib/ecurve.c \ + lib/env_windowsUserModeWin8_1.c \ + lib/equal.c \ + lib/fdef_general.c \ + lib/fdef_int.c \ + lib/fdef_mod.c \ + lib/gcm.c \ + lib/gen_int.c \ + lib/ghash.c \ + lib/hash.c \ + lib/hkdf.c \ + lib/hmac.c \ + lib/hmacmd5.c \ + lib/hmacsha1.c \ + lib/hmacsha224.c \ + lib/hmacsha256.c \ + lib/hmacsha384.c \ + lib/hmacsha3_224.c \ + lib/hmacsha3_256.c \ + lib/hmacsha3_384.c \ + lib/hmacsha3_512.c \ + lib/hmacsha512.c \ + lib/hmacsha512_224.c \ + lib/hmacsha512_256.c \ + lib/kmac.c \ + lib/libmain.c \ + lib/lms.c \ + lib/marvin32.c \ + lib/md2.c \ + lib/md4.c \ + lib/md5.c \ + lib/mldsa.c \ + lib/mldsa_primitives.c \ + lib/mlkem.c \ + lib/mlkem_primitives.c \ + lib/modexp.c \ + lib/paddingPkcs7.c \ + lib/parhash.c \ + lib/pbkdf2.c \ + lib/pbkdf2_hmacsha1.c \ + lib/pbkdf2_hmacsha256.c \ + lib/poly1305.c \ + lib/primes.c \ + lib/rc2.c \ + lib/rc4.c \ + 
lib/rdrand.c \ + lib/rdseed.c \ + lib/recoding.c \ + lib/rsa_enc.c \ + lib/rsa_padding.c \ + lib/rsakey.c \ + lib/scsTools.c \ + lib/selftest.c \ + lib/session.c \ + lib/sha1.c \ + lib/sha256-xmm.c \ + lib/sha256-ymm.c \ + lib/sha256.c \ + lib/sha256Par-ymm.c \ + lib/sha256Par.c \ + lib/sha3.c \ + lib/sha3_224.c \ + lib/sha3_256.c \ + lib/sha3_384.c \ + lib/sha3_512.c \ + lib/sha512-ymm.c \ + lib/sha512.c \ + lib/sha512Par-ymm.c \ + lib/sha512Par.c \ + lib/shake.c \ + lib/sp800_108.c \ + lib/sp800_108_hmacsha1.c \ + lib/sp800_108_hmacsha256.c \ + lib/sp800_108_hmacsha512.c \ + lib/srtp_kdf.c \ + lib/ssh_kdf.c \ + lib/ssh_kdf_sha256.c \ + lib/ssh_kdf_sha512.c \ + lib/sskdf.c \ + lib/tlsCbcVerify.c \ + lib/tlsprf.c \ + lib/xmss.c \ + lib/xtsaes.c diff --git a/libs/symcrypt/inc/C_asm_shared.inc b/libs/symcrypt/inc/C_asm_shared.inc new file mode 100644 index 00000000000..25b33560bfb --- /dev/null +++ b/libs/symcrypt/inc/C_asm_shared.inc @@ -0,0 +1,74 @@ +/* + C_asm_shared.inc file to synchronize C and Asm information + Copyright (c) Microsoft Corporation. Licensed under the MIT license. + + This is a file that is included in both C and ASM such that the values are the same on both sides. + We use the C preprocessor to set ASM constants, as we already need to use the C preprocessor for + symcryptasm processing (see scripts/symcryptasm_processor.py). + We use this to define the structure offsets that the ASM code uses. + By having equivalent C constants we can add checks to the C code to ensure they are correct. 
+ +*/ + +#include "symcrypt_internal_shared.inc" + +#if defined(SYMCRYPT_MASM) +#define SET(_variable, _value) _variable EQU _value +#elif defined(SYMCRYPT_GAS) +#define SET(_variable, _value) .set _variable, _value +#else // assume C +#define SET(_variable, _value) const SIZE_T _variable = _value; +#endif + +SET(SymCryptModulusNdigitsOffsetAmd64, 4); +SET(SymCryptModulusInv64OffsetAmd64, 24); +SET(SymCryptModulusValueOffsetAmd64, 128); +SET(SymCryptNegDivisorSingleDigitOffsetAmd64, 256); + +SET(SymCryptModulusNdigitsOffsetX86, 4); +SET(SymCryptModulusInv64OffsetX86, 24); +SET(SymCryptModulusValueOffsetX86, 96); + +SET(SymCryptModulusNdigitsOffsetArm64, 4); +SET(SymCryptModulusInv64OffsetArm64, 24); +SET(SymCryptModulusValueOffsetArm64, 128); + +SET(SymCryptModulusNdigitsOffsetArm, 4); +SET(SymCryptModulusInv64OffsetArm, 24); +SET(SymCryptModulusValueOffsetArm, 96); + +#if !defined(SYMCRYPT_MASM) && !defined(SYMCRYPT_GAS) +// Preserve the definition of SET for use in symcryptasm processing +#undef SET +#endif + +#if SYMCRYPT_CPU_AMD64 +#define SYMCRYPT_CHECK_ASM_OFFSETS \ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusNdigitsOffsetAmd64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, nDigits ) );\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusInv64OffsetAmd64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, inv64 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusValueOffsetAmd64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptNegDivisorSingleDigitOffsetAmd64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ) + (2*SYMCRYPT_FDEF_DIGIT_SIZE) );\ + +#elif SYMCRYPT_CPU_X86 +#define SYMCRYPT_CHECK_ASM_OFFSETS \ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusNdigitsOffsetX86, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, nDigits ) );\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusInv64OffsetX86, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, inv64 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusValueOffsetX86, 
SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ));\ + +#elif SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_CHECK_ASM_OFFSETS \ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusNdigitsOffsetArm64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, nDigits ) );\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusInv64OffsetArm64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, inv64 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusValueOffsetArm64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ));\ + +#elif SYMCRYPT_CPU_ARM +#define SYMCRYPT_CHECK_ASM_OFFSETS \ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusNdigitsOffsetArm, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, nDigits ) );\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusInv64OffsetArm, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, inv64 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusValueOffsetArm, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ));\ + +#endif // CPU_* + +#if !defined(SYMCRYPT_CHECK_ASM_OFFSETS) +#define SYMCRYPT_CHECK_ASM_OFFSETS +#endif diff --git a/libs/symcrypt/inc/buildInfo.h b/libs/symcrypt/inc/buildInfo.h new file mode 100644 index 00000000000..c31e04a6420 --- /dev/null +++ b/libs/symcrypt/inc/buildInfo.h @@ -0,0 +1,8 @@ +#include "symcrypt_internal_shared.inc" + +#define _SYMCRYPT_STRING_INT(a) #a +#define _SYMCRYPT_STRING(a) _SYMCRYPT_STRING_INT(a) +#define SYMCRYPT_BUILD_INFO_BRANCH "" +#define SYMCRYPT_BUILD_INFO_COMMIT "2026-03-28T00:56:29+01:00_748c20f1fc48" +#define SYMCRYPT_BUILD_INFO_VERSION _SYMCRYPT_STRING(SYMCRYPT_CODE_VERSION_API) "." _SYMCRYPT_STRING(SYMCRYPT_CODE_VERSION_MINOR) "." _SYMCRYPT_STRING(SYMCRYPT_CODE_VERSION_PATCH) +#define SYMCRYPT_BUILD_INFO_TIMESTAMP "" diff --git a/libs/symcrypt/inc/symcrypt.h b/libs/symcrypt/inc/symcrypt.h new file mode 100644 index 00000000000..b650608d525 --- /dev/null +++ b/libs/symcrypt/inc/symcrypt.h @@ -0,0 +1,10814 @@ +// +// SymCrypt.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#pragma once + + +#ifdef __cplusplus +extern "C" { +#endif + +#include "symcrypt_internal_shared.inc" + +#define SYMCRYPT_API_VERSION ((SYMCRYPT_CODE_VERSION_API << 16) | SYMCRYPT_CODE_VERSION_MINOR) + +// +// This is the header file for the SymCrypt library which contains +// implementations of cryptographic algorithms. +// +// All API information is in this file. Information in the +// other include files (symcrypt_internal.h) is subject +// to change at any time. Please use only the information in this file. +// The header file symcrypt_low_level contains low-level API functions that +// are sometimes needed. That API surface is not stable across releases. +// + +; // <-- non-functional semicolon that makes the editor's indent work properly. + +// +// General information about SymCrypt: +// +// +// CPU +// This library is built and tested for: X86, AMD64, ARM, and ARM64. +// +// ENVIRONMENT +// SymCrypt can run in different environments, such as kernel mode, user mode, +// etc. +// In earlier versions of the library, the caller specified the environment by passing a +// pointer to the SymCryptInit function. +// It turns out that that model no longer scales with the use of new extended register sets +// or it introduces too much overhead. +// The current library uses a different model. The user of the library invokes one of the +// environment macros inside a C file in the calling process. +// SymCrypt defines macros for each environment. +// The same mechanism will also be used to select between different implementations of a single +// algorithm. For example, a caller might use +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE +// SYMCRYPT_SELECT_SHA256_COMPACT +// to indicate that the environment is kernel mode and the compact SHA-256 implementation is to +// be used. +// There are optimized environments for various Windows use cases. +// +// +// CHECKED BUILDS +// For each CPU, SymCrypt is available in both a checked build and a fre build. 
The +// checked build includes additional error checking which catches the most common +// errors. Please make sure you build a checked version of your binary and test with +// that regularly. +// +// +// MEMORY STRUCTURES +// Most SymCrypt functions do not allocate any memory; all memory is provided by the caller. +// However, callers may not copy, move, or otherwise manipulate the SymCrypt +// data structures. In particular, a memcpy of a SymCrypt data structure is not allowed. +// When necessary SymCrypt provides functions to perform the necessary manipulations. +// If you are missing one, please ask us. +// +// +// MULTI_THREADING +// The routines in this library are multi-thread safe, taking into account the usual +// rules of multiple threads accessing the same data structures. +// Any function that accepts a pointer-to-const argument must be assumed to read the +// corresponding data. If the function accepts a pointer-to-non-const it must be +// assumed to both read and write the data. +// It is safe for two threads to use the same data element as long as both of them +// are only reading from it. For example, an expanded key is typically passed as +// a pointer-to-const to the encryption and decryption routines. Thus, multiple +// threads can perform multiple encryptions/decryptions in parallel using the +// same expanded key. +// +// The normal memory re-order issues apply as well. If one thread initializes a +// data structure and the initialization function returns, it is NOT safe for +// another thread to read the data structure without a suitable memory barrier or +// synchronization primitive. +// +// +// SIDE CHANNELS +// Side channels are ways in which an attacker can receive information about what +// a target process is doing using other aspects than just the input/output behaviour +// of the target. For example, the memory subsystem, CPU load modulation, disk usage, +// and many other aspects can provide side-channels to an attacker. 
+// +// Wherever possible the implementations in SymCrypt have been hardened against side channels. +// The most important rules are that the instruction sequence and the memory addresses +// accessed do not depend on any of the data being processed. +// As a general rule, the actual data being processed is protected, but the +// length of the data (i.e. the number of bytes) is not protected in this way and +// is treated as public information. +// +// The implementations of the following algorithms are NOT side-channel safe: +// - non-AES-NI based AES +// used on CPUs that don't have AES-NI, or in kernel mode on x86 Win8 and below. +// - DES, 3DES, DESX +// - RC4 +// Making these algorithms side-channel safe would incur an overhead that is too large. +// +// +// FATAL ERRORS +// This is a high-performance library with a minimum of error checking. +// Many functions do not return an error code; this avoids the cost of +// having any error checking on the caller's side for error situations that +// can never occur. However, this does assume that the caller is calling +// SymCrypt using a valid calling sequence with proper parameters. +// In some situations this library will detect improper parameters or +// calling sequences. In those situations the library will generate a fatal +// error, which leads to an abrupt termination of the process (bugcheck in +// kernel mode). Exceptional circumstances may also induce fatal errors within +// the library (e.g. a caller provided buffer causes an access violation when +// it is read, or the library is called without sufficient stack space for the +// requested operation). +// If a fatal error is generated within the library, the internal state of the +// library may be inconsistent (e.g. there may be outstanding memory allocations +// that will never be freed, or a lock may have been taken which will never be +// released). 
Callers should not catch fatal errors and continue executing, as +// there is no guarantee of stability. +// The checked version of the library has additional error checking which detects +// the most common errors. We strongly recommend that callers build and test a +// checked version of their binary to catch these common errors. +// +// +// ALGORITHM SELF TEST +// SymCrypt includes functions that perform simple self-tests on the algorithm +// implementations. These functions are designed to be used for FIPS certification +// of crypto binaries. They should never fail, and they generate a fatal error +// if they do fail. +// If you are not FIPS-certifying your binaries, you can ignore the self test functions. +// +// +// CHANGES FROM RSA32.LIB +// This library replaces the venerable rsa32(k).lib. The major changes are: +// +// - SymCrypt requires the caller to call a library initialization function +// before calling the various algorithm implementations. +// - SymCrypt requires the caller to specify the environment in which the library +// is running. +// - SymCrypt has a CHKed and FRE version for use in CHKed and FRE builds. +// - The API has been updated. The API is more consistent and has better support +// for 64-bit platforms (use of SIZE_T rather than UINT32 for lengths). +// - All algorithm implementations have been updated to reflect the +// latest cryptographic coding guidelines. Several security weaknesses +// in the RSA32.lib code have been fixed. +// - Code has been optimized for the newer CPUs. +// This includes support for AES-NI, PCLMULQDQ, AVX2, etc. +// Most algorithms are faster, especially the recommended algorithms. +// Some legacy algorithms are somewhat slower due to removal of assembler support. +// Note: performance on older CPUs, like the Pentium 4, is reduced in some places. +// - Code and data now go into their default segments. +// RSA32 has a kernel-mode version where the code and data go into +// special segments. 
This allows the crypto code to be made pageable or +// nonpageable separate from the rest of the executable. This feature is +// error-prone, and not widely used. Furthermore, it switches on a per-lib +// basis, rather than a per-functionality basis, which is the wrong granularity. +// - Added native support for HMAC-SHA256 and HMAC-SHA512. +// - Support for parallel hashing, improves throughput up to 500%. +// - SymCrypt does not support binary copying of internal state information, because +// it imposes restrictions on what the library can do. +// Thus, you may NOT do a memcpy or remote copy on any SymCrypt data structure. +// SymCrypt provides copy functions where necessary, if you need others please ask. +// + +// +// Error codes +// +// This is a high-performance library with a minimum of error checking. Most +// routines do not perform any error checking at all. +// Some routines perform internal consistency checks and will cause a fatal +// error if the library is used incorrectly. +// +// In a few cases routines return an error code when they are called incorrectly. +// Mostly this is for key expansion routines which return an error code when the key +// size is wrong. This allows a higher-level library to be agnostic as to the proper +// key sizes for an algorithm and use the SymCrypt library to detect key size errors. +// +// For performance reasons this library avoids per-message error codes wherever possible. +// +// As this library can be used in many different contexts---kernel mode, user mode, +// WinCE, Xbox, etc.---we don't use one of the standard error types but use our own. +// Callers should not depend on the integer value of any of these enums. +// +// Error codes will signal the cause of the error, but callers should not rely on the +// exact symbolic error code returned. Especially in situations where multiple errors +// occur at once (e.g. 
multiple invalid parameters) the exact error symbol returned +// could change between versions of the library. +// + +#ifndef _Return_type_success_ +#define _Return_type_success_(expr) +#endif + +typedef _Return_type_success_( return == SYMCRYPT_NO_ERROR ) enum { + SYMCRYPT_NO_ERROR = 0, + SYMCRYPT_UNUSED = 0x8000, // Start our error codes here so they're easier to distinguish + SYMCRYPT_WRONG_KEY_SIZE, + SYMCRYPT_WRONG_BLOCK_SIZE, + SYMCRYPT_WRONG_DATA_SIZE, + SYMCRYPT_WRONG_NONCE_SIZE, + SYMCRYPT_WRONG_TAG_SIZE, + SYMCRYPT_WRONG_ITERATION_COUNT, + SYMCRYPT_AUTHENTICATION_FAILURE, + SYMCRYPT_EXTERNAL_FAILURE, + SYMCRYPT_FIPS_FAILURE, + SYMCRYPT_HARDWARE_FAILURE, + SYMCRYPT_NOT_IMPLEMENTED, + SYMCRYPT_INVALID_BLOB, + SYMCRYPT_BUFFER_TOO_SMALL, + SYMCRYPT_INVALID_ARGUMENT, + SYMCRYPT_MEMORY_ALLOCATION_FAILURE, + SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE, + SYMCRYPT_INCOMPATIBLE_FORMAT, + SYMCRYPT_VALUE_TOO_LARGE, + SYMCRYPT_SESSION_REPLAY_FAILURE, + SYMCRYPT_HBS_NO_OTS_KEYS_LEFT, + SYMCRYPT_HBS_PUBLIC_ROOT_MISMATCH, +} SYMCRYPT_ERROR; + +// SYMCRYPT_ECURVE_TYPE needs to be completely defined before including +// symcrypt_internal.h because it's a member of another type in there. +typedef enum _SYMCRYPT_ECURVE_TYPE { + SYMCRYPT_ECURVE_TYPE_NULL = 0, + SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS = 1, + SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS = 2, + SYMCRYPT_ECURVE_TYPE_MONTGOMERY = 3, +} SYMCRYPT_ECURVE_TYPE; +// +// SYMCRYPT_ECURVE_TYPE is used to specify the type of the curve. +// + +// SYMCRYPT_DLGROUP_FIPS needs to be completely defined before including +// symcrypt_internal.h because it's a member of another type in there. + +//===================================================== +// DL group operations + +typedef enum _SYMCRYPT_DLGROUP_FIPS { + SYMCRYPT_DLGROUP_FIPS_NONE = 0, + SYMCRYPT_DLGROUP_FIPS_186_2 = 1, + SYMCRYPT_DLGROUP_FIPS_186_3 = 2, +} SYMCRYPT_DLGROUP_FIPS; +// +// Dlgroup enums for the generation and verification of the group parameters. 
+// These are used in: +// - SymCryptDlgroupGenerate function to specify the appropriate standard to +// be used. +// - SymCryptDlgroupSetValue function to verify that the input parameters were +// properly generated. +// + +typedef enum _SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE { + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_NONE = 0, + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526 = 1, + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919 = 2, +} SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE; +#define SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_DEFAULT SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919 +// +// Dlgroup enums for the specification and verification of the named safe prime group parameters. +// These are used in: +// - SymCryptDlgroupGenerateSafePrime function to specify the appropriate group to +// be used. +// + +// +// The symcrypt_internal.h file contains information only relevant to the internals +// of the library, but they have to be exposed to the compiler of the caller. +// We put those in a separate file to make this file easier to read +// for users of the library. +// The details in the symcrypt_internal.h file can change at any time; +// users should only rely on the information in this header file. +// +#include "symcrypt_internal.h" + +// +// Useful macros +// +// A variety of useful macros. +// +// The load/store macros convert from integer types to an array of bytes and vice versa. +// LOAD<n>_* (p) loads a value of <n> bits from the byte pointer p. +// STORE<n>_* (p,v) stores the n-bit value v to byte pointer p. +// The macros can either do Most Significant Byte first (big-endian) or +// Least Significant Byte first. +// The actual definitions are in the symcrypt_internal.h file because they contain +// items that are not part of the stable public API of SymCrypt. 
+// + +#define SYMCRYPT_LOAD_LSBFIRST16( p ) SYMCRYPT_INTERNAL_LOAD_LSBFIRST16( p ) +#define SYMCRYPT_LOAD_LSBFIRST32( p ) SYMCRYPT_INTERNAL_LOAD_LSBFIRST32( p ) +#define SYMCRYPT_LOAD_LSBFIRST64( p ) SYMCRYPT_INTERNAL_LOAD_LSBFIRST64( p ) + +#define SYMCRYPT_LOAD_MSBFIRST16( p ) SYMCRYPT_INTERNAL_LOAD_MSBFIRST16( p ) +#define SYMCRYPT_LOAD_MSBFIRST32( p ) SYMCRYPT_INTERNAL_LOAD_MSBFIRST32( p ) +#define SYMCRYPT_LOAD_MSBFIRST64( p ) SYMCRYPT_INTERNAL_LOAD_MSBFIRST64( p ) + +#define SYMCRYPT_STORE_LSBFIRST16( p, v ) SYMCRYPT_INTERNAL_STORE_LSBFIRST16( p, v ) +#define SYMCRYPT_STORE_LSBFIRST32( p, v ) SYMCRYPT_INTERNAL_STORE_LSBFIRST32( p, v ) +#define SYMCRYPT_STORE_LSBFIRST64( p, v ) SYMCRYPT_INTERNAL_STORE_LSBFIRST64( p, v ) + +#define SYMCRYPT_STORE_MSBFIRST16( p, v ) SYMCRYPT_INTERNAL_STORE_MSBFIRST16( p, v ) +#define SYMCRYPT_STORE_MSBFIRST32( p, v ) SYMCRYPT_INTERNAL_STORE_MSBFIRST32( p, v ) +#define SYMCRYPT_STORE_MSBFIRST64( p, v ) SYMCRYPT_INTERNAL_STORE_MSBFIRST64( p, v ) + +// +// Convert between UINT32/UINT64 and variable-sized byte buffers +// +// The load functions take any size input array, and will return an error if the value +// encoded in the array exceeds the range of the target type (UINT32 or UINT64). +// The store functions will return an error if the destination buffer is too small +// to encode the actual value passed. +// An empty buffer (length = 0) encodes the value 0, and the value 0 can be encoded +// in the empty buffer. +// These functions are not side-channel safe. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadLsbFirstUint32( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT32 pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadLsbFirstUint64( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT64 pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadMsbFirstUint32( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT32 pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadMsbFirstUint64( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT64 pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreLsbFirstUint32( + UINT32 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreLsbFirstUint64( + UINT64 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreMsbFirstUint32( + UINT32 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreMsbFirstUint64( + UINT64 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); + +// +// Functions to retrieve the bitsize/bytesize of UINT32/UINT64 values +// Note: the bitsize/bytesize of the value 0 is defined as 0. +// Some data formats don't allow empty encodings, so the caller +// should ensure they handle the 0-case properly. +// These functions are NOT side-channel safe. +// +UINT32 +SymCryptUint32Bitsize( UINT32 value ); + +UINT32 +SymCryptUint64Bitsize( UINT64 value ); + +UINT32 +SymCryptUint32Bytesize( UINT32 value ); + +UINT32 +SymCryptUint64Bytesize( UINT64 value ); + + +// +// FORCED MEMORY ACCESS +// +// These macros force a memory access. That is, they require that the memory +// read or write takes place, and do not allow the compiler to optimize the access +// away. This is useful for wiping memory even if the compiler knows the memory will not be used in future. 
+// +// The READ<n> macros read an n-bit value from a PBYTE and return a BYTE if n=8 and an UINT<n> otherwise. +// The WRITE<n> macros write a value to a PBYTE using the same types as the corresponding READ<n> +// +// These macros provide no other memory ordering requirements, so there are no acquire/release +// semantics, memory barriers, etc. +// + +#define SYMCRYPT_FORCE_READ8( _p ) SYMCRYPT_INTERNAL_FORCE_READ8( _p ) +#define SYMCRYPT_FORCE_READ16( _p ) SYMCRYPT_INTERNAL_FORCE_READ16( _p ) +#define SYMCRYPT_FORCE_READ32( _p ) SYMCRYPT_INTERNAL_FORCE_READ32( _p ) +#define SYMCRYPT_FORCE_READ64( _p ) SYMCRYPT_INTERNAL_FORCE_READ64( _p ) + +#define SYMCRYPT_FORCE_WRITE8( _p, _v ) SYMCRYPT_INTERNAL_FORCE_WRITE8( _p, _v ) +#define SYMCRYPT_FORCE_WRITE16( _p, _v ) SYMCRYPT_INTERNAL_FORCE_WRITE16( _p, _v ) +#define SYMCRYPT_FORCE_WRITE32( _p, _v ) SYMCRYPT_INTERNAL_FORCE_WRITE32( _p, _v ) +#define SYMCRYPT_FORCE_WRITE64( _p, _v ) SYMCRYPT_INTERNAL_FORCE_WRITE64( _p, _v ) + +//========================================================================== +// TYPE MODIFIERS +//========================================================================== +// +// The SymCrypt library uses the following type modifiers +// +// SYMCRYPT_CALL +// +// The calling-convention used by SymCrypt functions. +// Some platforms have multiple calling conventions which differ in the +// way arguments are passed and the stack is handled +// The SYMCRYPT_CALL type modifier selects the correct calling convention. +// The current implementation uses __fastcall on the x86 platform, which +// passes arguments in registers and is generally faster than the __stdcall +// calling convention. +// +// +// SYMCRYPT_ALIGN +// +// On platforms that support alignment declaration this macro expands to +// __declspec(align(<n>)) where <n> is platform-dependent. +// Many data types that SymCrypt defines are SYMCRYPT_ALIGNed. 
+// When allocating memory for any SymCrypt data type the caller +// has to ensure that the memory is aligned to the natural alignment for +// that platform. (e.g. 4 for x86, 16 for x64) +// Memory allocation functions typically return properly aligned memory blocks. +// The macro SYMCRYPT_ALIGN_VALUE contains the actual value of <n>. +// + +//========================================================================== +// LIBRARY MANAGEMENT +//========================================================================== +// +// SymCrypt runs in many different environments. Boot library, kernel, user mode, +// (for each of x86, amd64, arm), and possibly WinCE, Mobile, Zune, Xbox, etc. +// These different environments can have different requirements. +// +// Creating different libraries for each environment has huge testing and maintenance +// costs. Instead, the user of the library invokes a pre-defined macro in their own code +// that contains the necessary adaptations to that environment. +// Using a macro makes the selection static, which allows the compiler to optimize +// away a lot of the overhead. +// (e.g. if XMM register saving is not needed, the stub function declared by the macro +// will always succeed, and the compiler will inline it and optimize it away.) +// +// Warning: due to recent changes in the Visual Studio C runtime, we cannot test saving +// of the YMM registers in Windows user mode. Because we do not have a kernel mode test +// for saving/restoring the YMM registers, this functionality is currently not tested. +// Before using SymCrypt in Windows 7 kernel mode, additional kernel mode tests should be +// added to verify this functionality. +// + +// +// The following environment macros are available. Callers should invoke one of these +// in their own code. +// +// SYMCRYPT_ENVIRONMENT_WINDOWS_BOOTLIBRARY // only for the current OS release +// +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_LEGACY // Use for any version of Windows.
+// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN7_N_LATER // Only for Win7 and later +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN8_1_N_LATER // Only for WinBlue and later +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_LATEST // use for latest OS +// +// SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_LEGACY // use for any version of Windows +// SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN7_N_LATER // Only for Win7 and later (cannot use AVX2 instructions) +// SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN8_1_N_LATER // Only for Win8.1 and later +// SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_LATEST // use for latest OS +// +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELDEBUGGER +// +// SYMCRYPT_ENVIRONMENT_LINUX_USERMODE // use for Linux +// +// SYMCRYPT_ENVIRONMENT_OPTEE_TA // use for OPTEE +// +// SYMCRYPT_ENVIRONMENT_GENERIC // use for all other situations +// + +VOID +SYMCRYPT_CALL +SymCryptInit(void); +// +// Initialize the static library. +// This function MUST be called before any other function in the library. +// It is not necessary to call this function when using the shared object library. +// +// This function does not perform the self tests in the library. +// Doing so would force the linking of all the algorithms in the library, +// which is obviously not desirable for applications that want to link in +// only one or two algorithms. +// If self tests are required (e.g. for FIPS certification) they have to be +// called separately for each algorithm. +// +// It is safe to call this function multiple times. +// The library initialization is done in the first call; subsequent calls are no-ops. +// +// If you get an 'undefined symbol' error on this function name, then you forgot +// to invoke one of the environment macros documented above.
+// + +VOID +SYMCRYPT_CALL +SymCryptModuleInit( + _In_ UINT32 api, + _In_ UINT32 minor); + +#define SYMCRYPT_MODULE_INIT() SymCryptModuleInit( SYMCRYPT_CODE_VERSION_API, SYMCRYPT_CODE_VERSION_MINOR ); +// +// Initialize the SymCrypt shared object module/dynamic-link library. This function verifies +// that the module version supports the version requested by the application. If the version +// is unsupported, a fatal error will occur. Rather than explicitly calling SymCryptModuleInit, +// the macro SYMCRYPT_MODULE_INIT should be used to call it with the correct arguments. +// + +//========================================================================== +// DATA MANIPULATION +//========================================================================== +// +// This library provides some data manipulation functions that commonly occur +// in cryptographic code. +// + +VOID +SYMCRYPT_CALL +SymCryptWipe( + _Out_writes_bytes_( cbData ) PVOID pbData, + SIZE_T cbData ); + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptWipeKnownSize( + _Out_writes_bytes_( cbData ) PVOID pbData, + SIZE_T cbData ); + +// +// The SymCryptWipe and SymCryptWipeKnownSize functions wipe memory. +// They work for any size and any alignment. +// Wiping is faster on x86 and x64 if the data buffer is 16-aligned, +// and the size is a multiple of 16. +// +// The SymCryptWipe function is optimized for the case where the size of the buffer +// is not known at compile time. +// +// The SymCryptWipeKnownSize function is optimized for the case where the +// cbData parameter is a compile-time known value. +// +// The two functions are functionally equivalent, but there can be significant performance +// differences: +// - calling SymCryptWipeKnownSize when the size is not known at compile time incurs a +// code size penalty. +// - calling SymCryptWipeKnownSize when the size is not known at compile time and is sometimes <= 64 +// incurs a performance penalty.
+// (The code assumes that the compiler can optimize all the conditional jumps away. +// Conditional jumps can be very expensive if they are not predicted correctly.) +// - calling SymCryptWipe when the buffer is small and has a compile-time known size incurs +// a performance penalty. +// When in doubt, use SymCryptWipe. +// + +VOID +SYMCRYPT_CALL +SymCryptXorBytes( + _In_reads_( cbBytes ) PCBYTE pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + _Out_writes_( cbBytes ) PBYTE pbResult, + SIZE_T cbBytes ); +// +// Xor two strings of bytes together. +// +// The result buffer can be the same as Src1 or Src2, or can be non-overlapping +// with the inputs. However, the result buffer may not partially overlap with +// one of the inputs. +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptEqual( + _In_reads_( cbBytes ) PCBYTE pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + SIZE_T cbBytes ); +// +// Compare two regions of memory and return TRUE if they are equal, FALSE otherwise. +// +// This function compares all the bytes without an early-out mechanism. +// An early-out implementation, such as memcmp, reveals through side channels +// the position of the first byte where the inputs differ, which leaks information. +// + + +//========================================================================== +// HASH FUNCTIONS +//========================================================================== +// +// All hash functions have a similar interface. For consistency we describe +// the generic parts of the interface once. +// Algorithm-specific comments are given with the API functions of each algorithm separately. +// +// For an algorithm called XXX the following functions, types, and constants are defined: +// +// +// SYMCRYPT_XXX_RESULT_SIZE +// +// A constant giving the size, in bytes, of the result of the hash function. +// +// +// SYMCRYPT_XXX_INPUT_BLOCK_SIZE +// +// A constant giving the natural input block size for the hash function. 
+// Most callers don't need to know this, but some uses, like the HMAC construction +// adapt to this size to improve efficiency. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxx( _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( SYMCRYPT_XXX_RESULT_SIZE ) PBYTE pbResult ); +// +// Computes the hash value of the data buffer. +// If you have all the data to be hashed in a single buffer this is the simplest function to use. +// +// +// SYMCRYPT_XXX_STATE +// +// Type to store the intermediate state of a hash computation. +// This is an opaque type whose structure can change at will. +// It should only be used for transient computations in a single executable +// and not be stored or transferred to a different process. +// The pointer version is also defined (PSYMCRYPT_XXX_STATE) +// +// The SYMCRYPT_XXX_STATE structure contains the entire state of an ongoing +// hash computation. If you want to compute the hash on several strings that +// have the same prefix, the caller may hash the prefix first, then create +// multiple copies using the supplied state copy function, +// and continue hashing the different states with different postfix strings. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxInit( _Out_ PSYMCRYPT_XXX_STATE pState ); +// +// Initialize a SYMCRYPT_XXX_STATE for subsequent use. +// +// The state encodes an ongoing hash computation and allows incremental +// computation of a hash function. +// At any point in time the state object encodes a state that is equivalent to +// the hash computation of a data string. +// This function can be called at any time and resets the state to correspond +// to the empty data string. +// The SymCryptXxxAppend function appends data to the data string +// encoded by the state. +// The SymCryptXxxResult function finalizes the computation and +// returns the actual hash result. 
+// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxAppend( _Inout_ PSYMCRYPT_XXX_STATE pState, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Provide more data to the ongoing hash computation specified by the state. +// The state must have been initialized by SymCryptXxxInit. +// This function can be called multiple times on the same state +// to append more data to the encoded data string. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxResult( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _Out_writes_( SYMCRYPT_XXX_RESULT_SIZE )PBYTE pbResult ); +// +// Returns the hash of the data string encoded by the state. +// If the state was newly initialized this returns the hash of the empty string. +// If one or more SymCryptXxxAppend function calls were made on this state +// it returns the hash of the concatenation of all the data strings +// passed to SymCryptXxxAppend. +// +// The state is re-initialized and ready for re-use; you do not have to call +// SymCryptXxxInit on the state to start another fresh hash computation. +// The state is also wiped of any traces of old data to prevent accidental data leakage. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxStateCopy( _In_ PCSYMCRYPT_XXX_STATE pSrc, _Out_ PSYMCRYPT_XXX_STATE pDst ); +// +// Create a new copy of the state object. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxStateExport( +// _In_ PCSYMCRYPT_XXX_STATE pState, +// _Out_writes_bytes_( SYMCRYPT_XXX_STATE_EXPORT_SIZE ) PBYTE pbBlob ); +// +// Converts a hash state to an exported format that can be persisted and re-imported. +// The exported blob is compatible across CPU architectures, and across different +// versions of SymCrypt. +// +// pState must point to a valid initialized hash state. 
+// +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptXxxStateImport( +// _Out_ PSYMCRYPT_XXX_STATE pState, +// _In_reads_bytes_( SYMCRYPT_XXX_STATE_EXPORT_SIZE) PCBYTE pbBlob ); +// +// Imports a hash state that was previously exported with SymCryptXxxStateExport. +// After this call, the effective state of *pState is identical to the effective +// state of *pState that was passed to the SymCryptXxxStateExport function which +// created this blob. +// +// This function returns an error if the blob is incorrectly formatted. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxSelftest(void); +// +// Perform a minimal self-test on the XXX algorithm. +// This function is designed to be used for achieving FIPS 140-2 compliance or +// to provide a simple self-test when an application starts. +// +// If an error is detected, a platform-specific fatal error action is taken. +// Callers do not need to handle any error conditions. +// +// +// +// +// There are also generic Hash functions that use a virtual table and work +// for any hash algorithm. +// Virtual table addresses that callers can use are supplied through a const-ptr-const definition. +// This supports an application switching the underlying implementation of one algorithm +// without the need to re-compile all the intermediate libraries in between. +// For example, you could use the same signature verification library with the fast hash implementation in one binary, +// and with a compact hash implementation in a second binary, without needing a different +// signature verification library. 
+// + +typedef enum _SYMCRYPT_HASH_ID +{ + SYMCRYPT_HASH_ID_NULL = 0, + SYMCRYPT_HASH_ID_MD2 = 1, + SYMCRYPT_HASH_ID_MD4 = 2, + SYMCRYPT_HASH_ID_MD5 = 3, + SYMCRYPT_HASH_ID_SHA1 = 4, + SYMCRYPT_HASH_ID_SHA224 = 5, + SYMCRYPT_HASH_ID_SHA256 = 6, + SYMCRYPT_HASH_ID_SHA384 = 7, + SYMCRYPT_HASH_ID_SHA512 = 8, + SYMCRYPT_HASH_ID_SHA512_224 = 9, + SYMCRYPT_HASH_ID_SHA512_256 = 10, + SYMCRYPT_HASH_ID_SHA3_224 = 11, + SYMCRYPT_HASH_ID_SHA3_256 = 12, + SYMCRYPT_HASH_ID_SHA3_384 = 13, + SYMCRYPT_HASH_ID_SHA3_512 = 14, + SYMCRYPT_HASH_ID_SHAKE128 = 15, + SYMCRYPT_HASH_ID_SHAKE256 = 16 +} SYMCRYPT_HASH_ID; + +PCSYMCRYPT_HASH +SYMCRYPT_CALL +SymCryptGetHashAlgorithm( SYMCRYPT_HASH_ID hashId ); +// +// Returns a pointer to the hash algorithm structure for the specified hash ID. +// Returns NULL if the hash ID is invalid. +// + +SIZE_T +SYMCRYPT_CALL +SymCryptHashResultSize( _In_ PCSYMCRYPT_HASH pHash ); + +SIZE_T +SYMCRYPT_CALL +SymCryptHashInputBlockSize( _In_ PCSYMCRYPT_HASH pHash ); + +SIZE_T +SYMCRYPT_CALL +SymCryptHashStateSize( _In_ PCSYMCRYPT_HASH pHash ); +// +// SymCryptHashStateSize +// +// Returns the size, in bytes, of the hash state for this hash algorithm. +// Note that the state must be SYMCRYPT_ALIGNed. +// Alternatively, the SYMCRYPT_HASH_STATE structure is large enough to contain +// any SymCrypt-implemented hash state, so sizeof( SYMCRYPT_HASH_STATE ) is always +// large enough to contain a hash state. +// + +VOID +SYMCRYPT_CALL +SymCryptHash( + _In_ PCSYMCRYPT_HASH pHash, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MIN( cbResult, pHash->resultSize ) ) PBYTE pbResult, + SIZE_T cbResult ); +// +// SymCryptHash +// +// Compute a hash value using any hash function.
+// The number of bytes written to the pbResult buffer is +// min( cbResult, SymCryptHashResultSize( pHash ) ) +// + +VOID +SYMCRYPT_CALL +SymCryptHashInit( + _In_ PCSYMCRYPT_HASH pHash, + _Out_writes_bytes_( pHash->stateSize ) PVOID pState ); + +VOID +SYMCRYPT_CALL +SymCryptHashAppend( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_updates_bytes_( pHash->stateSize ) PVOID pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHashResult( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_updates_bytes_( pHash->stateSize ) PVOID pState, + _Out_writes_( SYMCRYPT_MIN( cbResult, pHash->resultSize ) ) PBYTE pbResult, + SIZE_T cbResult ); +// +// SymCryptHashResult +// +// Finalizes the hash computation by calling the resultFunc member +// of pHash. +// The hash result is produced to an internal buffer and +// the number of bytes written to the pbResult buffer is +// min( cbResult, SymCryptHashResultSize( pHash ) ) + +VOID +SYMCRYPT_CALL +SymCryptHashStateCopy( + _In_ PCSYMCRYPT_HASH pHash, + _In_reads_(pHash->stateSize) PCVOID pSrc, + _Out_writes_(pHash->stateSize) PVOID pDst); +// +// SymCryptHashStateCopy +// +// Copies the hash state from pSrc to pDst. + +//////////////////////////////////////////////////////////////////////////// +// MD2 +// +// The MD2 hash algorithm per RFC1319. +// +// The MD2 hash function has not received widespread analysis and is very slow +// compared to contemporary algorithms. +// +// The SymCrypt implementation of MD2 uses table lookups which leads to a side-channel +// vulnerability. +// +// Per the Crypto SDL, any use of this algorithm in Microsoft code requires +// a Crypto board exemption. Whenever possible, please use SHA-256 or SHA-512. +// +// For details on this API see the description above about the generic hash function API.
+// + +#define SYMCRYPT_MD2_RESULT_SIZE (16) +#define SYMCRYPT_MD2_INPUT_BLOCK_SIZE (16) + +VOID +SYMCRYPT_CALL +SymCryptMd2( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MD2_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd2Init( _Out_ PSYMCRYPT_MD2_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptMd2Append( + _Inout_ PSYMCRYPT_MD2_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptMd2Result( + _Inout_ PSYMCRYPT_MD2_STATE pState, + _Out_writes_( SYMCRYPT_MD2_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd2StateCopy( _In_ PCSYMCRYPT_MD2_STATE pSrc, _Out_ PSYMCRYPT_MD2_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptMd2StateExport( + _In_ PCSYMCRYPT_MD2_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD2_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd2StateImport( + _Out_ PSYMCRYPT_MD2_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD2_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptMd2Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptMd2Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// MD4 +// +// The MD4 hash algorithm per RFC1320. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit lengths are not supported. +// +// The MD4 hash function has been badly broken and is not considered secure. +// Per the Crypto SDL, any use of this algorithm in Microsoft code requires +// a Crypto board exemption. Whenever possible, please use SHA-256 or SHA-512. +// +// For details on this API see the description above about the generic hash function API.
+// + +#define SYMCRYPT_MD4_RESULT_SIZE (16) +#define SYMCRYPT_MD4_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptMd4( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MD4_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd4Init( _Out_ PSYMCRYPT_MD4_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptMd4Append( + _Inout_ PSYMCRYPT_MD4_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptMd4Result( + _Inout_ PSYMCRYPT_MD4_STATE pState, + _Out_writes_( SYMCRYPT_MD4_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd4StateCopy( _In_ PCSYMCRYPT_MD4_STATE pSrc, _Out_ PSYMCRYPT_MD4_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptMd4StateExport( + _In_ PCSYMCRYPT_MD4_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD4_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd4StateImport( + _Out_ PSYMCRYPT_MD4_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD4_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptMd4Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptMd4Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// MD5 +// +// The MD5 hash algorithm per RFC1321. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit lengths are not supported. +// +// The MD5 hash function has been badly broken and is not considered secure. +// Per the Crypto SDL, any use of this algorithm in Microsoft code requires +// a Crypto board exemption. Whenever possible, please use SHA-256 or SHA-512. +// +// For details on this API see the description above about the generic hash function API.
+// + +#define SYMCRYPT_MD5_RESULT_SIZE (16) +#define SYMCRYPT_MD5_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptMd5( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MD5_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd5Init( _Out_ PSYMCRYPT_MD5_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptMd5Append( + _Inout_ PSYMCRYPT_MD5_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptMd5Result( + _Inout_ PSYMCRYPT_MD5_STATE pState, + _Out_writes_( SYMCRYPT_MD5_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd5StateCopy( _In_ PCSYMCRYPT_MD5_STATE pSrc, _Out_ PSYMCRYPT_MD5_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptMd5StateExport( + _In_ PCSYMCRYPT_MD5_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD5_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd5StateImport( + _Out_ PSYMCRYPT_MD5_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD5_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptMd5Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptMd5Algorithm; + + +/////////////////////////////////////////////////////////////////////////////// +// SHA-1 +// +// The SHA-1 hash algorithm per FIPS 180-4. +// +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-1 standard limits data inputs to a maximum of 2^61-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// The SHA-1 hash algorithm has been broken in a technical sense, and future +// attacks can only get better. +// This algorithm is not recommended for new applications and should only be used +// for backward compatibility. 
+// Per the Crypto SDL, new uses of this algorithm in Microsoft code require +// a Crypto board exemption. Whenever possible, please use SHA-256 or SHA-512. +// +// For details on this API see the description above about the generic hash function API. +// + +#define SYMCRYPT_SHA1_RESULT_SIZE (20) +#define SYMCRYPT_SHA1_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptSha1( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA1_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha1Init( _Out_ PSYMCRYPT_SHA1_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha1Append( + _Inout_ PSYMCRYPT_SHA1_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha1Result( + _Inout_ PSYMCRYPT_SHA1_STATE pState, + _Out_writes_( SYMCRYPT_SHA1_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha1StateCopy( _In_ PCSYMCRYPT_SHA1_STATE pSrc, _Out_ PSYMCRYPT_SHA1_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha1StateExport( + _In_ PCSYMCRYPT_SHA1_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA1_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha1StateImport( + _Out_ PSYMCRYPT_SHA1_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA1_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha1Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha1Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// SHA-224 +// +// +// The SHA-224 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-224 standard limits data inputs to a maximum of 2^61-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. 
+// +// This implementation is meant for interoperability and is not recommended for use. +// +// For details on this API see the description above about the generic hash function API. +// + +#define SYMCRYPT_SHA224_RESULT_SIZE (28) +#define SYMCRYPT_SHA224_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptSha224( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha224Init( _Out_ PSYMCRYPT_SHA224_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha224Append( + _Inout_ PSYMCRYPT_SHA224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha224Result( + _Inout_ PSYMCRYPT_SHA224_STATE pState, + _Out_writes_( SYMCRYPT_SHA224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha224StateCopy( _In_ PCSYMCRYPT_SHA224_STATE pSrc, _Out_ PSYMCRYPT_SHA224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha224StateExport( + _In_ PCSYMCRYPT_SHA224_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA224_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha224StateImport( + _Out_ PSYMCRYPT_SHA224_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA224_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha224Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha224Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// SHA-256 +// +// +// The SHA-256 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-256 standard limits data inputs to a maximum of 2^61-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. 
+// +// For details on this API see the description above about the generic hash function API. +// + +#define SYMCRYPT_SHA256_RESULT_SIZE (32) +#define SYMCRYPT_SHA256_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptSha256( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha256Init( _Out_ PSYMCRYPT_SHA256_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha256Append( + _Inout_ PSYMCRYPT_SHA256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha256Result( + _Inout_ PSYMCRYPT_SHA256_STATE pState, + _Out_writes_( SYMCRYPT_SHA256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha256StateCopy( _In_ PCSYMCRYPT_SHA256_STATE pSrc, _Out_ PSYMCRYPT_SHA256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha256StateExport( + _In_ PCSYMCRYPT_SHA256_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha256StateImport( + _Out_ PSYMCRYPT_SHA256_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha256Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha256Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// SHA-384 +// +// +// The SHA-384 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-384 standard limits data inputs to a maximum of 2^125-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// For details on this API see the description above about the generic hash function API. 
+// + +#define SYMCRYPT_SHA384_RESULT_SIZE (48) +#define SYMCRYPT_SHA384_INPUT_BLOCK_SIZE (128) + +VOID +SYMCRYPT_CALL +SymCryptSha384( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA384_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha384Init( _Out_ PSYMCRYPT_SHA384_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha384Append( + _Inout_ PSYMCRYPT_SHA384_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha384Result( + _Inout_ PSYMCRYPT_SHA384_STATE pState, + _Out_writes_( SYMCRYPT_SHA384_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha384StateCopy( _In_ PCSYMCRYPT_SHA384_STATE pSrc, _Out_ PSYMCRYPT_SHA384_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha384StateExport( + _In_ PCSYMCRYPT_SHA384_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA384_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha384StateImport( + _Out_ PSYMCRYPT_SHA384_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA384_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha384Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha384Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// SHA-512 +// +// +// The SHA-512 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-512 standard limits data inputs to a maximum of 2^125-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// For details on this API see the description above about the generic hash function API. 
+// + +#define SYMCRYPT_SHA512_RESULT_SIZE (64) +#define SYMCRYPT_SHA512_INPUT_BLOCK_SIZE (128) + +VOID +SYMCRYPT_CALL +SymCryptSha512( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512Init( _Out_ PSYMCRYPT_SHA512_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha512Append( + _Inout_ PSYMCRYPT_SHA512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha512Result( + _Inout_ PSYMCRYPT_SHA512_STATE pState, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512StateCopy( _In_ PCSYMCRYPT_SHA512_STATE pSrc, _Out_ PSYMCRYPT_SHA512_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha512StateExport( + _In_ PCSYMCRYPT_SHA512_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha512StateImport( + _Out_ PSYMCRYPT_SHA512_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha512Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha512Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// SHA-512/224 +// +// +// The SHA-512/224 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-512/224 standard limits data inputs to a maximum of 2^125-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// This implementation is meant for interoperability and is not recommended for use. +// +// For details on this API see the description above about the generic hash function API. 
+// + +#define SYMCRYPT_SHA512_224_RESULT_SIZE (28) +#define SYMCRYPT_SHA512_224_INPUT_BLOCK_SIZE (128) + +VOID +SYMCRYPT_CALL +SymCryptSha512_224( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA512_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224Init( _Out_ PSYMCRYPT_SHA512_224_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224Append( + _Inout_ PSYMCRYPT_SHA512_224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224Result( + _Inout_ PSYMCRYPT_SHA512_224_STATE pState, + _Out_writes_( SYMCRYPT_SHA512_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224StateCopy( _In_ PCSYMCRYPT_SHA512_224_STATE pSrc, _Out_ PSYMCRYPT_SHA512_224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224StateExport( + _In_ PCSYMCRYPT_SHA512_224_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha512_224StateImport( + _Out_ PSYMCRYPT_SHA512_224_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha512_224Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// SHA-512/256 +// +// +// The SHA-512/256 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-512/256 standard limits data inputs to a maximum of 2^125-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// This implementation is meant for interoperability and is not recommended for use. 
+// +// For details on this API see the description above about the generic hash function API. +// + +#define SYMCRYPT_SHA512_256_RESULT_SIZE (32) +#define SYMCRYPT_SHA512_256_INPUT_BLOCK_SIZE (128) + +VOID +SYMCRYPT_CALL +SymCryptSha512_256( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA512_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256Init( _Out_ PSYMCRYPT_SHA512_256_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256Append( + _Inout_ PSYMCRYPT_SHA512_256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256Result( + _Inout_ PSYMCRYPT_SHA512_256_STATE pState, + _Out_writes_( SYMCRYPT_SHA512_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256StateCopy( _In_ PCSYMCRYPT_SHA512_256_STATE pSrc, _Out_ PSYMCRYPT_SHA512_256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256StateExport( + _In_ PCSYMCRYPT_SHA512_256_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha512_256StateImport( + _Out_ PSYMCRYPT_SHA512_256_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha512_256Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// SHA-3 +// +// The SHA-3 family of hash algorithms per FIPS 202. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// SHA3-224 is meant for interoperability and is not recommended for use. 
+// +// SHA3-224(M) = KECCAK[448](M || 01, 224) +// SHA3-256(M) = KECCAK[512](M || 01, 256) +// SHA3-384(M) = KECCAK[768](M || 01, 384) +// SHA3-512(M) = KECCAK[1024](M || 01, 512) +// +// For details on this API see the description above about the generic hash function API. +// + + +// +// SHA-3-224 +// + +#define SYMCRYPT_SHA3_224_RESULT_SIZE (28) +#define SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE (144) + +VOID +SYMCRYPT_CALL +SymCryptSha3_224( + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_(SYMCRYPT_SHA3_224_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Init(_Out_ PSYMCRYPT_SHA3_224_STATE pState); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Append( + _Inout_ PSYMCRYPT_SHA3_224_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Result( + _Inout_ PSYMCRYPT_SHA3_224_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_224_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224StateCopy(_In_ PCSYMCRYPT_SHA3_224_STATE pSrc, _Out_ PSYMCRYPT_SHA3_224_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224StateExport( + _In_ PCSYMCRYPT_SHA3_224_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE) PBYTE pbBlob); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_224StateImport( + _Out_ PSYMCRYPT_SHA3_224_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE) PCBYTE pbBlob); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha3_224Algorithm; + + +// +// SHA-3-256 +// + +#define SYMCRYPT_SHA3_256_RESULT_SIZE (32) +#define SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE (136) + +VOID +SYMCRYPT_CALL +SymCryptSha3_256( + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_(SYMCRYPT_SHA3_256_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Init(_Out_ PSYMCRYPT_SHA3_256_STATE pState); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Append( + _Inout_ PSYMCRYPT_SHA3_256_STATE pState, + 
_In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Result( + _Inout_ PSYMCRYPT_SHA3_256_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_256_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256StateCopy(_In_ PCSYMCRYPT_SHA3_256_STATE pSrc, _Out_ PSYMCRYPT_SHA3_256_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256StateExport( + _In_ PCSYMCRYPT_SHA3_256_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE) PBYTE pbBlob); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_256StateImport( + _Out_ PSYMCRYPT_SHA3_256_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE) PCBYTE pbBlob); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha3_256Algorithm; + + +// +// SHA-3-384 +// + +#define SYMCRYPT_SHA3_384_RESULT_SIZE (48) +#define SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE (104) + +VOID +SYMCRYPT_CALL +SymCryptSha3_384( + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_(SYMCRYPT_SHA3_384_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Init(_Out_ PSYMCRYPT_SHA3_384_STATE pState); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Append( + _Inout_ PSYMCRYPT_SHA3_384_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Result( + _Inout_ PSYMCRYPT_SHA3_384_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_384_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384StateCopy(_In_ PCSYMCRYPT_SHA3_384_STATE pSrc, _Out_ PSYMCRYPT_SHA3_384_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384StateExport( + _In_ PCSYMCRYPT_SHA3_384_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE) PBYTE pbBlob); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_384StateImport( + _Out_ PSYMCRYPT_SHA3_384_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE) PCBYTE pbBlob); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Selftest(void); + +extern const 
PCSYMCRYPT_HASH SymCryptSha3_384Algorithm; + + +// +// SHA-3-512 +// + +#define SYMCRYPT_SHA3_512_RESULT_SIZE (64) +#define SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE (72) + +VOID +SYMCRYPT_CALL +SymCryptSha3_512( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA3_512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Init( _Out_ PSYMCRYPT_SHA3_512_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Append( + _Inout_ PSYMCRYPT_SHA3_512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Result( + _Inout_ PSYMCRYPT_SHA3_512_STATE pState, + _Out_writes_( SYMCRYPT_SHA3_512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512StateCopy( _In_ PCSYMCRYPT_SHA3_512_STATE pSrc, _Out_ PSYMCRYPT_SHA3_512_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512StateExport( + _In_ PCSYMCRYPT_SHA3_512_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_512StateImport( + _Out_ PSYMCRYPT_SHA3_512_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha3_512Algorithm; + + +//========================================================================== +// Extendable-Output Functions (XOFs) +//========================================================================== +// +// XOFs are similar to hash functions except that the output can be arbitrary length. +// SHAKE128 and SHAKE256 are XOFs specified in FIPS 202. +// +// SHAKE128(M, d) = KECCAK[256] (M || 1111, d) +// SHAKE256(M, d) = KECCAK[512] (M || 1111, d) +// +// SHAKEs share the same Keccak state as the other Keccak based algorithms under +// the name SYMCRYPT_SHAKEXxx_STATE. +// +// Both SHAKE128 and SHAKE256 have default result sizes (32- and 64-bytes resp.) 
+// that allow them to be used as substitutes for hash functions with the Init-Append-Result +// pattern. +// +// Extract is a new type of function that does not exist in hash functions, which can +// be called multiple times to successively generate output from the state. The Extract +// function also provides the caller with a flag to wipe the state when no further Extract +// calls will be made. If the caller does not know in advance whether an Extract call is +// the final one, wiping can be performed later with an Init call or an Extract call with +// zero bytes output. +// +// If Append is called after an Extract call which did not wipe the state (i.e., the state +// is still in 'extract' mode), Append will notice this and switch from 'extract' mode to +// 'append' mode by wiping and initializing the state. This Append call effectively appends +// data for a fresh computation, saving an additional call to wipe/initialize the state. +// +// +// SYMCRYPT_SHAKEXXX_RESULT_SIZE +// +// Default output size, used by the SymCryptShakeXxxResult function. +// +// SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE +// +// Rate for the Keccak permutation. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxDefault( +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( SYMCRYPT_SHAKEXXX_RESULT_SIZE ) PBYTE pbResult); +// +// SHAKE single-call function that produces default output size defined by +// SYMCRYPT_SHAKEXXX_RESULT_SIZE. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxx( +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( cbResult ) PBYTE pbResult, +// SIZE_T cbResult); +// +// SHAKE single-call function that produces variable-length output specified +// by the cbResult parameter. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxInit( _Out_ PSYMCRYPT_XXX_STATE pState ); +// +// Initializes the SHAKE state. 
+// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxAppend( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Appends data to the SHAKE state. +// +// Append cannot be the first call to an uninitialized SHAKE state. All +// other uses independent of whether the state is in 'append' mode or 'extract' +// mode are well defined. If the state was previously in 'extract' mode, (i.e., after +// an Extract call with bWipe=FALSE) it wipes/resets the state and the data is +// appended to a fresh state. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxExtract( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _Out_writes_(cbResult) PBYTE pbResult, +// SIZE_T cbResult, +// BOOLEAN bWipe); +// +// Generates output from the SHAKE state. +// +// Extract cannot be the first call to an uninitialized SHAKE state. All +// other uses independent of whether the state is in 'append' mode or 'extract' mode +// are well defined. +// +// If the state was in 'append' mode before the Extract call, Extract switches +// the state to 'extract' mode and generates the requested number of bytes from +// the state. Extract wipes/resets the state and transitions the state to 'append' +// mode if bWipe=TRUE, otherwise leaving the state in 'extract' mode, available for +// further extractions. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxResult( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _Out_writes_(SYMCRYPT_SHAKEXXX_RESULT_SIZE) PBYTE pbResult ); +// +// Extracts SYMCRYPT_SHAKEXXX_RESULT_SIZE bytes from the state and wipes/resets +// it for a new computation. +// +// Result cannot be called with an uninitialized state. All other uses are well +// defined. If it is called after an Extract call with bWipe=FALSE, it does the +// final extraction from the state for SYMCRYPT_SHAKEXXX_RESULT_SIZE bytes, +// effectively calling Extract with cbResult=SYMCRYPT_SHAKEXXX_RESULT_SIZE and +// bWipe=TRUE. 
+// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxStateCopy(_In_ PCSYMCRYPT_SHAKEXXX_STATE pSrc, _Out_ PSYMCRYPT_SHAKEXXX_STATE pDst); +// +// Create a new copy of the state object. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxSelftest(void); +// +// Perform a minimal self-test on the ShakeXxx algorithm. +// This function is designed to be used for achieving FIPS 140-2 compliance or +// to provide a simple self-test when an application starts. +// +// If an error is detected, a platform-specific fatal error action is taken. +// Callers do not need to handle any error conditions. + + +// +// SHAKE128 +// +#define SYMCRYPT_SHAKE128_RESULT_SIZE (32) +#define SYMCRYPT_SHAKE128_INPUT_BLOCK_SIZE (168) + +VOID +SYMCRYPT_CALL +SymCryptShake128Default( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHAKE128_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptShake128( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptShake128Init( _Out_ PSYMCRYPT_SHAKE128_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptShake128Append( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptShake128Extract( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptShake128Result( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _Out_writes_(SYMCRYPT_SHAKE128_RESULT_SIZE) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptShake128StateCopy(_In_ PCSYMCRYPT_SHAKE128_STATE pSrc, _Out_ PSYMCRYPT_SHAKE128_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptShake128Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptShake128HashAlgorithm; + +// +// SHAKE256 +// +#define SYMCRYPT_SHAKE256_RESULT_SIZE (64) +#define SYMCRYPT_SHAKE256_INPUT_BLOCK_SIZE (136) + +VOID +SYMCRYPT_CALL 
+SymCryptShake256Default( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHAKE256_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptShake256( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptShake256Init( _Out_ PSYMCRYPT_SHAKE256_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptShake256Append( + _Inout_ PSYMCRYPT_SHAKE256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptShake256Extract( + _Inout_ PSYMCRYPT_SHAKE256_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptShake256Result( + _Inout_ PSYMCRYPT_SHAKE256_STATE pState, + _Out_writes_(SYMCRYPT_SHAKE256_RESULT_SIZE) PBYTE pbResult ); + + +VOID +SYMCRYPT_CALL +SymCryptShake256StateCopy(_In_ PCSYMCRYPT_SHAKE256_STATE pSrc, _Out_ PSYMCRYPT_SHAKE256_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptShake256Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptShake256HashAlgorithm; + +//========================================================================== +// Customizable Extendable-Output Functions (XOFs) +//========================================================================== +// +// cSHAKE128 and cSHAKE256 are customizable SHAKE functions specified in NIST SP 800-185. 
+// +// When cSHAKE input strings N (function name string) and S (customization string) are +// both empty, cSHAKE is equivalent to SHAKE: +// +// cSHAKE128(X, L, "", "") = SHAKE128(X, L) +// cSHAKE256(X, L, "", "") = SHAKE256(X, L) +// +// If at least one of N and S is non-empty, cSHAKE is defined as follows: +// +// cSHAKE128(X, L, N, S) = KECCAK[256](bytepad(encode_string(N) || encode_string(S), 168) || X || 00, L) +// cSHAKE256(X, L, N, S) = KECCAK[512](bytepad(encode_string(N) || encode_string(S), 136) || X || 00, L) +// +// The following functions are equivalent to their SHAKE counterparts. +// SymCryptCShakeXxxExtract with bWipe=TRUE and SymCryptCShakeXxxResult functions reset +// the cSHAKE state to an empty SHAKE state after generating output. This behavior is +// equivalent to calling SymCryptCShakeXxxInit with empty input strings. +// +// SymCryptCShakeXxxAppend +// SymCryptCShakeXxxExtract +// SymCryptCShakeXxxResult +// +// Calling SymCryptCShakeXxxAppend when cSHAKE state is in 'extract' mode results +// in the same behavior described above: the state is wiped and initialized with +// empty input strings, after which the data is appended to the empty state. This +// converts the state to a SHAKE state since cSHAKE with empty input strings is +// equivalent to SHAKE. This is a consequence of not being able to store the input +// strings to cSHAKE and re-initialize it with them. Thus, if multiple cSHAKE +// computations with the same input strings are to be carried out, cSHAKE state must +// be initialized with the input strings each time. 
+// +// The following functions differ from their SHAKE counterparts by the introduction of customization +// strings: +// +// VOID +// SYMCRYPT_CALL +// SymCryptCShakeXxx( +// _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, +// SIZE_T cbFunctionNameString, +// _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, +// SIZE_T cbCustomizationString, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( cbResult ) PBYTE pbResult, +// SIZE_T cbResult); +// +// Single-call cSHAKE computation. +// +// VOID +// SYMCRYPT_CALL +// SymCryptCShakeXxxInit( +// _Out_ PSYMCRYPT_CSHAKEXXX_STATE pState, +// _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, +// SIZE_T cbFunctionNameString, +// _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, +// SIZE_T cbCustomizationString); +// +// Initializes the cSHAKE state with the provided input strings. If both of +// the input strings are empty, the call is equivalent to SymCryptShakeXxxInit, +// otherwise the input strings will be encoded and appended to the state. 
+ + +// +// cSHAKE128 +// +#define SYMCRYPT_CSHAKE128_RESULT_SIZE SYMCRYPT_SHAKE128_RESULT_SIZE +#define SYMCRYPT_CSHAKE128_INPUT_BLOCK_SIZE SYMCRYPT_SHAKE128_INPUT_BLOCK_SIZE + +VOID +SYMCRYPT_CALL +SymCryptCShake128( + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Init( + _Out_ PSYMCRYPT_CSHAKE128_STATE pState, + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Append( + _Inout_ PSYMCRYPT_CSHAKE128_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Extract( + _Inout_ PSYMCRYPT_CSHAKE128_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Result( + _Inout_ PSYMCRYPT_CSHAKE128_STATE pState, + _Out_writes_( SYMCRYPT_CSHAKE128_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptCShake128StateCopy(_In_ PCSYMCRYPT_CSHAKE128_STATE pSrc, _Out_ PSYMCRYPT_CSHAKE128_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Selftest(void); + + +// +// cSHAKE256 +// +#define SYMCRYPT_CSHAKE256_RESULT_SIZE SYMCRYPT_SHAKE256_RESULT_SIZE +#define SYMCRYPT_CSHAKE256_INPUT_BLOCK_SIZE SYMCRYPT_SHAKE256_INPUT_BLOCK_SIZE + +VOID +SYMCRYPT_CALL +SymCryptCShake256( + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( cbResult ) PBYTE 
pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Init( + _Out_ PSYMCRYPT_CSHAKE256_STATE pState, + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Append( + _Inout_ PSYMCRYPT_CSHAKE256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Extract( + _Inout_ PSYMCRYPT_CSHAKE256_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Result( + _Inout_ PSYMCRYPT_CSHAKE256_STATE pState, + _Out_writes_( SYMCRYPT_CSHAKE256_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptCShake256StateCopy(_In_ PCSYMCRYPT_CSHAKE256_STATE pSrc, _Out_ PSYMCRYPT_CSHAKE256_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Selftest(void); + + + +//========================================================================== +// PARALLELISED HASH FUNCTIONS +//========================================================================== +// +// On some platforms it is possible to parallelize the hash function +// computation to achieve a higher throughput. +// The parallel hash APIs support this. +// The parallel implementation tries to perform the computations as efficiently +// as possible. Applications that have many hashes to compute can always call these +// functions; the library will optimize the computation to the current situation. +// For example, if only a single hash is computed using these APIs, the +// single-hash version is used to achieve full single-hash speed. +// On platforms that do not support parallel hash implementations, these functions +// are still available, and will implement the parallel hashing by computing the +// hashes one at a time. 
+// +// +// SYMCRYPT_PARALLEL_XXX_MIN_PARALLELISM +// +// Compile-time constant, but can vary per platform. +// Minimum number of parallel computations at which +// the parallel implementation is faster on at least some CPU versions. +// Applications can safely ask for parallel computations with fewer hashes, +// but there will be no speed gain. +// +// SYMCRYPT_PARALLEL_XXX_MAX_PARALLELISM +// +// Maximum internal parallelism that the library uses internally on at least one +// CPU version of this architecture. +// If all hash computations are the same length, then there is no significant +// benefit to providing more than this number of hash requests in parallel. +// However, if the hash computations are of different lengths then the library +// overlaps various hash computations and still gains efficiency when the +// number of parallel hash computations increases past this bound. +// Note that the internal parallelism that can be used might depend +// on the CPU features available, so this value is only an upper bound. +// We recommend that callers provide as much parallelism as practical, +// and let the library perform the optimal sequence of computations. +// +// SYMCRYPT_HASH_OPERATION_TYPE +// +// An enum that specifies which operation is to be performed in a command +// structure passed to a parallel hash operations function. +// Defined values: +// SYMCRYPT_HASH_OPERATION_APPEND; +// SYMCRYPT_HASH_OPERATION_RESULT; +// +// SYMCRYPT_PARALLEL_HASH_OPERATION +// +// Structure that contains a command to be performed on a single item in a +// parallel hash state array. Visible fields are: +// +// SIZE_T iHash; // index of hash object into the state array +// SYMCRYPT_HASH_OPERATION_TYPE hashOperation; // operation to be performed +// PBYTE pbBuffer; // data to be hashed, or result buffer +// SIZE_T cbBuffer; +// +// There might be other fields in this structure that the caller should not use or assume anything about. 
+// +// SymCryptParallelXxxInit( +// _Out_writes_( nStates ) PSYMCRYPT_XXX_STATE pStates, +// SIZE_T nStates ); +// Initialize an array of hash states. +// The elements of the array are normal hash states, and they can be +// manipulated individually using the standard functions for the hash +// algorithm. +// +// Functionally equivalent to: +// for( i=0; i<nStates; i++ ) { +// SymCryptXxxInit( &pStates[i] ); +// } +// +// It is not necessary to use this function to initialize a state array; +// the normal initialization function can also be used, but this function might +// be faster. +// +// SymCryptParallelXxxProcess( +// _Inout_updates_( nStates ) PSYMCRYPT_XXX_STATE pStates, +// SIZE_T nStates, +// _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, +// SIZE_T nOperations, +// _Out_writes_( cbScratch ) PBYTE pbScratch, +// SIZE_T cbScratch ); +// +// Perform optionally parallel processing of hashes. +// This is functionally equivalent to iterating over the pOperations array in order, +// and executing the command in each PARALLEL_HASH_OPERATION one at a time. +// For each command: +// iHash Which hash state this operation applies to; must be < nStates. +// hashOperation Specifies whether this is an append or result operation. +// pbBuffer The buffer that contains the data to be hashed, or that will receive the result. +// cbBuffer The size of pbBuffer. (Must be equal to the hash algorithm result size for RESULT operations.) +// As the SAL annotations document, the pOperations array is updated by this function, and therefore +// it cannot be in read-only memory. +// The updates modify only the internal scratch space that is reserved +// in the SYMCRYPT_PARALLEL_HASH_OPERATION structure; none of the documented fields +// (iHash, hashOperation, pbBuffer, cbBuffer) are modified. +// The scratch fields are used purely within one call to this function, their value does not have to be +// maintained between function calls. 
The scratch fields do not have to be initialized by the caller +// of this function. +// THREAD SAFETY: as the pOperations array is updated, it CANNOT be shared between different threads. +// Obviously, the same is true of pStates and pbScratch. +// +// The pbScratch pointer provides a scratch buffer for the parallel processing function. +// This is used to organize the request and perform the functions in an optimal order for +// maximum parallelism, and for storing intermediate results that are too large +// to fit on the stack. The scratch buffer must be at least +// SYMCRYPT_PARALLEL_XXX_FIXED_SCRATCH + nStates * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH +// bytes in size. +// +// For incremental hashing, we recommend that callers process data sizes that are +// a multiple of the SYMCRYPT_XXX_INPUT_BLOCK_SIZE. +// + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha256Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha384Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA384_STATE pStates, + SIZE_T nStates ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha384Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA384_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha512Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA512_STATE pStates, + SIZE_T nStates ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha512Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA512_STATE pStates, + SIZE_T nStates, + 
_Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256Selftest(void); + +VOID +SYMCRYPT_CALL +SymCryptParallelSha384Selftest(void); + +VOID +SYMCRYPT_CALL +SymCryptParallelSha512Selftest(void); + + + +//========================================================================== +// MESSAGE AUTHENTICATION CODE (MAC) +//========================================================================== +// +// All MAC functions have a similar interface. For consistency we describe +// the generic parts of the interface once. +// Algorithm-specific comments are given with the API functions of each algorithm separately. +// +// For a MAC algorithm called XXX the following functions, types, and constants are defined: +// +// +// SYMCRYPT_XXX_RESULT_SIZE +// +// A constant giving the size, in bytes, of the result of the MAC function. +// Some applications use truncated MAC functions. These are not directly supported +// by this library. Applications will have to perform the truncation themselves. +// +// +// SYMCRYPT_XXX_INPUT_BLOCK_SIZE +// +// A constant giving the natural input block size for the MAC function. +// Most callers don't need to know this, but in some cases it can be useful +// for optimizations. +// +// +// SYMCRYPT_XXX_EXPANDED_KEY +// +// Type which contains a key with all the pre-computations performed. +// This is an opaque type whose structure can change at will. +// It should only be used for transient computations in a single executable +// and not be stored or transferred to a different environment. +// The pointer and const-pointer versions are also declared +// (PSYMCRYPT_XXX_EXPANDED_KEY and PCSYMCRYPT_XXX_EXPANDED_KEY). +// +// The EXPANDED_KEY structure contains keying material and should be wiped +// once it is no longer used. 
(See SymCryptWipe & SymCryptWipeKnownSize) +// +// Once a key has been expanded, multiple threads can simultaneously use the same expanded key +// object for different MAC computations that use the same key as the expanded key +// object does not change value. +// +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptXxxExpandKey( _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_(cbKey) PCBYTE pbKey, +// SIZE_T cbKey ); +// +// Prepare a key for future use by the Xxx algorithm. +// This function performs pre-computations on the key +// to speed up the actual MAC computations later, and stores the result as an expanded key. +// The expanded key must be kept unchanged until all MAC computations that use the key are finished. +// When the key is no longer needed the expanded key structure should be wiped. +// +// Different algorithms pose different requirements on the length of the key. +// If the key that is provided is of an unsupported length the SYMCRYPT_WRONG_KEY_SIZE error is returned. +// In this case the expanded key structure will not contain any keying material and does not have to be wiped. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxKeyCopy( _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pSrc, +// _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pDst ); +// +// Create a copy of an expanded key. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxx( _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( SYMCRYPT_XXX_RESULT_SIZE ) PBYTE pbResult ); +// +// Computes the MAC value of the data buffer with a given key. +// If you have all the data to be MACed in a single buffer this is the simplest function to use. +// +// +// SYMCRYPT_XXX_STATE +// +// The state encodes an ongoing MAC computation and allows incremental +// computation of a MAC function. 
+// At any point in time the state encodes a state that is equivalent to +// the MAC computation of a data string X with the key specified during initialization of the state. +// The SymCryptXxxInit() function initializes a state. +// The SymCryptXxxAppend() function appends data to the data string X. +// The SymCryptXxxResult() function returns the final MAC result. +// +// The state is an opaque type whose structure can change at will. +// It should only be used for transient computations in a single executable +// and not be stored or transferred to a different environment. +// +// Once initialized using SymCryptXxxInit, the state contains sensitive keying information. +// The SymCryptXxxResult function wipes the sensitive information from the state. +// Callers can also wipe the structure themselves if it is no longer needed. +// +// The state can be duplicated using the SymCryptXxxStateCopy function. This supports +// applications that compute the MAC over a prefix and then duplicate the state to +// compute the MAC using multiple different continuations. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxStateCopy( +// _In_ PCSYMCRYPT_XXX_STATE pSrc, +// _In_opt_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Out_ PSYMCRYPT_XXX_STATE pDst ); +// +// Create a copy of the pSrc state in pDst. If pExpandedKey is NULL, the pDst state +// uses the same expanded key as the pSrc state did. If pExpandedKey is not NULL, +// it must point to an expanded key that contains the same key material as the key +// used by pSrc. (For example, a copy of the expanded key that pSrc uses.) +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxInit( _Out_ PSYMCRYPT_XXX_STATE pState, +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey); +// +// Initialize a SYMCRYPT_XXX_STATE for subsequent use with the provided key. +// +// This function can be called at any time and resets the state to correspond +// to the empty data string with the newly specified key. 
+// The SymCryptXxxAppend function appends data to the data string +// encoded by the state. +// The SymCryptXxxResult function finalizes the computation and +// returns the actual MAC result. +// +// This function typically stores a pointer to the expanded key in the state. +// The expanded key must remain unchanged in +// memory until the SYMCRYPT_XXX_STATE structure is no longer used. +// +// After initialization the state contains sensitive keying materials, and should +// be wiped when the state is no longer used. The SymCryptXxxResult() function +// also wipes the state, so this is only a concern for aborted MAC computations. +// Note that SymCryptXxxResult() does not wipe the expanded key; callers are always +// responsible for wiping the expanded key. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxAppend( _Inout_ PSYMCRYPT_XXX_STATE pState, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Provide more data to the ongoing MAC computation specified by the state. +// The state must have been initialized by SymCryptXxxInit. +// This function can be called multiple times on the same state +// to append more data to the encoded data string. +// +// The SYMCRYPT_XXX_STATE structure contains the entire state of an ongoing +// MAC computation. If you want to MAC some data and then continue with +// multiple other strings you may create one or more copies of the state. +// (The expanded key must remain unchanged in memory until all copies of the state +// are no longer used.) +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxResult( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _Out_writes_( SYMCRYPT_XXX_RESULT_SIZE ) PBYTE pbResult ); +// +// Returns the MAC result of the state. +// If the state was newly initialized this returns the MAC of the empty string +// using the key specified in the SymCryptXxxInit call. 
+// If one or more SymCryptXxxAppend function calls were made on this state +// it returns the MAC of the concatenation of all the data strings +// passed to SymCryptXxxAppend using the specified key. +// +// The state is wiped to remove any traces of sensitive data. +// To use the same state for another MAC computation you must call +// SymCryptXxxInit again to re-initialize the state. +// This behaviour is different from hash function states that are re-initialized for +// use by the Result routine. This difference is by design; re-initializing a hash +// state is a safe operation. Re-initializing a MAC state puts keying information +// in the state, and callers would have to wipe the MAC state explicitly. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxSelftest(void); +// +// Perform a minimal self-test on the XXX algorithm. +// This function is designed to be used for achieving FIPS 140-2 compliance or +// to provide a simple self-test when an application starts. +// +// If an error is detected, a platform-specific fatal error action is taken. +// Callers do not need to handle any error conditions. +// +// +// We also have the Generic HMAC API where the hash function to be used in the HMAC +// computation can be selected at runtime. 
+// + +typedef enum _SYMCRYPT_MAC_ID +{ + SYMCRYPT_MAC_ID_NULL = 0, + SYMCRYPT_MAC_ID_HMAC_MD5 = 1, + SYMCRYPT_MAC_ID_HMAC_SHA1 = 2, + SYMCRYPT_MAC_ID_HMAC_SHA224 = 3, + SYMCRYPT_MAC_ID_HMAC_SHA256 = 4, + SYMCRYPT_MAC_ID_HMAC_SHA384 = 5, + SYMCRYPT_MAC_ID_HMAC_SHA512 = 6, + SYMCRYPT_MAC_ID_HMAC_SHA512_224 = 7, + SYMCRYPT_MAC_ID_HMAC_SHA512_256 = 8, + SYMCRYPT_MAC_ID_HMAC_SHA3_224 = 9, + SYMCRYPT_MAC_ID_HMAC_SHA3_256 = 10, + SYMCRYPT_MAC_ID_HMAC_SHA3_384 = 11, + SYMCRYPT_MAC_ID_HMAC_SHA3_512 = 12, + SYMCRYPT_MAC_ID_AES_CMAC = 13, + SYMCRYPT_MAC_ID_KMAC_128 = 14, + SYMCRYPT_MAC_ID_KMAC_256 = 15 +} SYMCRYPT_MAC_ID; + +PCSYMCRYPT_MAC +SYMCRYPT_CALL +SymCryptGetMacAlgorithm( SYMCRYPT_MAC_ID macId ); +// +// Returns a pointer to the MAC algorithm structure for the specified MAC ID. +// Returns NULL if the MAC ID is invalid. +// + +// +// Generic HMAC API with parametrized hash function +// +VOID +SYMCRYPT_CALL +SymCryptHmacStateCopy( + _In_ PCSYMCRYPT_HMAC_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacKeyCopy( + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_EXPANDED_KEY pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacExpandKey( + _In_ PCSYMCRYPT_HASH pHash, + _Out_ PSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmacInit( + _Out_ PSYMCRYPT_HMAC_STATE pState, + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey ); + +VOID +SYMCRYPT_CALL +SymCryptHmacAppend( + _Inout_ PSYMCRYPT_HMAC_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmacResult( + _Inout_ PSYMCRYPT_HMAC_STATE pState, + _Out_writes_( pState->pKey->pHash->resultSize ) PBYTE pbResult ); + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmac( + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + 
SIZE_T cbData, + _Out_writes_( pExpandedKey->pHash->resultSize ) PBYTE pbResult ); + + +//////////////////////////////////////////////////////////////////////////// +// HMAC-MD5 +// +// + +#define SYMCRYPT_HMAC_MD5_RESULT_SIZE SYMCRYPT_MD5_RESULT_SIZE +#define SYMCRYPT_HMAC_MD5_INPUT_BLOCK_SIZE SYMCRYPT_MD5_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacMd5ExpandKey( + _Out_ PSYMCRYPT_HMAC_MD5_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. +// + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5KeyCopy( + _In_ PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_MD5_EXPANDED_KEY pDst ); + + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5( + _In_ PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_MD5_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5StateCopy( + _In_ PCSYMCRYPT_HMAC_MD5_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_MD5_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Init( + _Out_ PSYMCRYPT_HMAC_MD5_STATE pState, + _In_ PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Append( + _Inout_ PSYMCRYPT_HMAC_MD5_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Result( + _Inout_ PSYMCRYPT_HMAC_MD5_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_MD5_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacMd5Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-1 +// +// + +#define SYMCRYPT_HMAC_SHA1_RESULT_SIZE SYMCRYPT_SHA1_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA1_INPUT_BLOCK_SIZE SYMCRYPT_SHA1_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha1ExpandKey( + _Out_ 
PSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. +// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pDst ); + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1( + _In_ PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA1_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA1_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA1_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Init( + _Out_ PSYMCRYPT_HMAC_SHA1_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Append( + _Inout_ PSYMCRYPT_HMAC_SHA1_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Result( + _Inout_ PSYMCRYPT_HMAC_SHA1_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA1_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha1Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-224 +// +// This implementation is meant for interoperability and is not recommended for use. +// +// + +#define SYMCRYPT_HMAC_SHA224_RESULT_SIZE SYMCRYPT_SHA224_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA224_INPUT_BLOCK_SIZE SYMCRYPT_SHA224_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha224ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224( + _In_ PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA224_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA224_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Init( + _Out_ PSYMCRYPT_HMAC_SHA224_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Append( + _Inout_ PSYMCRYPT_HMAC_SHA224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Result( + _Inout_ PSYMCRYPT_HMAC_SHA224_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA224_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha224Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-256 +// +// + +#define SYMCRYPT_HMAC_SHA256_RESULT_SIZE SYMCRYPT_SHA256_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA256_INPUT_BLOCK_SIZE SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha256ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256( + _In_ PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA256_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA256_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Init( + _Out_ PSYMCRYPT_HMAC_SHA256_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Append( + _Inout_ PSYMCRYPT_HMAC_SHA256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Result( + _Inout_ PSYMCRYPT_HMAC_SHA256_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA256_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha256Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-384 +// +// + +#define SYMCRYPT_HMAC_SHA384_RESULT_SIZE SYMCRYPT_SHA384_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA384_INPUT_BLOCK_SIZE SYMCRYPT_SHA384_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha384ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384( + _In_ PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA384_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA384_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA384_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Init( + _Out_ PSYMCRYPT_HMAC_SHA384_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Append( + _Inout_ PSYMCRYPT_HMAC_SHA384_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Result( + _Inout_ PSYMCRYPT_HMAC_SHA384_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA384_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha384Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-512 +// +// + +#define SYMCRYPT_HMAC_SHA512_RESULT_SIZE SYMCRYPT_SHA512_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA512_INPUT_BLOCK_SIZE SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha512ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512( + _In_ PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA512_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA512_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Init( + _Out_ PSYMCRYPT_HMAC_SHA512_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Append( + _Inout_ PSYMCRYPT_HMAC_SHA512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Result( + _Inout_ PSYMCRYPT_HMAC_SHA512_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA512_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha512Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-512_224 +// +// This implementation is meant for interoperability and is not recommended for use. +// +// + +#define SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE SYMCRYPT_SHA512_224_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA512_224_INPUT_BLOCK_SIZE SYMCRYPT_SHA512_224_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha512_224ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224( + _In_ PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_224_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA512_224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Init( + _Out_ PSYMCRYPT_HMAC_SHA512_224_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Append( + _Inout_ PSYMCRYPT_HMAC_SHA512_224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Result( + _Inout_ PSYMCRYPT_HMAC_SHA512_224_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha512_224Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-512_256 +// +// This implementation is meant for interoperability and is not recommended for use. +// +// + +#define SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE SYMCRYPT_SHA512_256_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA512_256_INPUT_BLOCK_SIZE SYMCRYPT_SHA512_256_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha512_256ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256( + _In_ PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_256_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA512_256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Init( + _Out_ PSYMCRYPT_HMAC_SHA512_256_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Append( + _Inout_ PSYMCRYPT_HMAC_SHA512_256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Result( + _Inout_ PSYMCRYPT_HMAC_SHA512_256_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha512_256Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA3-224 +// +// This implementation is meant for interoperability and is not recommended for use. +// +// + +#define SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE SYMCRYPT_SHA3_224_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA3_224_INPUT_BLOCK_SIZE SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_224ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224( + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_224_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Init( + _Out_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha3_224Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA3-256 +// +// + +#define SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE SYMCRYPT_SHA3_256_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA3_256_INPUT_BLOCK_SIZE SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_256ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256( + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_256_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Init( + _Out_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha3_256Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA3-384 +// +// + +#define SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE SYMCRYPT_SHA3_384_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA3_384_INPUT_BLOCK_SIZE SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_384ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384( + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_384_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_384_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Init( + _Out_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha3_384Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA3-512 +// +// + +#define SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE SYMCRYPT_SHA3_512_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA3_512_INPUT_BLOCK_SIZE SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_512ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512( + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_512_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_512_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Init( + _Out_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha3_512Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// AES-CMAC +// +// This is the AES-CMAC algorithm per SP 800-38B & RFC 4493. +// It is also known as AES-OMAC1. 
+// + +#define SYMCRYPT_AES_CMAC_RESULT_SIZE (16) +#define SYMCRYPT_AES_CMAC_INPUT_BLOCK_SIZE (16) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesCmacExpandKey( + _Out_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Key size must be a valid AES key (16, 24, or 32 bytes) +// + +VOID +SYMCRYPT_CALL +SymCryptAesCmacKeyCopy( + _In_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmac( + _In_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_AES_CMAC_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacStateCopy( + _In_ PCSYMCRYPT_AES_CMAC_STATE pSrc, + _In_opt_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_AES_CMAC_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacInit( + _Out_ PSYMCRYPT_AES_CMAC_STATE pState, + _In_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacAppend( + _Inout_ PSYMCRYPT_AES_CMAC_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacResult( + _Inout_ PSYMCRYPT_AES_CMAC_STATE pState, + _Out_writes_( SYMCRYPT_AES_CMAC_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacSelftest(void); + +extern const PCSYMCRYPT_MAC SymCryptAesCmacAlgorithm; + +//////////////////////////////////////////////////////////////////////////// +// KMAC +// +// Keccak Message Authentication Code (KMAC) is specified in NIST SP 800-185 +// and has two variants; KMAC128 and KMAC256, using cSHAKE128 and cSHAKE256 +// as the underlying functions, respectively. +// +// KMAC128(K, X, L, S) = cSHAKE128(bytepad(encode_string(K), 168) || X || right_encode(L), L, "KMAC", S) +// KMAC256(K, X, L, S) = cSHAKE256(bytepad(encode_string(K), 136) || X || right_encode(L), L, "KMAC", S) +// +// KMAC accepts a variable-size key. 
There's no restriction on the size of the key. +// +// KMAC differs from other MAC algorithms in SymCrypt by having two additional input +// parameters; a customization string and the length of the output. Output generated +// by KMAC also depends on the specified output length, i.e., outputs generated from +// two KMAC calls with the same key, message, customization string, but different output +// lengths will be unrelated/uncorrelated. This differs from SHAKE and cSHAKE where an +// output of size N bytes from the algorithm is a prefix of the output of size M bytes +// where N < M, when the inputs are the same. +// +// KMAC works in two modes; fixed-length mode and XOF mode. XOF variants are named KMACXOF128 +// and KMACXOF256. SymCrypt does not provide a separate KMACXOF API but supports them via +// the KMAC interface. +// +// KMACXOF128(K, X, L, S) = cSHAKE128(bytepad(encode_string(K), 168) || X || right_encode(0), L, "KMAC", S) +// KMACXOF256(K, X, L, S) = cSHAKE256(bytepad(encode_string(K), 136) || X || right_encode(0), L, "KMAC", S) +// +// KMAC output generation mode is determined by the output length parameter +// L in SP 800-185; if it is non-zero then KMAC works in fixed-length mode, otherwise (i.e., L=0) +// it works in XOF mode. +// - Fixed-length mode generates result with SymCryptKmacXxxResult or SymCryptKmacXxxResultEx. +// These functions wipe the state after generating output, thus can only be used +// once per initialized state. The result size is SYMCRYPT_KMACXXX_RESULT_SIZE +// for SymCryptKmacXxxResult and specified by the caller for SymCryptKmacXxxResultEx. +// - XOF mode can produce arbitrary length output. SymCryptKmacXxxExtract function puts KMAC +// state into XOF mode and all the successive calls that generate output from the KMAC state will be +// from the XOF mode.
SymCryptKmacXxxResult and SymCryptKmacXxxResultEx functions +// will also generate output in XOF mode IF they are called after a SymCryptKmacXxxExtract +// function with bWipe=FALSE (so that the state remains in XOF mode). Note that +// SymCryptKmacXxxResult and SymCryptKmacXxxResultEx functions wipe the state afterwards, +// thus KMAC state can only be used to generate output in XOF mode once with these two functions. +// +// SYMCRYPT_KMACXXX_RESULT_SIZE +// +// Default result size when KMAC is used with the existing MAC interface. +// Equal to twice the SYMCRYPT_KMACXXX_KEY_SIZE. +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptKmacXxxExpandKey( +// _Out_ PSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey, +// _In_reads_bytes_( cbKey ) PCBYTE pbKey, +// SIZE_T cbKey); +// +// Performs key expansion with empty customization string. +// There's no restriction on the size of the key. +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptKmacXxxExpandKeyEx( +// _Out_ PSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey, +// _In_reads_bytes_( cbKey ) PCBYTE pbKey, +// SIZE_T cbKey, +// _In_reads_bytes_( cbCustomizationString ) PCBYTE pbCustomizationString, +// SIZE_T cbCustomizationString); +// +// Performs key expansion for the provided key and customization string. +// There's no restriction on the size of the key. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxx( +// _In_ PCSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey, +// _In_reads_bytes_( cbInput ) PCBYTE pbInput, +// SIZE_T cbInput, +// _Out_writes_bytes_( SYMCRYPT_KMACXXX_RESULT_SIZE ) PBYTE pbResult); +// +// Single-call KMAC computation for the given input producing default result +// size SYMCRYPT_KMACXXX_RESULT_SIZE. +// +// pExpandedKey must be initialized before the call. This function is equivalent +// to SymCryptKmacXxxEx with output size set to SYMCRYPT_KMACXXX_RESULT_SIZE. +// If a result size different than the default value is desired, SymCryptKmacXxxEx +// must be called.
+// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxEx( +// _In_ PCSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey, +// _In_reads_bytes_( cbInput ) PCBYTE pbInput, +// SIZE_T cbInput, +// _Out_writes_bytes_( cbResult ) PBYTE pbResult, +// SIZE_T cbResult); +// +// Single-call KMAC computation for the given input producing cbResult bytes result. +// pExpandedKey must be initialized before the call. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxInit( +// _Out_ PSYMCRYPT_KMACXXX_STATE pState, +// _In_ PCSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey); +// +// Initializes KMAC state for appending data for the provided key. Expanded +// key must be generated prior to this call. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxAppend( +// _Inout_ PSYMCRYPT_KMACXXX_STATE pState, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Appends data to the KMAC state. +// +// This function must only be called after SymCryptKmacXxxInit or SymCryptKmacXxxAppend. +// Calling SymCryptKmacXxxAppend after SymCryptKmacXxxExtract with bWipe=FALSE +// is not well-defined. KMAC state must be initialized with SymCryptKmacXxxInit before +// the first call to SymCryptKmacXxxAppend. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxExtract( +// _Inout_ PSYMCRYPT_KMACXXX_STATE pState, +// _Out_writes_( cbOutput ) PBYTE pbOutput, +// SIZE_T cbOutput, +// BOOLEAN bWipe); +// +// Generates KMAC output in XOF mode. +// +// Extract can only be called after an Init, Append or Extract call. +// The state is cleared if bWipe=TRUE, otherwise further Extract calls +// can be made to generate more output. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxResult( +// _Inout_ PSYMCRYPT_KMACXXX_STATE pState, +// _Out_writes_( SYMCRYPT_KMACXXX_RESULT_SIZE ) PBYTE pbResult); +// +// Produces SYMCRYPT_KMACXXX_RESULT_SIZE bytes of output from the KMAC state. +// The state is wiped on return. 
+// +// This function internally calls SymCryptKmacXxxResultEx with result size +// SYMCRYPT_KMACXXX_RESULT_SIZE. +// If Result is called in XOF mode (i.e., after an Extract with bWipe=FALSE), it +// performs a final extraction of SYMCRYPT_KMACXXX_RESULT_SIZE bytes in XOF mode +// and clears the state afterwards. +// Result function does not re-initialize the state for a new computation like +// the Result functions for hash functions do. Computing a new MAC with the same key +// requires calling the SymCryptKmacXxxInit function first. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxResultEx( +// _Inout_ PSYMCRYPT_KMACXXX_STATE pState, +// _Out_writes_( cbResult ) PBYTE pbResult, +// SIZE_T cbResult); +// +// Produces cbResult bytes of output from the KMAC state. The state is +// wiped on return. +// +// If ResultEx is called in XOF mode (i.e., after an Extract with bWipe=FALSE), it +// performs a final extraction of cbResult bytes in XOF mode and clears the state +// afterwards. +// ResultEx function does not re-initialize the state for a new computation like +// the Result functions for hash functions do. Computing a new MAC with the same key +// requires calling the SymCryptKmacXxxInit function first.
+// + + +// +// KMAC128 +// +#define SYMCRYPT_KMAC128_RESULT_SIZE SYMCRYPT_CSHAKE128_RESULT_SIZE +#define SYMCRYPT_KMAC128_INPUT_BLOCK_SIZE SYMCRYPT_CSHAKE128_INPUT_BLOCK_SIZE + +VOID +SYMCRYPT_CALL +SymCryptKmac128( + _In_ PCSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( SYMCRYPT_KMAC128_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Ex( + _In_ PCSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKmac128ExpandKey( + _Out_ PSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKmac128ExpandKeyEx( + _Out_ PSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_bytes_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Init( + _Out_ PSYMCRYPT_KMAC128_STATE pState, + _In_ PCSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Append( + _Inout_ PSYMCRYPT_KMAC128_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Extract( + _Inout_ PSYMCRYPT_KMAC128_STATE pState, + _Out_writes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Result( + _Inout_ PSYMCRYPT_KMAC128_STATE pState, + _Out_writes_( SYMCRYPT_KMAC128_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac128ResultEx( + _Inout_ PSYMCRYPT_KMAC128_STATE pState, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac128KeyCopy(_In_ PCSYMCRYPT_KMAC128_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_KMAC128_EXPANDED_KEY pDst); + +VOID 
+SYMCRYPT_CALL +SymCryptKmac128StateCopy(_In_ const SYMCRYPT_KMAC128_STATE* pSrc, _Out_ SYMCRYPT_KMAC128_STATE* pDst); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptKmac128Algorithm; + +// +// KMAC256 +// +#define SYMCRYPT_KMAC256_RESULT_SIZE SYMCRYPT_CSHAKE256_RESULT_SIZE +#define SYMCRYPT_KMAC256_INPUT_BLOCK_SIZE SYMCRYPT_CSHAKE256_INPUT_BLOCK_SIZE + +VOID +SYMCRYPT_CALL +SymCryptKmac256( + _In_ PCSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( SYMCRYPT_KMAC256_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Ex( + _In_ PCSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKmac256ExpandKey( + _Out_ PSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKmac256ExpandKeyEx( + _Out_ PSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_bytes_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Init( + _Out_ PSYMCRYPT_KMAC256_STATE pState, + _In_ PCSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Append( + _Inout_ PSYMCRYPT_KMAC256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Extract( + _Inout_ PSYMCRYPT_KMAC256_STATE pState, + _Out_writes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Result( + _Inout_ PSYMCRYPT_KMAC256_STATE pState, + _Out_writes_( SYMCRYPT_KMAC256_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac256ResultEx( + _Inout_ 
PSYMCRYPT_KMAC256_STATE pState, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac256KeyCopy(_In_ PCSYMCRYPT_KMAC256_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_KMAC256_EXPANDED_KEY pDst); + +VOID +SYMCRYPT_CALL +SymCryptKmac256StateCopy(_In_ const SYMCRYPT_KMAC256_STATE* pSrc, _Out_ SYMCRYPT_KMAC256_STATE* pDst); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptKmac256Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// POLY1305 +// +// Poly1305 is different from other MAC functions because a key can only +// be used safely for a single message. +// We do not follow the default API pattern for MAC functions as that invites +// callers to compute multiple MACs per key. +// + +#define SYMCRYPT_POLY1305_RESULT_SIZE (16) +#define SYMCRYPT_POLY1305_BLOCK_SIZE (16) +#define SYMCRYPT_POLY1305_KEY_SIZE (32) + +VOID +SYMCRYPT_CALL +SymCryptPoly1305( + _In_reads_( SYMCRYPT_POLY1305_KEY_SIZE ) PCBYTE pbKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE ) PBYTE pbResult ); +// Compute a Poly1305 authentication with the provided key on the data buffer. +// Note: A Poly1305 key may only be used for a single message. + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Init( + _Out_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( SYMCRYPT_POLY1305_KEY_SIZE ) PCBYTE pbKey ); +// Starts an incremental Poly1305 computation. +// Note: A Poly1305 key may only be used for a single message. + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Append( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Result( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE ) PBYTE pbResult ); +// The state is wiped and not suitable for re-use. 
+ +VOID +SYMCRYPT_CALL +SymCryptPoly1305Selftest(void); + +// +// We do NOT define a SYMCRYPT_MAC structure SymCryptPoly1305Algorithm +// for Poly1305 as it is a 1-time MAC function and cannot safely be used +// by any KDF we have +// +// NOT DEFINED: extern const PCSYMCRYPT_MAC SymCryptPoly1305Algorithm; +// + +//////////////////////////////////////////////////////////////////////////// +// CHACHA20_POLY1305 +// +// This algorithm combines the CHACHA20 symmetric key stream cipher with +// the POLY1305 MAC function as per RFC 8439. +// The POLY1305 authenticator key is generated from the first 32 bytes +// of the CHACHA20 keystream and is only valid for a single message. +// For this reason each key and nonce combination passed to +// SymCryptChaCha20Poly1305Encrypt MUST only be used once. +// +// The Src and Dst buffers can be identical or non-overlapping; partial overlaps +// are not supported. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptChaCha20Poly1305Encrypt( + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, // Required. Key size MUST be 32 bytes. + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, // Required. Nonce size MUST be 12 bytes. + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, // Optional. Can be any size. + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, // Required. Max size is 274,877,906,880 bytes. + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); // Required. Tag size MUST be 16 bytes. + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptChaCha20Poly1305Decrypt( + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, // Required. Key size MUST be 32 bytes. + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, // Required. Nonce size MUST be 12 bytes. + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, // Optional. Can be any size. + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, // Required. 
Max size is 274,877,906,880 bytes. + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); // Required. Tag size MUST be 16 bytes. + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Poly1305Selftest(void); + +//////////////////////////////////////////////////////////////////////////// +// MARVIN32 +// +// Marvin is a checksum function optimized for speed on small inputs. +// IT IS NOT A CRYPTOGRAPHIC HASH FUNCTION. +// Marvin lacks the security properties of a cryptographic hash function. +// DO NOT USE FOR ANY SECURITY USE. +// +// A randomizable checksum function has essentially the same API as a MAC +// function. We use the SymCrypt MAC API here, with the difference +// that we use the word 'seed' rather than 'key'. +// +// See the description above of the generic MAC API for details on how +// these functions are used. Wherever the MAC API talks about keys, this +// applies to the seed for Marvin32. +// +// The randomization is useful for hash tables. +// There are DOS attacks where an attacker generates many inputs that +// hash to the same location in the hash table. Some hash table implementations +// then use O(n^2) CPU time, allowing a DOS attack. +// The randomization provided by the seed avoids this attack if: +// - The seed is unpredictable and unknown to the attacker. +// - The attacker cannot learn information about the output of the checksum function. +// In particular, if an attacker can measure how long it takes to add each +// element in a hash table, they might be able to determine enough information about +// the output of the checksum function to recover the seed. Of course, +// once that is done the DOS attack is once again possible. +// +// SymCrypt provides a default seed for applications that don't need a seed. +// +// FUTURE IMPROVEMENTS: +// At the moment it is relatively expensive to change the seed. +// If needed, we can add a facility to modify the seed faster than +// re-running the ExpandSeed function. 
+// + +#define SYMCRYPT_MARVIN32_RESULT_SIZE (8) +#define SYMCRYPT_MARVIN32_SEED_SIZE (8) +#define SYMCRYPT_MARVIN32_INPUT_BLOCK_SIZE (4) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMarvin32ExpandSeed( + _Out_ PSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _In_reads_(cbSeed) PCBYTE pbSeed, + SIZE_T cbSeed ); +// +// The seed must be 8 bytes (= SYMCRYPT_MARVIN32_SEED_SIZE). +// Use of the all-zero seed is not recommended as it has some undesirable properties. +// Note that a pre-expanded default seed is provided for applications that do not wish to control +// their seed. Such applications do not need to call SymCryptMarvin32ExpandSeed +// + +extern PCSYMCRYPT_MARVIN32_EXPANDED_SEED const SymCryptMarvin32DefaultSeed; + +PCSYMCRYPT_MARVIN32_EXPANDED_SEED +SYMCRYPT_CALL +SymCryptGetMarvin32DefaultSeed( void ); +// +// Returns a pointer to the default Marvin32 seed. +// + +VOID +SYMCRYPT_CALL +SymCryptMarvin32SeedCopy( _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pSrc, + _Out_ PSYMCRYPT_MARVIN32_EXPANDED_SEED pDst ); + +VOID +SYMCRYPT_CALL +SymCryptMarvin32( + _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MARVIN32_RESULT_SIZE ) PBYTE pbResult ); +// +// If the application does not wish to use a seed, a default expanded seed is provided. +// Callers can pass SymCryptMarvin32DefaultSeed as the first argument. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMarvin32StateCopy( + _In_ PCSYMCRYPT_MARVIN32_STATE pSrc, + _In_opt_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _Out_ PSYMCRYPT_MARVIN32_STATE pDst ); + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Init( _Out_ PSYMCRYPT_MARVIN32_STATE pState, + _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed); + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Append( _Inout_ PSYMCRYPT_MARVIN32_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Result( + _Inout_ PSYMCRYPT_MARVIN32_STATE pState, + _Out_writes_( SYMCRYPT_MARVIN32_RESULT_SIZE ) PBYTE pbResult ); + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Selftest(void); + + +//========================================================================== +// BLOCK CIPHERS +//========================================================================== +// +// For a block cipher XXX the following minimal functions, types, and constants are defined: +// +// SYMCRYPT_XXX_BLOCK_SIZE +// +// A constant giving the block size, in bytes, of the algorithm. +// +// +// SYMCRYPT_XXX_EXPANDED_KEY +// Type which contains a key with all the pre-computations performed. +// This is an opaque type whose structure can change at will. +// It should only be used for transient computations in a single executable +// and not be stored or transferred to a different environment. +// The pointer and const-pointer versions are also declared +// (PSYMCRYPT_XXX_EXPANDED_KEY and PCSYMCRYPT_XXX_EXPANDED_KEY). +// +// The EXPANDED_KEY structure contains keying material and should be wiped +// once it is no longer used. (See SymCryptWipe & SymCryptWipeKnownSize) +// +// Once initialized, multiple threads can use the same expanded key object simultaneously +// for different block cipher computations as the expanded key is not modified once initialized.
+// +// SymCryptXxxBlockCipher +// A SYMCRYPT_BLOCKCIPHER structure that provides a description +// of the block cipher and its primary functions. This is used by cipher modes to pass +// all the block-cipher specific information in a single structure. +// +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptXxxExpandKey( _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_(cbKey) PCBYTE pbKey, +// SIZE_T cbKey ); +// +// Prepare a key for future use by the Xxx algorithm. +// This function performs pre-computations on the key +// to speed up the actual block cipher computations later, and stores the result as an expanded key. +// The expanded key must be kept unchanged until all computations that use the key are finished. +// When the key is no longer needed the expanded key structure should be wiped. +// +// Different algorithms pose different requirements on the length of the key. +// If the key that is provided is of an unsupported length the SYMCRYPT_WRONG_KEY_SIZE error is returned. +// In this case the expanded key structure will not contain any keying material and does not have to be wiped. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxEncrypt( _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_( SYMCRYPT_XXX_BLOCK_SIZE ) PCBYTE pbSrc, +// _Out_writes_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbDst ); +// +// Encrypt a single block. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxDecrypt( _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_( SYMCRYPT_XXX_BLOCK_SIZE ) PCBYTE pbSrc, +// _Out_writes_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbDst ); +// +// Decrypt a single block. +// +// +// -------------------------------------------------------------------------------------------------------------- +// In addition to these elementary encrypt block/decrypt block functions a block cipher may also implement +// optimized versions of CBC encryption, CBC decryption, CBC-MAC, and CTR encryption. Not all block ciphers +// do implement these. 
+// All block cipher modes are always available through the generic block cipher mode functions. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCbcEncrypt( +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Inout_updates_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbChainingValue, +// _In_reads_( cbData ) PCBYTE pbSrc, +// _Out_writes_( cbData ) PBYTE pbDst, +// SIZE_T cbData ); +// +// Encrypt data using the CBC chaining mode. +// On entry the pbChainingValue is the IV which is xorred into the first plaintext block of the CBC encryption. +// On exit the pbChainingValue is updated to the last ciphertext block of the result. +// This allows a longer CBC encryption to be done incrementally. +// +// cbData must be a multiple of the block size. For efficiency reasons this routine does not return an error +// if cbData is not a proper multiple; instead the result is undefined. The routine might hang, +// round cbData down to a multiple of the block size, or return random data that cannot be decrypted. +// +// The pbSrc and pbDst buffers may be the same, or they may be non-overlapping. However, they may +// not be partially overlapping. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCbcDecrypt( +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Inout_updates_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbChainingValue, +// _In_reads_( cbData ) PCBYTE pbSrc, +// _Out_writes_( cbData ) PBYTE pbDst, +// SIZE_T cbData ); +// +// Decrypt data using the CBC chaining mode. +// On entry the pbChainingValue is the IV to be xorred into the first plaintext block of the CBC decryption. +// On exit the pbChainingValue is updated to the last ciphertext block of the input. +// This allows a longer CBC decryption to be done incrementally. +// +// cbData must be a multiple of the block size. For efficiency reasons this routine does not return an error +// if cbData is not a proper multiple; instead the result is undefined. 
The routine might hang, +// round cbData down to a multiple of the block size, or return random data. +// +// The pbSrc and pbDst buffers may be the same, or they may be non-overlapping. However, they may +// not be partially overlapping. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCbcMac( +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Inout_updates_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbChainingValue, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Compute a CBC-MAC on the input data. +// On entry the pbChainingValue is the current chaining state of the CBC-MAC computation; this routine +// updates the state to reflect the chaining state after MACing the data. +// cbData must be a multiple of the block size. +// This function is NOT intended for general use; rather it is a high-performance primitive to support +// implementations of other cipher modes like CCM and CMAC. +// Note: If a key is used for CBC-MAC computations it should NOT be used for any encryptions. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCtrMsb64( +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Inout_updates_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbChainingValue, +// _In_reads_( cbData ) PCBYTE pbSrc, +// _Out_writes_( cbData ) PBYTE pbDst, +// SIZE_T cbData ); +// +// Perform a CTR encryption on the data. (Note: CTR encryption and decryption are the same operation.) +// On entry pbChainingValue contains the first counter value to be used. On exit it contains +// the next counter value to be used. +// The increment function treats the last 8 bytes of the pbChainingValue string as an integer +// in most-significant-byte-first format, and increments this integer. +// Thus, the last byte is incremented the fastest. +// The pbSrc and pbDst buffers may be identical or non-overlapping, but they may not partially overlap. +// cbData must be a multiple of the block size. 
+// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxSelftest(void); +// +// Perform a minimal self-test on the XXX algorithm. +// This function is designed to be used for achieving FIPS 140-2 compliance or +// to provide a simple self-test when an application starts. +// +// If an error is detected the fatal callback routine is called. +// +// We do not provide self-tests for the various cipher modes. There are too many +// (block cipher, key size, cipher mode) combinations and CNG performs the self tests +// on the outside APIs, not on the internal APIs. +// We retain a self test on the basic algorithm to help internal library testing. + + + +//////////////////////////////////////////////////////////////////////////// +// AES +// +// The AES block cipher per FIPS 197 +// +// WARNING: +// Unless this code is running on a CPU with AES-NI instructions, +// the AES implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. +// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. +// +// Unfortunately there is no known software fix for this that does not lead to an order of magnitude performance loss. +// An implementation that is 10x slower will not be used by anybody and is useless, so we implement a fast +// version that uses table lookups. (Just like all other systems we know of.) +// +// The risk of this type of side-channel attack is limited as it requires malicious code to run on the same +// machine as the code being attacked. +// +// At the time of writing (Apr 2007) there are no approved alternative encryption algorithms that do not +// use table lookups. NIST and NSA are aware of this problem, but so far we have not seen any indication +// that they consider this important enough to create an alternative encryption algorithm that does not +// rely on table lookups as much. 
+// + +#define SYMCRYPT_AES_BLOCK_SIZE (16) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKey( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +// +// The SymCryptAesExpandKeyEncryptOnly creates an AES-expanded key that can ONLY be used +// for AES encryption operations. There are no safeguards when you use it for decryption; you get the wrong +// result if you try. +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKeyEncryptOnly( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +VOID +SYMCRYPT_CALL +SymCryptAesKeyCopy( _In_ PCSYMCRYPT_AES_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) 
PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMac( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// +// There are many optimized implementations for various AES modes. +// To test them all would pull in all the code for these modes. +// We solve this by letting the caller specify a bitmask of modes to be tested. +// Under the following circumstances this will avoid pulling in unnecessary code: +// - The argument is a compile-time constant. +// - The compiler implements the usual constant propagation optimizations. +// +// Note: GCM, CCM, and XTS are NOT tested by this function. + +#define SYMCRYPT_AES_SELFTEST_BASE 0x01 // tests AesEncrypt & AesDecrypt +#define SYMCRYPT_AES_SELFTEST_ECB 0x02 // ECB mode +#define SYMCRYPT_AES_SELFTEST_CBC 0x04 // CBC mode +#define SYMCRYPT_AES_SELFTEST_CBCMAC 0x08 // CBC-mac +#define SYMCRYPT_AES_SELFTEST_CTR 0x10 // all CTR modes + +#define SYMCRYPT_AES_SELFTEST_ALL 0x1f + +VOID +SYMCRYPT_CALL +SymCryptAesSelftest( UINT32 maskTestsToRun ); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipher; + + +//////////////////////////////////////////////////////////////////////////// +// DES +// +// The DES block cipher per FIPS-46-3 +// +// WARNING: +// DES is no longer considered secure and should not be used. +// Per the Crypto SDL, any use of DES in Microsoft code requires a Crypto board exemption +// +// The DES implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. 
+// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. +// + +#define SYMCRYPT_DES_BLOCK_SIZE (8) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDesExpandKey( + _Out_ PSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// The key must be 8 bytes long. The parity bits in the key are ignored and can be any value. +// + +VOID +SYMCRYPT_CALL +SymCryptDesEncrypt( + _In_ PCSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptDesDecrypt( + _In_ PCSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DES_BLOCK_SIZE ) PBYTE pbDst ); + + +VOID +SYMCRYPT_CALL +SymCryptDesSetOddParity( + _Inout_updates_( cbData ) PBYTE pbData, + _In_ SIZE_T cbData ); +// +// Set each byte to have odd parity by possibly flipping bit 0. +// This is the parity used by DES, and is needed for compatibility. +// The parity bit is ignored by the DES key expansion. +// + +VOID +SYMCRYPT_CALL +SymCryptDesSelftest(void); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCryptDesBlockCipher; + +//////////////////////////////////////////////////////////////////////////// +// 3DES +// +// The triple-DES block cipher +// +// WARNING: +// The DES implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. +// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. 
+// + +#define SYMCRYPT_3DES_BLOCK_SIZE (8) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCrypt3DesExpandKey( + _Out_ PSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// If the provided key is 24 bytes long this expands a 3-key 3DES key. If 16 bytes are provided it +// expands a 2-key 3DES. If 8 bytes are provided it creates the 3-key equivalent of the single +// key des encryption. The parity bits in the key are ignored. +// + +VOID +SYMCRYPT_CALL +SymCrypt3DesEncrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_3DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_3DES_BLOCK_SIZE )PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCrypt3DesDecrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_3DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_3DES_BLOCK_SIZE )PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCrypt3DesCbcEncrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCrypt3DesCbcDecrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCrypt3DesSelftest(void); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCrypt3DesBlockCipher; + +//////////////////////////////////////////////////////////////////////////// +// DESX +// +// The DESX block cipher. +// +// Use of DESX is not recommended. 
+// + +#define SYMCRYPT_DESX_BLOCK_SIZE (8) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDesxExpandKey( + _Out_ PSYMCRYPT_DESX_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +VOID +SYMCRYPT_CALL +SymCryptDesxEncrypt( + _In_ PCSYMCRYPT_DESX_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DESX_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DESX_BLOCK_SIZE )PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptDesxDecrypt( + _In_ PCSYMCRYPT_DESX_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DESX_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DESX_BLOCK_SIZE )PBYTE pbDst ); + + +VOID +SYMCRYPT_CALL +SymCryptDesxSelftest(void); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCryptDesxBlockCipher; + +//////////////////////////////////////////////////////////////////////////// +// RC2 +// +// The RC2 block cipher +// +// WARNING: +// Use of RC2 is not recommended for many reasons. +// +// The RC2 implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. +// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. +// + +#define SYMCRYPT_RC2_BLOCK_SIZE (8) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc2ExpandKey( + _Out_ PSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// The default effective key size is 8*cbKey. Note that this is NOT the default used in +// the old RSA32 library which used a default effective key size of 40 bits. +// That is too dangerous a default to implement. We chose 8*cbKey rather than 1024 as +// our choice provides slightly better mixing of the key bytes into the expanded key. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc2ExpandKeyEx( + _Out_ PSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + UINT32 effectiveKeySizeInBits ); +// +// Rc2 has an option to limit the effective key size, which means the key expansion function has an extra +// parameter. +// +// The effective key size in bits may be any value from 9..1024. If it is larger than 8*cbKey it does +// not significantly affect the key strength. However, the expanded key will always depend on the +// effective key size; expanding the same string of key bytes with different effective key sizes leads +// to different expanded keys and different encryption functions. +// +// The original default was an effective key size of 40 bits. +// +// Do not allow your attacker to choose the effective key size. RC2 seems vulnerable to +// related-effective-key-size attacks. +// + +VOID +SYMCRYPT_CALL +SymCryptRc2Encrypt( + _In_ PCSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_RC2_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_RC2_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptRc2Decrypt( + _In_ PCSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_RC2_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_RC2_BLOCK_SIZE ) PBYTE pbDst ); + + +VOID +SYMCRYPT_CALL +SymCryptRc2Selftest(void); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCryptRc2BlockCipher; + + +//========================================================================== +// BLOCK CIPHER MODES +//========================================================================== +// +// Block cipher modes use the block cipher description tables to implement +// the various modes in a block-cipher independent way. +// +// Some block ciphers implement optimized versions of the block cipher modes. +// These functions call that optimized version, but calling the block-cipher specific +// function has less overhead.
+// +// Note that these functions will only work with SymCrypt-provided block ciphers. +// They are not designed to be used with externally provided block ciphers. +// (The SYMCRYPT_BLOCKCIPHER structure is a private one not available to callers.) +// + +typedef enum _SYMCRYPT_BLOCKCIPHER_ID +{ + SYMCRYPT_BLOCKCIPHER_ID_NULL = 0, + SYMCRYPT_BLOCKCIPHER_ID_AES = 1, + SYMCRYPT_BLOCKCIPHER_ID_DES = 2, + SYMCRYPT_BLOCKCIPHER_ID_3DES = 3, + SYMCRYPT_BLOCKCIPHER_ID_DESX = 4, + SYMCRYPT_BLOCKCIPHER_ID_RC2 = 5 +} SYMCRYPT_BLOCKCIPHER_ID; + +PCSYMCRYPT_BLOCKCIPHER +SYMCRYPT_CALL +SymCryptGetBlockCipher( SYMCRYPT_BLOCKCIPHER_ID blockCipherId ); +// +// Returns a pointer to the block cipher structure for the specified block cipher ID. +// Returns NULL if the block cipher ID is invalid. +// + +VOID +SYMCRYPT_CALL +SymCryptEcbEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Generic ECB encryption routine for block ciphers. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbSrc is the plaintext input buffer. The plaintext and ciphertext buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbData. Number of bytes to encrypt. This must be a multiple of the block size. +// - pbDst is the result buffer. It may be identical to pbPlaintext or non-overlapping, +// but it may not partially overlap with the pbPlaintext buffer. 
+// + +VOID +SYMCRYPT_CALL +SymCryptEcbDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Generic ECB decryption routine for block ciphers. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbSrc is the ciphertext input buffer. The ciphertext and plaintext buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbData. Number of bytes to decrypt. This must be a multiple of the block size. +// - pbDst is the result buffer. It may be identical to pbSrc or non-overlapping, +// but it may not partially overlap with the pbSrc buffer. +// + + +VOID +SYMCRYPT_CALL +SymCryptCbcEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// +// Generic CBC encryption routine for block ciphers. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbChainingValue points to the chaining value. On entry it is the IV value for the CBC +// encryption, on return it is the last ciphertext block.
A long message can be encrypted +// piecewise in multiple calls; at the end of one call the pbChainingValue buffer will contain +// the correct chaining value for encrypting the next piece of the message. +// Once the encryption is finished the value in the chaining buffer is no longer needed. +// - pbSrc is the plaintext input buffer. The plaintext and ciphertext buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbData. Number of bytes to encrypt. This must be a multiple of the block size. +// - pbDst is the result buffer. It may be identical to pbSrc or non-overlapping, +// but it may not partially overlap with the pbSrc buffer. +// + + +VOID +SYMCRYPT_CALL +SymCryptCbcDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// +// This is the decryption version of SymCryptCbcEncrypt. +// All parameters have the same explanation and restrictions. +// + + +VOID +SYMCRYPT_CALL +SymCryptCbcMac( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + SIZE_T cbData ); +// +// This function implements the same function as SymCryptCbcEncrypt except that +// it does not produce a ciphertext output. +// All other restrictions apply. +// The pbChainingValue is the only output provided. +// +// This is the primitive operation used by other modes of operation, +// and some platforms have special optimizations for this primitive. +// As we expose special APIs for some algorithms, we provide the generic function so that it +// can be used for all algorithms.
+// + + +VOID +SYMCRYPT_CALL +SymCryptCtrMsb64( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// This function implements the CTR cipher mode. +// It is not intended to be used as-is, rather it is a building block for modes like CCM. +// On some platforms we have optimized code for AES-CTR, on other platforms +// we use this generic construction to achieve the same effect. +// +// Note that in CTR mode encryption and decryption are the same operation. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbChainingValue points to the chaining value. On entry it is the first counter value to be +// used. On exit it is the next counter value to be used. +// The pbChainingValue is incremented by cbData/blockSize. +// The increment function treats the last 8 bytes of pbChainingValue as an MSB-first integer +// and increments the integer representation by one for each block. +// - pbSrc is the input data buffer that will be encrypted/decrypted. +// - cbData. Number of bytes to encrypt/decrypt. This must be a multiple of the block size. +// - pbDst is the output buffer that receives the encrypted/decrypted data. The input and output +// buffers may be the same or non-overlapping, but may not partially overlap.
+// + +VOID +SYMCRYPT_CALL +SymCryptCfbEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + SIZE_T cbShift, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Encrypt a buffer using the CFB cipher mode. +// +// This implements the CFB mode, with selected shift amount (in bytes). +// In general, one block cipher encryption is used for each cbShift bytes +// of plaintext, which can be slow. +// Use of this cipher mode is not recommended. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - cbShift is the shift value (in bytes) of the CFB mode. +// The only supported values are 1 and the block size. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbChainingValue points to the chaining value. On entry and exit it +// contains the last blockSize ciphertext bytes. +// - pbSrc is the input data buffer that will be encrypted/decrypted. +// - cbData. Number of bytes to encrypt/decrypt. +// Must be a multiple of cbShift, or a multiple of the block size if cbShift = 0. +// - pbDst is the output buffer that receives the encrypted/decrypted data. The input and output +// buffers may be the same or non-overlapping, but may not partially overlap. +// + +VOID +SYMCRYPT_CALL +SymCryptCfbDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + SIZE_T cbShift, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// The corresponding decryption routine. 
+// + +VOID +SYMCRYPT_CALL +SymCryptPaddingPkcs7Add( + SIZE_T cbBlockSize, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + SIZE_T* pcbResult); +// +// Prerequisites: +// cbBlockSize is a power of 2 and < 256 +// cbDst >= cbSrc - cbSrc % cbBlockSize + cbBlockSize +// +// Add PKCS7 block padding to a message +// The input data (pbSrc,cbSrc) is padded with between 1 and cbBlockSize bytes so that +// the length of the result is a multiple of cbBlockSize. +// The padded message is written to the pbDst buffer. +// The length of the padded message is returned in *pcbResult. +// +// If pbSrc == pbDst this function avoids copying all the data. +// Note that cbSrc == cbDst is not valid as it violates the prerequisites. +// Padding a message with cbSrc == 0 is valid. +// +// Note: +// Any whole blocks in Src are merely copied to Dst. +// Callers can either process the whole message in this call, +// or handle the whole blocks themselves and only pass the last few bytes of the message to this function. +// +// Note: the prerequisites are not checked by this function; if they are not satisfied +// the behaviour of the function is undefined. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPaddingPkcs7Remove( + SIZE_T cbBlockSize, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + SIZE_T* pcbResult); +// +// Prerequisites: +// - cbBlockSize is a power of 2 and < 256 +// - cbSrc is a multiple of cbBlockSize +// - cbSrc is greater than zero (at least equal to cbBlockSize) +// +// Remove PKCS7 block padding from a message in a side-channel safe way. +// *** see below for important rules the caller should follow w.r.t. side-channel safety *** +// The input data (pbSrc, cbSrc) is a valid PKCS7 padded message for the given blocksize.
+// This function removes the padding, copies the result to the (pbDst, cbDst) buffer, +// and returns the size of the result in *pcbResult. +// +// This function only supports padding with a size up to the block size. +// +// If pbSrc == pbDst this function avoids copying data. +// +// The following errors are returned: +// - SYMCRYPT_INVALID_ARGUMENT if cbSrc or the padding is invalid +// - SYMCRYPT_BUFFER_TOO_SMALL if cbDst < size of the unpadded message +// If cbDst >= cbSrc the SYMCRYPT_BUFFER_TOO_SMALL error will not be returned. +// Even if an error is returned, the pbDst buffer may or may not contain data from the message. +// Callers should wipe the buffer even if an error is returned. +// +// Note: Removal of PKCS7 padding is extremely sensitive to side channels. +// For example, if a message is encrypted with AES-CBC and the attacker can modify +// the ciphertext and then determine whether a padding error occurs during decryption, +// then the attacker can use the presence or absence of the error to decrypt the message itself. +// This function takes great care not to reveal whether an error occurred, and hides +// the size of the unpadded message. This is even true when writing to pbDst. If cbDst is large +// enough, the code will write cbSrc-1 bytes to pbDst, using masking to only update the bytes of the +// message and leaving the other bytes in pbDst unchanged. +// Callers should take great care not to reveal the returned error or success, +// or the size of the returned message, until they have authenticated +// the source of the data. +// +// In particular, any mapping of the error code should be done in a side-channel safe way. +// See the SymCryptMapUint32() function for a side-channel safe way to map error codes. +// +// The error caused by an invalid cbSrc value is not hidden from side channels as this does not reveal any +// secret information. 
+// +// Note: callers can either process the whole message in this call, +// or process the whole blocks themselves and only pass the last block to this function. + +//////////////////////////// +// CCM +//////////////////////////// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmValidateParameters( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ SIZE_T cbNonce, + _In_ SIZE_T cbAssociatedData, + _In_ UINT64 cbData, + _In_ SIZE_T cbTag + ); +// +// To achieve maximum performance, CCM functions do not check for valid parameters. +// Passing invalid parameters can lead to buffer overflows. +// Callers who want to validate their CCM parameters can call this function. +// Note: In Checked builds some CCM functions might fatal out when invalid parameters are +// passed. +// + + +VOID +SYMCRYPT_CALL +SymCryptCcmEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); + +// +// Encrypt a buffer using the block cipher in CCM mode. +// - pBlockCipher points to the block cipher description table. +// - pExpandedKey points to the expanded key for the block cipher. +// - pbNonce: Pointer to the nonce for this encryption. For a single key, each nonce +// value may be used at most once to encrypt data. Re-using nonce values leads +// to catastrophic loss of security. +// - cbNonce: number of bytes in the nonce: 7 <= cbNonce <= 13. +// - pbAuthData: pointer to the associated authentication data. This data is not encrypted +// but it is included in the authentication. Use NULL if not used. +// - cbAuthData: # bytes of associated authentication data. (0 if not used) +// - pbSrc: plaintext input +// - pbDst: ciphertext output. 
The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. The maximum length is 2^{8(15-cbNonce)} - 1 bytes. +// - pbTag: buffer that will receive the authentication tag. +// - cbTag: size of tag. cbTag must be one of {4, 6, 8, 10, 12, 14, 16}. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// Decrypt a buffer using the block cipher in CCM mode. +// See SymCryptCcmEncrypt for a description of the parameters. This function decrypts rather than +// encrypts, and as a result the pbTag parameter is read rather than filled. +// +// If the tag value is not correct the SYMCRYPT_AUTHENTICATION_FAILURE error is returned and the pbDst buffer +// is wiped of any plaintext. +// Note: While checking the authentication the purported plaintext is stored in pbDst. It is not safe to reveal +// purported plaintext when the authentication has not been checked. (Doing so would reveal key stream information +// that can be used to decrypt any message encrypted with the same nonce value.) Thus, users should be careful +// to not reveal the pbDst buffer until this function returns (e.g. through other threads or sharing memory). +// + +// +// We also provide functions for incremental computation of CCM encryption and decryption. See the functions +// above for a description of the parameters and restrictions. +// In particular, note that the restriction on revealing the plaintext for unauthenticated decryptions holds +// for all the decrypted data, even when the decryption is done incrementally. 
+// +// SYMCRYPT_CCM_STATE +// Ongoing state of an incremental CCM encryption or decryption operation. +// + +VOID +SYMCRYPT_CALL +SymCryptCcmInit( + _Out_ PSYMCRYPT_CCM_STATE pState, + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + UINT64 cbData, + SIZE_T cbTag ); +// +// Initialize a CCM computation. Note that the ultimate data length has to be provided. +// The pBlockCipher and pExpandedKey structures must remain unchanged until the CCM computation is finished. +// + +VOID +SYMCRYPT_CALL +SymCryptCcmEncryptPart( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptCcmEncryptFinal( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); +// +// Note: passing cbTag is redundant but necessary for SAL purposes. +// + +VOID +SYMCRYPT_CALL +SymCryptCcmDecryptPart( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmDecryptFinal( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// WARNING: When the authentication fails the data already decrypted may not be revealed. +// This function cannot wipe the plaintext buffers; the caller is responsible for ensuring +// the plaintext is not revealed. +// + +VOID +SYMCRYPT_CALL +SymCryptCcmSelftest(void); +// +// Self test for CCM cipher mode +// + +/////////////////////////////////////// +// GCM +/////////////////////////////////////// +// +// The GCM algorithm per SP 800-38D. +// GMAC is just GCM with an empty data string; all the data is put in the pbAuthData buffer. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmValidateParameters( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ SIZE_T cbNonce, + _In_ UINT64 cbAssociatedData, + _In_ UINT64 cbData, + _In_ SIZE_T cbTag + ); +// +// To achieve maximum performance, GCM functions do not check for valid parameters. +// Passing invalid parameters can lead to buffer overflows. +// Callers who want to validate their GCM parameters can call this function. +// Note: In Checked builds some GCM functions might fatal out when invalid parameters are +// passed. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmExpandKey( + _Out_ PSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Create an expanded key suitable for GCM +// + +VOID +SYMCRYPT_CALL +SymCryptGcmKeyCopy( _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_GCM_EXPANDED_KEY pDst ); + +// +// Create a copy of an expanded key +// + +VOID +SYMCRYPT_CALL +SymCryptGcmEncrypt( + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); + +// +// Encrypt a buffer using the block cipher in GCM mode. +// - pExpandedKey points to the expanded key for GCM. +// - pbNonce: Pointer to the nonce for this encryption. For a single key, each nonce +// value may be used at most once to encrypt data. Re-using nonce values leads +// to catastrophic loss of security. Only 12-byte nonces are supported, +// per the SP800-38D section 5.2.1.1 recommendation. +// - cbNonce: number of bytes in the nonce, must be 12. +// - pbAuthData: pointer to the associated authentication data. This data is not encrypted +// but it is included in the authentication. Use NULL if not used. 
+// - cbAuthData: # bytes of associated authentication data. (0 if not used) +// - pbSrc: plaintext input +// - pbDst: ciphertext output. The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. The maximum length is 2^{36} - 32 bytes. +// - pbTag: buffer that will receive the authentication tag. +// - cbTag: size of tag. cbTag must be one of {12, 13, 14, 15, 16} per SP800-38D +// section 5.2.1.2. The optional shorter tag sizes (4 and 8) are not supported. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmDecrypt( + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// Decrypt a buffer using the block cipher in GCM mode. +// See SymCryptGcmEncrypt for a description of the parameters. This function decrypts rather than +// encrypts, and as a result the pbTag parameter is read rather than filled. +// If the tag value is not correct the SYMCRYPT_AUTHENTICATION_FAILURE error is returned and the pbDst buffer +// is wiped of any plaintext. +// Note: While checking the authentication the purported plaintext is stored in pbDst. It is not safe to reveal +// purported plaintext when the authentication has not been checked. (Doing so would reveal key stream information +// that can be used to decrypt any message encrypted with the same nonce value.) Thus, users should be careful +// to not reveal the pbDst buffer until this function returns (e.g. through other threads or sharing memory). +// + +// +// We also provide functions for incremental computation of GCM encryption and decryption. See the functions +// above for a description of the parameters and restrictions. 
+// In particular, note that the restriction on revealing the plaintext for unauthenticated decryptions holds +// for all the decrypted data, even when the decryption is done incrementally. +// +// +// SYMCRYPT_GCM_STATE +// Ongoing state of an incremental GCM encryption or decryption operation. +// + +VOID +SYMCRYPT_CALL +SymCryptGcmInit( + _Out_ PSYMCRYPT_GCM_STATE pState, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce ); +// +// Initialize a GCM computation. +// The pBlockCipher and pExpandedKey structures must remain unchanged until the GCM computation is finished. +// + +VOID +SYMCRYPT_CALL +SymCryptGcmStateCopy( + _In_ PCSYMCRYPT_GCM_STATE pSrc, + _In_opt_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKeyCopy, + _Out_ PSYMCRYPT_GCM_STATE pDst ); +// +// Copy a GCM state. +// If pExpandedKeyCopy is NULL, then the new pDst state uses the same expanded key as pSrc. +// If pExpandedKeyCopy is not NULL, it must point to a copy of the expanded key of the pSrc state. +// This new expanded key will be used as the expanded key for pDst. +// + +VOID +SYMCRYPT_CALL +SymCryptGcmAuthPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_opt_( cbData ) PCBYTE pbAuthData, + SIZE_T cbData ); +// +// Incrementally process the authentication data. This function can be called multiple times +// after the SymCryptGcmInit function. It may not be called after any encrypt or decrypt +// function has been called on the GCM state. 
+// + +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptFinal( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); + +VOID +SYMCRYPT_CALL +SymCryptGcmDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmDecryptFinal( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// Returns SYMCRYPT_AUTHENTICATION_FAILURE if the tag value does not match. +// + + +VOID +SYMCRYPT_CALL +SymCryptGcmSelftest(void); +// +// Self test for GCM cipher mode +// + + +//========================================================================== +// SESSION BASED APIs +//========================================================================== + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionSenderInit( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT32 senderId, + UINT32 flags ); +// +// Initialize an encryption session object. The default nonce size of 12B is used - 8B are provided +// by message number, 4B by senderId. +// - pSession: Pointer to an uninitialized session object. +// - senderId: The id of the sender (must be unique for each user of a given key). +// Callers should either choose a senderId which is specific to the sender, or +// at least to the software and role in a system in which a key is being used. +// Two encryption sessions using the same key and senderId leads to catastrophic loss of security. +// - No flags are specified for this function +// +// Remarks: +// On some platforms use of a session object requires use of a mutex. 
On those platforms this +// function will call SymCryptCallbackAllocateMutexFastInproc and may indicate failure by returning +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE if a mutex object cannot be created. +// Callers must call SymCryptSessionDestroy to ensure any associated allocated mutex object is freed +// both before calling another Init function on the SYMCRYPT_SESSION object, and instead of directly +// calling SymCryptWipeKnownSize on the object. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionReceiverInit( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT32 senderId, + UINT32 flags ); +// +// Initialize a decryption session object. The default nonce size of 12B is used - 8B are provided +// by message number, 4B by senderId. +// - pSession: Pointer to an uninitialized session object. +// - senderId: The id of the sender (must be unique for each user of a given key). +// Callers should either choose a senderId which is specific to the sender, or +// at least to the software and role in a system in which a key is being used. +// The id used in a decryption session must be the same as the id used in the corresponding +// encryption session (i.e. sender and receiver must agree upon a senderId for their +// communication session) +// - No flags are specified for this function +// +// Remarks: +// On some platforms use of a session object requires use of a mutex. On those platforms this +// function will call SymCryptCallbackAllocateMutexFastInproc and may indicate failure by returning +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE if a mutex object cannot be created. +// Callers must call SymCryptSessionDestroy to ensure any associated allocated mutex object is freed +// both before calling another Init function on the SYMCRYPT_SESSION object, and instead of directly +// calling SymCryptWipeKnownSize on the object. 
+// + +VOID +SYMCRYPT_CALL +SymCryptSessionDestroy( + _Inout_ PSYMCRYPT_SESSION pSession ); +// +// Clear session object and free any data associated with the object (i.e. allocated locks) +// After this call the memory used for pSession is uninitialized and can be used for other purposes. +// Note that it is not safe to just wipe the memory of the session object as the session +// object contains pointers to other allocations. +// The only way to safely destroy a session is to use this function. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionGcmEncrypt( + _Inout_ PSYMCRYPT_SESSION pSession, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag, + _Out_opt_ PUINT64 pu64MessageNumber ); +// +// Encrypt a buffer, in a series, using the block cipher in GCM mode. +// - pSession points to the session object for this series of GCM encryptions. It handles +// ensuring Nonce uniqueness across several encryption calls using the same key. The message +// number in the pSession object is atomically incremented by this call. +// If too many messages (2^64 - 2^32) have been encrypted with the same session object, +// SYMCRYPT_INVALID_ARGUMENT is returned and no encryption takes place. This should never +// occur in real use! +// - pExpandedKey points to the expanded key for GCM. +// - pbAuthData: pointer to the associated authentication data. This data is not encrypted +// but it is included in the authentication. Use NULL if not used. +// - cbAuthData: # bytes of associated authentication data. (0 if not used) +// - pbSrc: plaintext input +// - pbDst: ciphertext output. The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. 
The maximum length is 2^{36} - 32 bytes. +// - pbTag: buffer that will receive the authentication tag. +// - cbTag: size of tag. cbTag must be one of {12, 13, 14, 15, 16} per SP800-38D +// section 5.2.1.2. The optional shorter tag sizes (4 and 8) are not supported. +// - pu64MessageNumber: Optional message number output for this encryption. A unique message +// number is extracted from the pSession object, this output is set to the value used in +// the encryption. The first message number generated in a session will have the value 1, +// and subsequent message numbers will be taken by atomically incrementing the counter. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionGcmDecrypt( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// Decrypt a buffer, in a series, using the block cipher in GCM mode. +// - pSession points to the session object for this series of GCM decryptions. It handles +// ensuring Nonce uniqueness across several decryption calls using the same key, particularly +// ensuring there are no replays. +// - messageNumber: The message number to be used for this decryption, forming part of the Nonce. +// When performing decryption in a session, it is guaranteed that no 2 decryptions using the +// same session and same message number can succeed. This is to provide protection against +// replay attacks. +// In order to provide this guarantee, pSession tracks a window of used message numbers +// preceding the largest messageNumber successfully used so far in the decryption session. 
+// A SYMCRYPT_SESSION_REPLAY_FAILURE error will be returned if either: +// a) messageNumber is less than the smallest message number that can be tracked for replays +// b) messageNumber is within the window that can be tracked for replays, and the message +// number is marked as already having been used in a successful decryption in this session +// In either case, the destination buffer is wiped. +// See SymCryptSessionGcmEncrypt for a description of the other parameters. This function decrypts +// rather than encrypts, and as a result the pbTag parameter is read rather than filled. +// If the tag value is not correct the SYMCRYPT_AUTHENTICATION_FAILURE error is returned and the +// pbDst buffer is wiped of any plaintext. +// Note: While checking the authentication the purported plaintext is stored in pbDst. It is not safe to reveal +// purported plaintext when the authentication has not been checked. (Doing so would reveal key stream information +// that can be used to decrypt any message encrypted with the same nonce value.) Thus, users should be careful +// to not reveal the pbDst buffer until this function returns (e.g. through other threads or sharing memory). +// + + +//========================================================================== +// STREAM CIPHERS +//========================================================================== + +//////////////////////////////////////////////////////////////////////////// +// RC4 +// +// The RC4 stream cipher +// +// Use of RC4 is not recommended. +// +// The RC4 implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. +// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc4Init( + _Out_ PSYMCRYPT_RC4_STATE pState, + _In_reads_( cbKey ) PCBYTE pbKey, + _In_ SIZE_T cbKey ); +// +// Initialize an RC4 encryption/decryption state. +// WARNING: the most common error in using RC4 is to use the same key to encrypt two different pieces of data. +// This is insecure and should never be done; you need a unique key for each data element that is encrypted. +// + +VOID +SYMCRYPT_CALL +SymCryptRc4Crypt( + _Inout_ PSYMCRYPT_RC4_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + _In_ SIZE_T cbData ); +// +// Encrypt or Decrypt data using the RC4 state. Note that the RC4 state is updated and therefore this +// function cannot be used by two threads simultaneously using the same state object. +// + +VOID +SYMCRYPT_CALL +SymCryptRc4Selftest(void); + + +// +// ChaCha20 +// +// The ChaCha20 stream cipher is specified in RFC 7539 and referenced by RFC 7905 +// which specifies the ChaCha20-Poly1305 TLS cipher suite. +// +// ChaCha is a random-access stream cipher. It is possible to jump to any part of +// the key stream and start en/decrypting there. +// We support this by allowing the caller to select the position in the key stream +// to use. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptChaCha20Init( + _Out_ PSYMCRYPT_CHACHA20_STATE pState, + _In_reads_( cbKey ) PCBYTE pbKey, + _In_ SIZE_T cbKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + UINT64 offset ); +// +// Initialize a ChaCha20 en/decryption state. +// Key must be 32 bytes +// Nonce must be 12 bytes +// offset is the position into the key stream that the next encrypt/decrypt +// operation will use. Requirement: 0 <= offset < 2^38 +// The ChaCha documentation is formulated in terms of a 'counter' or 'initial counter'. +// Callers can set offset = 64 * <counter> to achieve the same results. +// +// An error is returned only for invalid key or nonce sizes. 
+// +// A single (key,nonce) pair defines a key stream of 256 GB. +// Any part of that key stream can be used to encrypt a message, or part of a +// message. +// Note that it is critical that each key stream byte is used only once; thus +// callers have to ensure that for any key, each nonce is used at most once for +// a message, and messages cannot use any part of the 256 GB key stream more than +// once. +// + +VOID +SYMCRYPT_CALL +SymCryptChaCha20SetOffset( + _Inout_ PSYMCRYPT_CHACHA20_STATE pState, + UINT64 offset ); +// +// Specify the offset into the key stream where the next encrypt/decrypt operation +// will start. +// Requirement: 0 <= offset < 2^38 +// + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Crypt( + _Inout_ PSYMCRYPT_CHACHA20_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Encrypt or Decrypt data using the CHACHA20 state. +// The Src data is xorred with the key stream generated from the state, and the result stored +// in the Dst buffer. The Src and Dst buffer can be identical or non-overlapping; partial overlaps +// are not supported. +// As the state is updated two threads cannot en/decrypt with the same state at the same time. +// The key stream used is the one generated from the key and nonce, starting at the specified +// offset into the key stream. This function updates the offset of the state by adding cbData to +// it so that the next call will use the next part of the key stream. +// Any attempt to use the key stream at offset >= 2^38 will result in catastrophic loss of security. 
+// + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Selftest(void); + + + + +//========================================================================== +// KEY DERIVATION ALGORITHMS +//========================================================================== + +//////////////////////////////////////////////////////////////////////////// +// PBKDF2 +// +// Generic KDF parameter handling: +// - Generic parameter is passed in the Salt input; +// - iterationCnt is set to 1. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2ExpandKey( + _Out_ PSYMCRYPT_PBKDF2_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2Derive( + _In_ PCSYMCRYPT_PBKDF2_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT64 iterationCnt, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2( + PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT64 iterationCnt, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +// +// Because the self-test pulls in the associated MAC function, +// we have several self-tests; each of which tests the PBKDF2 implementation +// using the specified MAC function. +// This allows a FIPS module to run the self-test with the MAC function it already +// uses internally. +// +// More can be added when needed. +// + +VOID +SYMCRYPT_CALL +SymCryptPbkdf2_HmacSha1SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptPbkdf2_HmacSha256SelfTest(void); + +//////////////////////////////////////////////////////////////////////////// +// SP800-108 Counter mode +// +// Generic KDF parameter handling: +// Generic parameter contains the concatenation of the Label, a zero byte, and the Context. 
+// To pass a generic parameter do the following: +// - pbLabel = NULL +// - cbLabel = (SIZE_T) -1; +// - pbContext/cbContext = generic parameter +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSp800_108ExpandKey( + _Out_ PSYMCRYPT_SP800_108_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSp800_108Derive( + _In_ PCSYMCRYPT_SP800_108_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + SIZE_T cbLabel, + _In_reads_opt_(cbContext) PCBYTE pbContext, + SIZE_T cbContext, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSp800_108( + PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + SIZE_T cbLabel, + _In_reads_opt_(cbContext) PCBYTE pbContext, + SIZE_T cbContext, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptSp800_108_HmacSha1SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptSp800_108_HmacSha256SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptSp800_108_HmacSha384SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptSp800_108_HmacSha512SelfTest(void); + +//////////////////////////////////////////////////////////////////////////// +// TLS Key Derivation PRFs +// +// PRFs used in the key derivation functions of the TLS protocol, versions +// 1.0, 1.1, and 1.2. These are defined in RFC 2246, 4346, and 5246, +// respectively. +// Note: The PRFs for versions 1.0 and 1.1 are identical. +// + +// Maximum sizes (in bytes) for the label and the seed inputs. See the +// above RFCs 2246, 4346, and 5246 for more details. 
+#define SYMCRYPT_TLS_MAX_LABEL_SIZE 256 +#define SYMCRYPT_TLS_MAX_SEED_SIZE 256 + +// +// Version 1.0/1.1 +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_1ExpandKey( + _Out_ PSYMCRYPT_TLSPRF1_1_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_1Derive( + _In_ PCSYMCRYPT_TLSPRF1_1_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, // Up to SYMCRYPT_TLS_MAX_LABEL_SIZE + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, // Up to SYMCRYPT_TLS_MAX_SEED_SIZE + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_1( + _In_reads_(cbKey) PCBYTE pbKey, + _In_ SIZE_T cbKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptTlsPrf1_1SelfTest(void); + +// +// Version 1.2 +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_2ExpandKey( + _Out_ PSYMCRYPT_TLSPRF1_2_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_2Derive( + _In_ PCSYMCRYPT_TLSPRF1_2_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, // Up to SYMCRYPT_TLS_MAX_LABEL_SIZE + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, // Up to SYMCRYPT_TLS_MAX_SEED_SIZE + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_2( + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + _In_ SIZE_T cbKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptTlsPrf1_2SelfTest(void); + + 
+//////////////////////////////////////////////////////////////////////////// +// SSH-KDF as specified in RFC 4253 Section 7.2. +// + + +// Labels defined in RFC 4253 +#define SYMCRYPT_SSHKDF_IV_CLIENT_TO_SERVER 0x41 // 'A' +#define SYMCRYPT_SSHKDF_IV_SERVER_TO_CLIENT 0x42 // 'B' +#define SYMCRYPT_SSHKDF_ENCRYPTION_KEY_CLIENT_TO_SERVER 0x43 // 'C' +#define SYMCRYPT_SSHKDF_ENCRYPTION_KEY_SERVER_TO_CLIENT 0x44 // 'D' +#define SYMCRYPT_SSHKDF_INTEGRITY_KEY_CLIENT_TO_SERVER 0x45 // 'E' +#define SYMCRYPT_SSHKDF_INTEGRITY_KEY_SERVER_TO_CLIENT 0x46 // 'F' + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSshKdfExpandKey( + _Out_ PSYMCRYPT_SSHKDF_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_HASH pHashFunc, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey); +// +// Process the key using the specified hash function and store the result in +// SYMCRYPT_SSHKDF_EXPANDED_KEY structure. Once the key is expanded, +// SymCryptSshKdfDerive can be called multiple times to generate keys for +// different uses/labels. +// +// After all the keys are derived from a particular "shared secret" key, +// SYMCRYPT_SSHKDF_EXPANDED_KEY structure must be wiped. +// +// Parameters: +// - pExpandedKey : Pointer to a SYMCRYPT_SSHKDF_EXPANDED_KEY structure that +// will contain the expanded key after the function returns. +// - pHashFunc : Hash function that will be used in the key derivation. +// This function is saved in SYMCRYPT_SSHKDF_EXPANDED_KEY +// so that it is also used by the SymCryptSshKdfDerive function. +// - pbKey, cbKey : Buffer containing the secret key for the KDF. 
+// +// Returns SYMCRYPT_NO_ERROR +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSshKdfDerive( + _In_ PCSYMCRYPT_SSHKDF_EXPANDED_KEY pExpandedKey, + _In_reads_(cbHashValue) PCBYTE pbHashValue, + SIZE_T cbHashValue, + BYTE label, + _In_reads_(cbSessionId) PCBYTE pbSessionId, + SIZE_T cbSessionId, + _Inout_updates_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// Derive keys using the expanded key that was initialized with SymCryptSshKdfExpandKey +// along with other inputs. This function can be called consecutively with varying label +// values to generate keys for different purposes as defined in the RFC. +// +// Parameters: +// - pExpandedKey : Pointer to a SYMCRYPT_SSHKDF_EXPANDED_KEY structure that is +// initialized by a prior call to SymCryptSshKdfExpandKey. +// Must be wiped when SymCryptSshKdfDerive is not going to be called +// again with the same expanded key. +// - pbHashValue, cbHashValue : Buffer pointing to "exchange hash" value. cbHashValue must be equal +// to the output size of the hash function passed to SymCryptSshKdfExpandKey. +// - label : Label value used to indicate the type of the derived key. +// - pbSessionId, cbSessionId : Buffer pointing to the session identifier. cbSessionId must be equal +// to the output size of the hash function passed to SymCryptSshKdfExpandKey. +// - pbOutput, cbOutput : Buffer to store the derived key. Exactly cbOutput bytes of output will be generated. +// +// Returns SYMCRYPT_NO_ERROR +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSshKdf( + _In_ PCSYMCRYPT_HASH pHashFunc, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_(cbHashValue) PCBYTE pbHashValue, + SIZE_T cbHashValue, + BYTE label, + _In_reads_(cbSessionId) PCBYTE pbSessionId, + SIZE_T cbSessionId, + _Out_writes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// This function is a wrapper for using SymCryptSshKdfExpandKey followed by SymCryptSshKdfDerive +// in order to produce SSH-KDF output. 
+// +// All of the function arguments are forwarded to SymCryptSshKdfExpandKey and SymCryptSshKdfDerive +// functions, hence the documentation on those functions applies here as well. +// + + +VOID +SYMCRYPT_CALL +SymCryptSshKdfSha256SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptSshKdfSha512SelfTest(void); + + +//////////////////////////////////////////////////////////////////////////// +// SRTP-KDF as specified in RFC 3711 Section 4.3.1. +// + + +// Labels defined in RFC 3711 +#define SYMCRYPT_SRTP_ENCRYPTION_KEY 0x00 +#define SYMCRYPT_SRTP_AUTHENTICATION_KEY 0x01 +#define SYMCRYPT_SRTP_SALTING_KEY 0x02 +#define SYMCRYPT_SRTCP_ENCRYPTION_KEY 0x03 +#define SYMCRYPT_SRTCP_AUTHENTICATION_KEY 0x04 +#define SYMCRYPT_SRTCP_SALTING_KEY 0x05 + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSrtpKdfExpandKey( + _Out_ PSYMCRYPT_SRTPKDF_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey); +// +// Process the key and store the result in SYMCRYPT_SRTPKDF_EXPANDED_KEY structure. +// Once the key is expanded, SymCryptSrtpKdfDerive can be called multiple times to +// generate keys for different uses/labels. +// +// After all the keys are derived from a particular "shared secret" key, +// SYMCRYPT_SRTPKDF_EXPANDED_KEY structure must be wiped. +// +// Parameters: +// - pExpandedKey : Pointer to a SYMCRYPT_SRTPKDF_EXPANDED_KEY structure that +// will contain the expanded key after the function returns. +// - pbKey, cbKey : Buffer containing the secret key for the KDF. cbKey must be +// a valid AES key size (16-, 24-, or 32-bytes). 
+// +// Returns: +// SYMCRYPT_WRONG_KEY_SIZE : If cbKey is not a valid AES key size +// SYMCRYPT_NO_ERROR : On success +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSrtpKdfDerive( + _In_ PCSYMCRYPT_SRTPKDF_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT32 uKeyDerivationRate, + UINT64 uIndex, + UINT32 uIndexWidth, + BYTE label, + _Out_writes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// Derive keys using the expanded key that was initialized with SymCryptSrtpKdfExpandKey +// along with other inputs. This function can be called consecutively with varying label +// values to generate keys for different purposes as defined in the RFC. +// +// Parameters: +// - pExpandedKey : Pointer to a SYMCRYPT_SRTPKDF_EXPANDED_KEY structure that is +// initialized by a prior call to SymCryptSrtpKdfExpandKey. +// Must be wiped when SymCryptSrtpKdfDerive is not going to be called +// again with the same expanded key. +// - pbSalt, cbSalt : Buffer pointing to the salt value. cbSalt must always be 14 (112-bits). +// - uKeyDerivationRate : Key derivation rate; must be zero or 2^i for 0 <= i <= 24. +// - uIndex : Denotes an SRTP index value when label is 0x00, 0x01, or 0x02, otherwise +// denotes an SRTCP index value. +// - uIndexWidth : Denotes how wide uIndex value is. Must be one of 0, 32, or 48. By default, +// (when uIndexWidth = 0) uIndex is treated as 48-bits. +// RFC 3711 initially defined SRTCP indices to be 32-bit values. It was updated +// to be 48-bits by Errata ID 3712. SRTP index values are defined to be 48-bits. +// - label : Label value used to indicate the type of the derived key. +// - pbOutput, cbOutput : Buffer to store the derived key. Exactly cbOutput bytes of output will be generated. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSalt is not 14-bytes, or uKeyDerivationRate is invalid. +// SYMCRYPT_NO_ERROR : On success. 
+// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSrtpKdf( + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT32 uKeyDerivationRate, + UINT64 uIndex, + UINT32 uIndexWidth, + BYTE label, + _Out_writes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// This function is a wrapper for using SymCryptSrtpKdfExpandKey followed by SymCryptSrtpKdfDerive +// in order to produce SRTP-KDF output. +// +// All of the function arguments are forwarded to SymCryptSrtpKdfExpandKey and SymCryptSrtpKdfDerive +// functions, hence the documentation on those functions applies here as well. +// + + +VOID +SYMCRYPT_CALL +SymCryptSrtpKdfSelfTest(void); + + +//////////////////////////////////////////////////////////////////////////// +// HKDF +// +// PRF used in the key derivation functions of the TLS protocol, version +// 1.3. It is defined in RFC 5869. +// +// The SymCrypt ExtractPrk function corresponds to the "HKDF-Extract" function +// of the RFC 5869, while the SymCrypt PrkExpandKey and Derive functions +// correspond to the "HKDF-Expand" function of the RFC. +// +// SymCryptHkdfExtractPrk takes as inputs the MAC algorithm, the IKM (input +// keying material), and the optional salt. It executes the full "HKDF-Extract" +// function to produce the PRK (pseudorandom key). +// +// SymCryptHkdfPrkExpandKey takes as inputs just the MAC algorithm and the PRK. +// It produces the final (MAC) key to be used by the "HKDF-Expand" function. +// +// SymCryptHkdfExpandKey performs SymCryptHkdfExtractPrk followed by +// SymCryptHkdfPrkExpandKey to produce the final (MAC) key to be used by the +// "HKDF-Expand" function, without exposing the PRK to the caller. +// +// SymCryptHkdfDerive takes as input the final MAC key and the optional info. It +// performs the rest of the "HKDF-Expand" function to produce the HKDF result. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfExpandKey( + _Out_ PSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbIkm) PCBYTE pbIkm, + SIZE_T cbIkm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfExtractPrk( + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbIkm) PCBYTE pbIkm, + SIZE_T cbIkm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + _Out_writes_(cbPrk) PBYTE pbPrk, + SIZE_T cbPrk ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfPrkExpandKey( + _Out_ PSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbPrk) PCBYTE pbPrk, + SIZE_T cbPrk ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfDerive( + _In_ PCSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdf( + PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbIkm) PCBYTE pbIkm, + SIZE_T cbIkm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptHkdfSelfTest(void); + +//////////////////////////////////////////////////////////////////////////// +// SSKDF +// +// Single-Step KDF as specified in SP800-56C section 4. +// +// SSKDF requires an auxiliary function H. This can be an approved hash function, +// HMAC with an approved hash function, or KMAC. The approved hash functions +// are listed in SP800-56C section 7. +// +// A salt value may be optionally provided if either HMAC or KMAC is used for H. +// When no salt is provided, an all-zero default salt is used instead. For HMAC, +// the default salt is the length of an input block of the HMAC's hash function. +// For KMAC128, the default salt is 164 bytes. For KMAC256, the default salt is 132 bytes.
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfMacExpandSalt( + _Out_ PSYMCRYPT_SSKDF_MAC_EXPANDED_SALT pExpandedSalt, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt); +// +// Initializes *pExpandedSalt with the macAlgorithm, and optionally the salt. Used +// for SSKDF when H is a MAC function. After calling SymCryptSskdfMacExpandSalt, +// SymCryptSskdfMacDerive can be called multiple times to generate keys for different +// uses, fixed infos, and shared secrets. For multiple KDFs using the same MAC and salt, +// calling SymCryptSskdfMacExpandSalt once and SymCryptSskdfMacDerive multiple times +// is more efficient than calling SymCryptSskdfMac multiple times. +// +// The expanded salt contains no secrets and does not need to be wiped. +// +// Parameters: +// - pExpandedSalt : Pointer to a SYMCRYPT_SSKDF_MAC_EXPANDED_SALT structure that +// will contain the expanded salt after the function returns. +// - macAlgorithm : MAC algorithm that will be used in the key derivation. +// This function is saved in SYMCRYPT_SSKDF_MAC_EXPANDED_SALT. +// - pbSalt, cbSalt : Buffer containing the salt for the KDF. cbSalt must be a valid +// key size for the MAC algorithm. If pbSalt is NULL, the default +// all zero-byte salt is used. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfMacDerive( + _In_ PCSYMCRYPT_SSKDF_MAC_EXPANDED_SALT pExpandedSalt, + SIZE_T cbMacOutputSize, + _In_reads_(cbSecret) PCBYTE pbSecret, + SIZE_T cbSecret, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); +// +// Derive keys using the expanded salt that was initialized with SymCryptSskdfMacExpandSalt +// along with other inputs. This function can be called consecutively with varying fixed infos +// and shared secrets to generate keys for different purposes as defined in the SP800-56C. +// The same pExpandedSalt can be used simultaneously by multiple threads.
+// +// Parameters: +// - pExpandedSalt : Pointer to a SYMCRYPT_SSKDF_MAC_EXPANDED_SALT structure that is +// initialized by a prior call to SymCryptSskdfMacExpandSalt. +// - cbMacOutputSize : Output size used by the MAC algorithm for intermediate computations. Must not be +// greater than 64 bytes. Set to 0 for MACs that don't support variable output sizes, +// or to use the default output size. The default output size when KMAC is used is cbResult. +// - pbSecret, cbSecret : Buffer containing the shared secret. +// - pbInfo, cbInfo : Buffer containing the fixed info. +// - pbResult, cbResult : Buffer to store the derived key. Exactly cbResult bytes of output will be generated. +// Must not exceed 2^{32} - 1 times the result size of the MAC algorithm. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfMac( + _In_ PCSYMCRYPT_MAC macAlgorithm, + SIZE_T cbMacOutputSize, + _In_reads_(cbSecret) PCBYTE pbSecret, + SIZE_T cbSecret, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); +// +// This function is a wrapper for using SymCryptSskdfMacExpandSalt followed by SymCryptSskdfMacDerive +// in order to produce SSKDF output. +// +// All of the function arguments are forwarded to SymCryptSskdfMacExpandSalt and SymCryptSskdfMacDerive +// functions, hence the documentation on those functions apply here as well. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfHash( + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbHashOutputSize, + _In_reads_(cbSecret) PCBYTE pbSecret, + SIZE_T cbSecret, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); +// +// Derive keys using the specified hash algorithm as H. +// +// Parameters: +// - hashAlgorithm : Hash algorithm that will be used in the key derivation. +// - cbHashOutputSize : Output size used by the hash algorithm for intermediate computations. 
+// Set to 0 for hashes that don't support variable output sizes, or to use +// the default output size. Currently, no allowed hash algorithms support +// variable output sizes, so this should always be set to 0. +// - pbSecret, cbSecret : Buffer containing the shared secret. +// - pbInfo, cbInfo : Buffer containing the fixed info. +// - pbResult, cbResult : Buffer to store the derived key. Exactly cbResult bytes of output will be generated. +// Must not exceed 2^{32} - 1 times the result size of hashAlgorithm. +// + +VOID +SYMCRYPT_CALL +SymCryptSskdfSelfTest(void); + +//========================================================================== +// RNG ALGORITHMS +//========================================================================== + +//////////////////////////////////////////////////////////////////////////// +// AES-CTR-DRBG +// +// This is an implementation of AES-CTR_DRBG as specified in SP 800-90. +// It always uses a 256-bit security strength. +// +// Note: This RNG is NOT compliant with FIPS 140-2 as it lacks the continuous +// self test required by FIPS 140-2. See the AES-FIPS RNG algorithm below. +// +// SYMCRYPT_RNG_AES_STATE +// State of an AES-CTR_DRBG instance. +// + +#define SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE (32 + 16) +#define SYMCRYPT_RNG_AES_MIN_RESEED_SIZE (32) +#define SYMCRYPT_RNG_AES_MAX_SEED_SIZE (256) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesInstantiate( + _Out_ PSYMCRYPT_RNG_AES_STATE pRngState, + _In_reads_(cbSeedMaterial) PCBYTE pcbSeedMaterial, + + _In_range_(SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE, SYMCRYPT_RNG_AES_MAX_SEED_SIZE) + SIZE_T cbSeedMaterial ); +// +// Initialize a new SYMCRYPT_RNG_AES_STATE, and seed it with the seed material. +// +// 'Instantiate' is the SP800-90 terminology. +// The seed material must be at least SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE bytes, +// and at most SYMCRYPT_RNG_AES_MAX_SEED_SIZE bytes. 
+// +// This implementation always uses 256-bit security strength, and +// does not support 'prediction resistance' as defined in SP 800-90. +// +// SP 800-90 specifies three inputs to the instantiation: +// - entropy +// - nonce +// - personalization string +// This function takes only a single input, which is the concatenation of these three: +// seed material := entropy | nonce | personalization string +// +// The following are the requirements on the three inputs: +// Entropy: must have at least 256 bits of entropy +// Nonce: must either be a random value with 128-bits of entropy, or a value that does not +// repeat with a probability of more than 2^{-128}. +// Together these requirements imply that cbSeedMaterial should be at least +// SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE +// +// This function only returns an error if the cbSeedMaterial value is out of range. +// + +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerate( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState, + _Out_writes_(cbRandom) PBYTE pbRandom, + SIZE_T cbRandom ); +// +// Generate random output from the state. +// +// Callers do not need to limit themselves to requests of 64 kB or less; +// large requests are split internally to follow the request size limitations of SP 800-90. +// +// SP 800-90 also requires a limit on the # generate calls that can be done between reseeds. +// For AES-CTR_DRBG this limit is 2^48, which means it is all but impossible to hit this limit. +// If the caller were to succeed, the 2^48'th call will result in a fatal error. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesReseed( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState, + _In_reads_(cbSeedMaterial) PCBYTE pcbSeedMaterial, + + _In_range_(SYMCRYPT_RNG_AES_MIN_RESEED_SIZE, SYMCRYPT_RNG_AES_MAX_SEED_SIZE) + SIZE_T cbSeedMaterial ); +// +// Reseed the PRNG state. 
+// +// The seed material consists of the concatenation of the following SP800-90 fields: +// - entropy +// - additional input +// +// The entropy input should have at least 256 bits of entropy. +// This function only returns an error if the cbSeedMaterial value is out of range. +// + +VOID +SYMCRYPT_CALL +SymCryptRngAesUninstantiate( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState ); +// +// Uninstantiate (clean up) the PRNG state +// + +VOID +SYMCRYPT_CALL +SymCryptRngAesInstantiateSelftest(void); +// +// For FIPS-certified modules, this function should be called before every instantiation. +// If multiple DRBGs are instantiated 'in quick succession', a single self-test is sufficient +// (see SP 800-90 11.3.2). +// + + +VOID +SYMCRYPT_CALL +SymCryptRngAesReseedSelftest(void); +// +// FIPS-certified modules should call this function before every call to the reseed function. +// + +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerateSelftest(void); +// +// FIPS-certified modules should call this function at least once on startup, and whenever +// they want to re-test the generate function. +// + +//////////////////////////////////////////////////////////////////////////// +// AES-CTR-DRBG with FIPS 140-2 continuous self-test +// +// This is a straightforward wrapper around the AES-CTR-DRBG implementation +// that adds the FIPS 140-2 continuous self-test. +// At the moment, it looks like this test will not be present in FIPS 140-3 so +// this RNG will be dropped when FIPS 140-3 comes out. +// The self-test requirements are met by calling the selftest functions of the +// AES-CTR_DRBG implementation directly. +// +// These functions are functionally equivalent to the ones for AES-CTR_DRBG. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesFips140_2Instantiate( + _Out_ PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + _In_reads_(cbSeedMaterial) PCBYTE pcbSeedMaterial, + + _In_range_(SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE, SYMCRYPT_RNG_AES_MAX_SEED_SIZE) + SIZE_T cbSeedMaterial ); + +VOID +SYMCRYPT_CALL +SymCryptRngAesFips140_2Generate( + _Inout_ PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + _Out_writes_(cbRandom) PBYTE pbRandom, + SIZE_T cbRandom ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesFips140_2Reseed( + _Inout_ PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + _In_reads_(cbSeedMaterial) PCBYTE pcbSeedMaterial, + + _In_range_(SYMCRYPT_RNG_AES_MIN_RESEED_SIZE, SYMCRYPT_RNG_AES_MAX_SEED_SIZE) + SIZE_T cbSeedMaterial ); + +VOID +SYMCRYPT_CALL +SymCryptRngAesFips140_2Uninstantiate( + _Inout_ PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState ); + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// Internal RNG functions +// +// To satisfy FIPS 140-3 and SP 800-90B, certain modules of SymCrypt may set up internal +// RNG state(s) to keep random bit generation behind the module's FIPS boundary. +// These functions allow the caller to get random bits and provide entropy, respectively, +// to SymCrypt's internal RNG state(s). +// Implementation is module dependent, and these functions may not be defined +// for certain modules. Check before using. +// + +VOID +SYMCRYPT_CALL +SymCryptRandom( + _Out_writes_(cbRandom) PBYTE pbRandom, + SIZE_T cbRandom ); +// Fills pbRandom with cbRandom random bytes + +VOID +SYMCRYPT_CALL +SymCryptProvideEntropy( + _In_reads_(cbEntropy) PCBYTE pbEntropy, + SIZE_T cbEntropy ); +// Mixes pbEntropy into the internal RNG state. 
There may be module-specific limits on +// cbEntropy - check module before use + + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// RdRand support +// These functions provide access to the RdRand random number generator in +// the latest Intel CPUs. +// The DRBG that underlies the RdRand instruction is limited to 128-bit security. +// The seed for each consecutive 8 kB of data can be recovered in 2^128 work. +// Therefore, we allow for multiple blocks of 8 kB to be gathered in an attempt to +// extract 256-bit security from the hardware. +// In general, to achieve N*128 bits of security, you should use a buffer of +// (N+1)*SYMCRYPT_RDRAND_RESEED_SIZE bytes. +// + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +// The RdRand instruction reseeds its internal DRBG every 8 kB (or faster) +#define SYMCRYPT_RDRAND_RESEED_SIZE (1<<13) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandStatus(void); +// +// Returns SYMCRYPT_NO_ERROR if RdRand is available. +// returns SYMCRYPT_NOT_IMPLEMENTED if RdRand is not available. +// Note: the library must be initialized before you call this function. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandGetBytes( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ); +// +// Gets cbBuffer bytes from the RdRand instruction and hashes them to the pbResult buffer. +// pbBuffer points to a scratch buffer that is used internally, but wiped upon exit. +// cbBuffer must be a multiple of 16. +// Fatal error if SymCryptRdrandStatus indicates that Rdrand is not available. +// Returns an error if the RdRand instruction failed consistently. +// Note: SymCrypt only checks whether RdRand self-reports as failing. SymCrypt does NOT attempt +// to validate that the values returned in successful RdRand calls are in fact random. +// See SymCryptRdrandGet for a version that does not return an error but fatals instead. 
+// + +VOID +SYMCRYPT_CALL +SymCryptRdrandGet( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ); +// +// Gets cbBuffer bytes from the RdRand instruction and hashes them to the pbResult buffer. +// pbBuffer points to a scratch buffer that is used internally, but wiped upon exit. +// cbBuffer must be a multiple of 16. +// Fatal error if the RdRand instruction fails. +// Note: SymCrypt only checks whether RdRand self-reports as failing. SymCrypt does NOT attempt +// to validate that the values returned in successful RdRand calls are in fact random. +// + +#endif + + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// RdSeed support +// These functions provide access to the RdSeed random number generator in +// recent Intel CPUs. +// + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedStatus(void); +// +// Returns SYMCRYPT_NO_ERROR if RdSeed is available. +// returns SYMCRYPT_NOT_IMPLEMENTED if RdSeed is not available. +// Note: the library must be initialized before you call this function. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedGetBytes( + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult ); +// +// Queries cbResult bytes from the Rdseed instruction and puts them in the buffer. +// The number of bytes (cbResult) must be a multiple of 16. +// Fatal error if the Rdseed instruction is not present. +// Returns an error if the Rdseed instruction fails consistently. +// Note: SymCrypt only checks whether Rdseed self-reports as failing. SymCrypt does NOT attempt +// to validate that the values returned in successful Rdseed calls are in fact random. +// See SymCryptRdseedGet for a version that does not return an error but fatals instead. 
+// + +VOID +SYMCRYPT_CALL +SymCryptRdseedGet( + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult ); +// +// Queries cbResult bytes from the Rdseed instruction and puts them in the buffer. +// The number of bytes (cbResult) must be a multiple of 16. +// Fatal error if the Rdseed instruction is not present, or the instruction fails consistently. +// Note: SymCrypt only checks whether Rdseed self-reports as failing. SymCrypt does NOT attempt +// to validate that the values returned in successful Rdseed calls are in fact random. +// + +#endif + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// AES-XTS +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXtsAesExpandKey( + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ); +// Note that this key expansion function does not perform FIPS checks for backwards compatibility. +// Use SymCryptXtsAesExpandKeyEx for FIPS-approved XTS key expansion. + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXtsAesExpandKeyEx( + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + UINT32 flags ); +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS. +// Currently this is just checking that 2 AES keys used in XTS are non-equal. + +VOID +SYMCRYPT_CALL +SymCryptXtsAesKeyCopy( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pDst ); +// +// Create a copy of an expanded key +// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncrypt( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + UINT64 tweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Encrypt a buffer using XTS-AES and 64 bit tweak. +// - pExpandedKey points to the expanded key for XTS. 
+// - cbDataUnit: size of each data unit, must be at least 16 and cannot exceed 2^{24} bytes. Typically 512. +// - tweak: 64 bit tweak value used for the first data unit in the buffer, incremented for subsequent data units. +// - pbSrc: plaintext input +// - pbDst: ciphertext output. The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. Must be a multiple of cbDataUnit. +// +// XTS-AES works on equal-sized data units, with each data unit being uniquely encrypted using a combination of +// an integer "tweak" value and the XTS key (a pair of AES keys). A data unit typically corresponds to a sector +// size on a disk. +// +// This API encrypts a buffer consisting of several consecutive data units, which use consecutive tweak values. +// As the tweak is 64 bits, if there is an overflow of 64 bits, the value of the tweak will wrap to 0. +// +// i.e. encryption with tweak 0xffffffffffffffff for a buffer consisting of 2 data units will correspond to: +// encryption using tweak 0xffffffffffffffff for the first data unit, +// encryption using tweak 0x0000000000000000 for the second data unit +// +// Note, using cbDataUnit which is a power of 2 >= 256, will likely be more performant. +// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecrypt( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + UINT64 tweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Decrypt a buffer using XTS-AES and 64 bit tweak. +// See SymCryptXtsAesEncrypt for a more in depth description, everything is the same, only this decrypts rather than encrypts. 
+// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptWith128bTweak( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Encrypt a buffer using XTS-AES and 128 bit tweak. +// - pExpandedKey points to the expanded key for XTS. +// - cbDataUnit: size of each data unit, must be at least 16 and cannot exceed 2^{24} bytes. Typically 512. +// - pbTweak: 128 bit tweak value used for the first data unit in the buffer, incremented for subsequent data units. +// - pbSrc: plaintext input +// - pbDst: ciphertext output. The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. Must be a multiple of cbDataUnit. +// +// XTS-AES works on equal-sized data units, with each data unit being uniquely encrypted using a combination of +// an integer "tweak" value and the XTS key (a pair of AES keys). A data unit typically corresponds to a sector +// size on a disk. +// +// This API encrypts a buffer consisting of several consecutive data units, which use consecutive tweak values. +// As the tweak is 128 bits, if there is an overflow of 128 bits, the value of the tweak will wrap to 0. +// +// i.e. 
encryption with tweak 0x0000000000000000ffffffffffffffff for a buffer consisting of 2 data units will correspond to: +// encryption using tweak 0x0000000000000000ffffffffffffffff for the first data unit, +// encryption using tweak 0x00000000000000010000000000000000 for the second data unit +// but encryption with tweak 0xffffffffffffffffffffffffffffffff for a buffer consisting of 2 data units will correspond to: +// encryption using tweak 0xffffffffffffffffffffffffffffffff for the first data unit, +// encryption using tweak 0x00000000000000000000000000000000 for the second data unit +// +// Note, using cbDataUnit which is a power of 2 >= 256, will likely be more performant. +// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptWith128bTweak( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Decrypt a buffer using XTS-AES and 128 bit tweak. +// See SymCryptXtsAesEncryptWith128bTweak for a more in depth description, everything is the same, only this decrypts rather than encrypts. +// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesSelftest(void); + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// AES-KW and AES-KWP +// +// These are the AES-KW and AES-KWP algorithms per SP 800-38F. +// +// These are very slow compared to most AES modes, requiring a long serial chain of AES +// block encryption/decryptions, with a best case cost comparable to ~12x AES-CBC encryption +// for a given buffer size. In practice the cost is often higher. +// These cipher modes are not recommended. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ); +// +// Encrypt a buffer using AES-KW mode. +// +// - pExpandedKey points to the expanded key to use. +// - pbSrc is the plaintext source buffer. The source and destination buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbSrc. # bytes of plaintext. This must be a multiple of 8, >=16, and <2^31. +// - pbDst is the ciphertext destination buffer. The source and destination buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbDst. # bytes in the destination buffer. This must be >= cbSrc+8. +// - pcbResult pointer to a variable which receives the length of the ciphertext written to pbDst. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSrc is an invalid size +// SYMCRYPT_BUFFER_TOO_SMALL : If cbDst is not large enough +// (this can always be avoided if cbDst >= cbSrc+8) +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE : If there is insufficient memory for the operation +// SYMCRYPT_NO_ERROR : On success +// +// Remarks: +// The standard allows larger plaintexts but there is no requirement to support them, we only support +// plaintext up to 2^31 bytes because it avoids complexity in handling overflow of 32b buffer sizes, and +// is larger than practically necessary. +// The output parameters (pbDst and pcbResult) are only set on success. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ); +// +// Decrypt a buffer using AES-KW mode. +// +// - pExpandedKey points to the expanded key to use. 
+// - pbSrc is the ciphertext source buffer. The source and destination buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbSrc. # bytes of ciphertext. This must be a multiple of 8, >=24, and <=2^31. +// - pbDst is the plaintext destination buffer. The source and destination buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbDst. # bytes in the destination buffer. This must be >= cbSrc-8. +// - pcbResult pointer to a variable which receives the length of the plaintext written to pbDst. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSrc is an invalid size +// SYMCRYPT_BUFFER_TOO_SMALL : If cbDst is not large enough +// (this can always be avoided if cbDst >= cbSrc-8) +// SYMCRYPT_AUTHENTICATION_FAILURE : If pbSrc does not decrypt successfully +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE : If there is insufficient memory for the operation +// SYMCRYPT_NO_ERROR : On success +// +// Remarks: +// The standard allows larger plaintexts but there is no requirement to support them, we only support +// plaintext up to 2^31 bytes because it avoids complexity in handling overflow of 32b buffer sizes, and +// is larger than practically necessary. +// The output parameters (pbDst and pcbResult) are only set on success. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwpEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ); +// +// Encrypt a buffer using AES-KWP mode. +// +// - pExpandedKey points to the expanded key to use. +// - pbSrc is the plaintext source buffer. The source and destination buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbSrc. # bytes of plaintext. This must be >0 and <=2^31-8. 
+// - pbDst is the ciphertext destination buffer. The source and destination buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbDst. # bytes in the destination buffer. This must be >= cbSrc + 16 - (cbSrc%8) - ((cbSrc%8)==0 ? 8 : 0) +// - pcbResult pointer to a variable which receives the length of the ciphertext written to pbDst. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSrc is an invalid size +// SYMCRYPT_BUFFER_TOO_SMALL : If cbDst is not large enough +// (this can always be avoided if cbDst >= cbSrc+15) +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE : If there is insufficient memory for the operation +// SYMCRYPT_NO_ERROR : On success +// +// Remarks: +// The standard allows larger plaintexts but there is no requirement to support them, we only support +// plaintext up to 2^31 bytes because it avoids complexity in handling overflow of 32b buffer sizes, and +// is larger than practically necessary. +// The output parameters (pbDst and pcbResult) are only set on success. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwpDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ); +// +// Decrypt a buffer using AES-KWP mode. +// +// - pExpandedKey points to the expanded key to use. +// - pbSrc is the ciphertext source buffer. The source and destination buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbSrc. # bytes of ciphertext. This must be a multiple of 8, >=16, and <=2^31. +// - pbDst is the plaintext destination buffer. The source and destination buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbDst. # bytes in the destination buffer. 
This must be large enough to fit the plaintext, +// a valid plaintext length is in the range [cbSrc-15, cbSrc-8]. If cbDst >= cbSrc-8 then the +// destination buffer is guaranteed to be large enough. +// - pcbResult pointer to a variable which receives the length of the plaintext written to pbDst. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSrc is an invalid size +// SYMCRYPT_BUFFER_TOO_SMALL : If cbDst is not large enough +// (this can always be avoided if cbDst >= cbSrc-8) +// SYMCRYPT_AUTHENTICATION_FAILURE : If pbSrc does not decrypt successfully +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE : If there is insufficient memory for the operation +// SYMCRYPT_NO_ERROR : On success +// +// Remarks: +// The standard allows larger plaintexts but there is no requirement to support them, we only support +// plaintext up to 2^31 bytes because it avoids complexity in handling overflow of 32b buffer sizes, and +// is larger than practically necessary. +// The output parameters (pbDst and pcbResult) are only set on success. +// +// If we fail to decrypt due to bad data, we return SYMCRYPT_AUTHENTICATION_FAILURE in constant time with +// respect to how the decrypted data is corrupted. While there is no known attack on AES-KWP abusing +// differential timing of different failure cases, being constant time for this is cheap, so is a reasonable +// hardening measure. +// +// On success we do not attempt to hide the plaintext length from sidechannels, as this could make it hard +// for callers with known plaintext length to use precisely sized buffers to decrypt into (i.e. caller +// knows the valid plaintext is 15 bytes but the API would require caller to provide a 16 byte pbDst). It +// is expected that in any real use case the length of the plaintext would immediately be used to import the +// unwrapped key into some other piece of code - so attempting to obscure the plaintext length would not be +// of any benefit. 
+// + + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// TLS CBC cipher suites HMAC verification +// +// The TLS cipher suites for block cipher modes (typically CBC) are designed in an unfortunate way. +// The format is: +// Plaintext | MAC | <padding> | <padding_length> +// Which is then encrypted by the block cipher. +// Plaintext is the data being transferred. MAC is the HMAC value over some header data and the plaintext. +// The padding_length is a byte (range 0-255) that specifies the length of the padding. +// The padding consists of padding_length bytes (up to 255). Each byte is equal to padding_length. +// The padding_length is chosen so that the length of the whole structure is a multiple of the block cipher block +// size, so that it can be encrypted with CBC. +// +// The problem is that when decrypting this, the natural code will take actions that depend on the padding_length +// byte before it has been authenticated, and those actions might reveal information about padding_length. This +// in turn can be used in an attack that lets the attacker decrypt data. +// We are particularly concerned with software side channels, where another thread infers information about what the +// active thread is doing through cache state and other shared CPU state. +// +// To address this issue once and for all, we created an implementation of the HMAC verification with the following +// properties: +// - It verifies the HMAC in the data structure above. +// - This is done in a side-channel safe manner, not revealing anything except whether the structure is valid or not. +// This means that the HMAC computation over the plaintext is constant-time and constant-memory-access pattern +// irrespective of the padding_length; thus this is a fixed-time implementation for variable-sized inputs. +// Similarly, the MAC value has to be extracted from a variable location in the input using a fixed memory access +// pattern.
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsCbcHmacVerify( + _In_ PCSYMCRYPT_MAC pMacAlgorithm, + _In_ PVOID pExpandedKey, + _Inout_ PVOID pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); +// Verify a TLS CBC cipher suite MAC value +// - pMacAlgorithm: one of SymCryptHmacSha1Algorithm, SymCryptHmacSha256Algorithm, or SymCryptHmacSha384Algorithm. +// Other MAC algorithms are not supported. +// - pState points to a SYMCRYPT_HMAC_SHAXXX_STATE. It is allowed to process data into the state before this call, +// but the total # bytes processed must be < 2^16. +// - pbData points to a buffer containing the concatenation of plaintext, MAC, padding, and padding_length. +// - cbData is the size of the buffer. +// Note: callers should pass the entire (plaintext | MAC | padding | padding_length) in a single call to get +// the full side-channel protection. +// This function returns success if the HMAC verification is successful. +// It returns an error if the padding or HMAC verification fails. +// After the call pState is wiped of any sensitive data, just like the SymCryptHmacXxxResult function. +// Callers have to check the padding_length byte pbData[cbData-1] to determine the size of the plaintext. +// + + + +/* + +Yes, despite its name, SymCrypt supports asymmetric cryptographic algorithms. +The asymmetric implementations have the following primary design goals: + - Implement asymmetric cryptographic algorithms like RSA, DSA, DH, ECDSA, ECDH, etc. + - Protect against all software-based side-channel attacks + - Protect against those hardware-based side-channel attacks that can be practically protected against in software. + - High performance, dynamically using CPU features that are available on the current CPU stepping. + - Support small code and small memory environments. + - Support environments that need to control memory allocations. + +The primary use-case is for SymCrypt to be the crypto library for MS products. 
This includes high-performance +scenarios such as TLS server termination, and low-footprint uses such as Bootmgr. +SymCrypt supports applications such as firmware updates for embedded CPUs where code and memory +footprint are of overriding importance. + +Side channel attacks: +Defence against side channel attacks plays an important part in the design and implementation of +SymCrypt. Side channel attacks are a class of attacks on cryptographic systems where the attacker +gets some information about a cryptographic computation in addition to the inputs and outputs. +For example, any of the following information could be retrieved by the attacker: +- The time it takes to perform a computation (either exactly or approximately) +- The power usage over time of the CPU. +- The noise made by the computer's power supply (a function of the CPU power consumption) +- Which cache lines are evicted from the attacker's thread A by a computation in thread B. +These may sound like esoteric attacks, but all of them have been used in practical demonstrations +to attack cryptographic systems. + +SymCrypt uses the following API rules to protect against side-channel attacks: +- Information is divided into two classes: public information and private information. +- Public information is allowed to leak through side channels, and the library makes no attempt to hide + public information. +- Private information is protected against side-channel attacks to the best ability of the library. +Unless otherwise documented, all information is treated as private. +Functions may document that a particular value is "published". This means that the function may use +the value in a way that is not side-channel safe, so any security analysis that considers +side-channel attacks must assume that the published value is public and known by the attacker. + +The following information is always assumed to be public, and thus known to any side-channel attacker: +- Which SymCrypt function is being called. 
+- The location of any of the buffers passed as arguments. +- The size parameter of any buffer passed as an argument. +- Any details that cause a function to return an error. +Thus, it is important that callers who wish to be side-channel safe ensure that their buffer locations and sizes +do not reveal any information, and that they do not make any calls that result in an error, unless there is no +need for secrecy when an error occurs. + +Because pointer values are all public (the memory address cannot be hidden on modern CPUs if the buffer is accessed) +side-channel safe code ends up using masked operations, such as masked-copy where the copy is done or not done +depending on a mask parameter to the function. +SymCrypt exposes a set of masked functions that applications can use for their own side-channel safe operations. + +The following coding rules are used to protect private information: +- The sequence of instructions executed is independent of private information. +- The sequence of memory operations (read/write) and memory addresses accessed is independent of private information. +- Private information is not used in instructions whose timing may depend on the data being processed. +As far as we know these rules stop all software-based side-channel attacks, and many hardware-based ones. + +One remaining line of attack is to feed the algorithm with values that are special. For example, an RSA +decryption may receive a value that contains many zeroes modulo one prime. If the power consumption of the +multiply instruction reveals whether one of the multiplicands is zero, then the attacker might learn +useful information. Note that this is a pure hardware attack, it is not applicable to software attackers. +Protecting against this style of attack is an area that still needs more research. Where applicable we +document the additional protections that SymCrypt provides. 
+ + +Running with CHKed code: +All binaries that use SymCrypt must build CHKed versions of the binary (linking the CHKed version of SymCrypt) +and perform full test runs on the CHKed version. +Due to the performance and operational requirements, the production-optimized SymCrypt library API cannot +check all buffer sizes or even be fully SAL-annotated. +The necessary size information is simply not available at every call point, and passing +the size information around would add too much overhead. +The CHKed version of the library adds additional code & per-object storage to be able to implement checks that +are broadly equivalent to what SAL would normally check. +SAL checks are part of the SDL requirements and need to be done on all Microsoft products. +Though this requirement cannot strictly speaking be satisfied with the SymCrypt library, running the CHKed +version through full validation is the best equivalent, and therefore should be considered mandatory. + +Please ensure that the validation runs exercise all the border-cases of largest and smallest sizes, as well as +intermediate sizes for the parameters. + +*/ + + +// +// Caller-provided functions +// +// Some of the large-integer and asymmetric algorithm functions use callbacks. +// The callback functions do not have to be functional for binaries that only use the symmetric algorithm +// implementations. +// Use of callbacks is documented in each function that uses them. +// + +PVOID +SYMCRYPT_CALL +SymCryptCallbackAlloc( SIZE_T nBytes ); +// +// Allocate a buffer of nBytes; returns NULL on failure. +// Returned pointer must be aligned to a multiple of SYMCRYPT_ASYM_ALIGN_VALUE. +// + +VOID +SYMCRYPT_CALL +SymCryptCallbackFree( PVOID pMem ); +// +// Called by SymCrypt to free a buffer previously allocated by SymCryptCallbackAlloc(). +// Note that callers should never call these functions directly. Buffers that were returned +// from the SymCrypt API are freed with SymCryptFree* functions, not this function. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SYMCRYPT_WEAK_SYMBOL +SymCryptCallbackRandom( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer ); +// +// Fill the buffer with uniformly distributed random bytes from a cryptographically strong RNG source. +// + +PVOID +SYMCRYPT_CALL +SymCryptCallbackAllocateMutexFastInproc(void); +// +// Allocate and initialize a mutex object; returns NULL on failure. +// +// Fast indicates that users of the mutex will only hold it for a short period of time, so it +// is not expected that threads should need to sleep before acquiring the mutex. (i.e. can be +// implemented by a spinlock in kernel mode). +// Inproc indicates the mutex is only used for synchronization between threads in a single process. +// +// Users of the library in contexts where mutexes are not available can set this callback to always +// return NULL, and attempts to use APIs requiring it will fail at runtime. +// + +VOID +SYMCRYPT_CALL +SymCryptCallbackFreeMutexFastInproc( _Inout_ PVOID pMutex ); +// +// Free a mutex object previously created by SymCryptCallbackAllocateMutexFastInproc +// + +VOID +SYMCRYPT_CALL +SymCryptCallbackAcquireMutexFastInproc( _Inout_ PVOID pMutex ); +// +// Take exclusive ownership of a mutex object allocated by SymCryptCallbackAllocateMutexFastInproc. +// +// This call must also ensure memory ordering such that stores before the previous call to +// SymCryptCallbackReleaseMutexFastInproc with this mutex are observable by loads after this call. +// + +VOID +SYMCRYPT_CALL +SymCryptCallbackReleaseMutexFastInproc( _Inout_ PVOID pMutex ); +// +// Relinquish ownership of a mutex object allocated by SymCryptCallbackAllocateMutexFastInproc and +// acquired by SymCryptCallbackAcquireMutexFastInproc. 
+// + +//============================================================================================== +// Object types for high-level API +// +// SYMCRYPT_RSAKEY A key that stores the information for the RSA algorithms (encryption and signing). +// It always contains the RSA parameters / public key, and may or may not contain +// the associated private key. +// SYMCRYPT_DLGROUP A discrete log group to be used for the DSA and DH algorithms. It contains the +// group parameters (P,[Q],G) (The prime Q is optional). +// SYMCRYPT_DLKEY A "discrete log" key that stores the information for the DSA and DH algorithms. It +// always contains a public key, and may or may not contain the associated private key. +// SYMCRYPT_ECURVE An elliptic curve over a prime field. Contains field prime, curve parameters, +// and distinguished point (generator). +// SYMCRYPT_ECKEY An elliptic curve key for the ECDH and ECDSA algorithms. It always contains a +// public key, and may or may not contain the associated private key. +// +// See symcrypt_internal.h for structure definitions. +// + +//============================================================================================== +// Supported formats and parameters +// + +typedef enum _SYMCRYPT_NUMBER_FORMAT { + SYMCRYPT_NUMBER_FORMAT_LSB_FIRST = 1, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST = 2, +} SYMCRYPT_NUMBER_FORMAT; +// +// SYMCRYPT_NUMBER_FORMAT is used to specify the number format for import and export +// of BYTE arrays. We support the following two number formats: +// Let p[0], ..., p[n-1] be an array containing n bytes: +// LSB_FIRST: +// Value = \sum_{i=0}^{n-1} p[i] * 2^{8*i} +// = p[0] + 2^8 * p[1] + 2^{16} * p[2] + ... +// +// MSB_FIRST: +// Value = \sum_{i=0}^{n-1} p[n-1-i] * 2^{8*i} +// = p[n-1] + 2^8 * p[n-2] + 2^{16} * p[n-3] + ... 
+// + +typedef struct _SYMCRYPT_RSA_PARAMS { + UINT32 version; // Version of the parameters structure + UINT32 nBitsOfModulus; // Number of bits in the modulus + UINT32 nPrimes; // Number of primes, 0 if object is only for public key + UINT32 nPubExp; // Number of public exponents (typically 1) +} SYMCRYPT_RSA_PARAMS, *PSYMCRYPT_RSA_PARAMS; +typedef const SYMCRYPT_RSA_PARAMS * PCSYMCRYPT_RSA_PARAMS; +// +// SYMCRYPT_RSA_PARAMS is used to specify all the parameters needed for creation of an +// RSA key object. The above is version 1 of the parameters. +// Currently, we only support nPubExp = 1 and nPrimes = 0 or 2. +// Note: nPrimes > 2 and nPubExp > 1 allow faster and more flexible +// RSA functionality. Though currently not supported, these parameters make it easy to add +// support in the future. +// + +// Notation for elliptic curve parameters and functions +// ==================================================== + +// E The elliptic curve group. This is typically represented as the set of 2D points (with +// coordinates from a finite field) that satisfy a specific curve equation. +// An example equation is y^2 = x^3 + Ax + B for A,B. The set E also +// contains a special "zero" point denoted by O. +// |E| The total number of points on the elliptic curve group E. +// G A special point in E which generates a (prime) order subgroup. +// GOrd The (prime) order of the generator point G. Therefore, GOrd * G = O. +// h The cofactor of the curve. It is defined as h = |E| / GOrd. Typical +// cofactors are 4 (NUMS curves), and 8 (curve 25519). + +// Definitions +// =========== + +// A "proper public key" (PPK) on the curve E is defined to be an arbitrary nonzero point of the +// subgroup generated by the point G. + +// A "proper secret key" (PSK) is the logarithm of a "proper public key" with +// respect to G. Therefore, if Q is the PPK, then the corresponding PSK is the unique +// integer s with 0 < s < GOrd such that s*G = Q. 
+ +// If the cofactor of the curve is equal to 1, then the entire group E is generated by +// the point G and all nonzero points in E are "proper public keys". + +// Otherwise, an arbitrary point on the curve might or might not belong to the subgroup +// generated by G. Furthermore, in this case, an arbitrary point P may have order equal +// to the cofactor (or smaller), i.e. h*P=O, or an order larger than GOrd. + +// To securely handle the cases where "non-proper" public keys are imported from possibly malicious +// sources, the creators of curve parameters impose several restrictions on the secret keys +// and the algorithms used. For example, the scalar multiplication algorithm for NUMS curves +// always pre-multiplies a point by the cofactor; in order to zero-out any possible +// components of lower order ("low-order clearing"). Curve 25519 imposes this by asserting +// that all secret keys have the 3 lowest bits set to 0, which is equivalent to multiplying +// by h=8. + +typedef enum _SYMCRYPT_ECURVE_GEN_ALG_ID { + SYMCRYPT_ECURVE_GEN_ALG_ID_NULL = 0, +} SYMCRYPT_ECURVE_GEN_ALG_ID; +// +// SYMCRYPT_ECURVE_GEN_ALG_ID is used to specify (if available) the algorithm that +// generates the curve parameters from the provided seed. +// + + +typedef struct _SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION { + UINT32 PrivateKeyDefaultFormat; + UINT32 HighBitRestrictionNumOfBits; + UINT32 HighBitRestrictionPosition; + UINT32 HighBitRestrictionValue; +} SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION, *PSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION; +typedef const SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION * PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION; +// +// SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION is used to specify restrictions and default formats +// for known curves. The possible formats and restrictions are explained below. +// + +// Secret key formats +// ================== +// The possible secret key formats in SymCrypt are shown below. For all formats, s denotes +// a "proper secret key" defined as above. I.e. 
0 < s < GOrd. +// +// 1. "Canonical": s +// 2. "DivH": s/h mod GOrd +// 3. "DivHTimesH": h*(s/h mod GOrd) +// 4. "TimesH": h*s <-- This format is currently unsupported +// +// Remarks: +// - The above formats apply **only to external formats**: When somebody is +// importing a secret key (from test vectors, for example) or exporting a key. +// The internal format of the secret keys might be one of them or something totally +// different; the internal format is not visible to the caller. +// - Formats 3 and 4 have bigger storage requirements compared to 1 and 2, as +// the key can be up to |E|. +// - When h=1 all formats are identical. This is the case for NIST curves. +// - The NUMS curves use the "DivH" secret key format in the test vectors and the +// multiplication algorithm implicitly multiplies by h. +// - Curve 25519 uses the "DivHTimesH" secret key format in the test vectors. +typedef enum _SYMCRYPT_ECKEY_PRIVATE_FORMAT { + SYMCRYPT_ECKEY_PRIVATE_FORMAT_NULL = 0, + SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL = 1, + SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH = 2, + SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH = 3, +} SYMCRYPT_ECKEY_PRIVATE_FORMAT; + +// High bit restrictions +// ===================== +// A high bit restriction is a requirement for some of the high bits of the secret keys +// (usually the most significant bits of the curve). +// Currently only curve 25519 imposes such a restriction: That the bits 255 and 254 of the +// secret key in the "DivHTimesH" format are 0 and 1, respectively. +// +// The high bit restrictions specification takes the following form: +// - Number of bits that are specified +// - Bit position of the lowest bit to be specified (starting from 0 for the LSB) +// - The bit values +// The bits that are specified refer to the relevant secret key format. +// For Canonical and DivH formats the total number of bits is the # bits of GOrd-1. +// For DivHTimesH and TimesH formats the total number of bits is the # bits of |E|-1. 
+// +// Note: as GOrd must be prime, #bits(GOrd) == #bits(GOrd-1). The same is true +// for |E|=h*GOrd as it cannot be a power of 2. +// +// The HighBitRestrictionNumOfBits field is a value between 0 and 32 (inclusive) +// and specifies how many bits of the HighBitRestrictionValue are used (starting +// from the least significant bit of the value). The bits that are restricted are +// the bits [HighBitRestrictionPosition+HighBitRestrictionNumOfBits-1, ..., HighBitRestrictionPosition] +// +// For example, let's assume it is required that the bits [104, 103, ..., 100] +// of all private keys of a curve are always 11011. +// Then the parameters should be set to +// HighBitRestrictionNumOfBits = 5 +// HighBitRestrictionPosition = 100 +// HighBitRestrictionValue = 0x1B +// + + +typedef struct _SYMCRYPT_ECURVE_PARAMS { + UINT32 version; // Version of the parameters structure (see comment below) + SYMCRYPT_ECURVE_TYPE type; // Type of the curve + SYMCRYPT_ECURVE_GEN_ALG_ID algId; // Algorithm ID for generation of parameters from seed + UINT32 cbFieldLength; // Length of the field elements in bytes + UINT32 cbSubgroupOrder; // Length of the subgroup in bytes + UINT32 cbCofactor; // Length of the cofactor in bytes + UINT32 cbSeed; // Length of the seed + // This struct is followed in memory by: + //P[cbFieldLength] Prime of the base field + //A[cbFieldLength] Coefficient A of all three types of curves + //B[cbFieldLength] Coefficient B of Weierstrass and Montgomery curves and D for Twisted Edwards curves + //Gx[cbFieldLength] X-coordinate of the distinguished point (assuming SYMCRYPT_ECPOINT_FORMAT_XY) + //Gy[cbFieldLength] Y-coordinate of the distinguished point (assuming SYMCRYPT_ECPOINT_FORMAT_XY) + //n[cbSubgroupOrder] Order of the subgroup generated by the distinguished point + //h[cbCofactor] Cofactor of the distinguished point + //S[cbSeed] Seed of the curve + + //ParamsV2Extension[sizeof(SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION)]; // Only on version 2 of the parameters 
+} SYMCRYPT_ECURVE_PARAMS, *PSYMCRYPT_ECURVE_PARAMS; +typedef const SYMCRYPT_ECURVE_PARAMS * PCSYMCRYPT_ECURVE_PARAMS; +// +// SYMCRYPT_ECURVE_PARAMS is used to specify all the parameters needed for the curve generation. The above +// are versions 1 and 2 of the curve parameters. +// + +typedef enum _SYMCRYPT_ECPOINT_FORMAT { + SYMCRYPT_ECPOINT_FORMAT_X = 1, // One value, encoding the X coordinate only of a point + SYMCRYPT_ECPOINT_FORMAT_XY = 2, // Two equally-sized values, the first one encoding X and the second one encoding Y +} SYMCRYPT_ECPOINT_FORMAT; +// +// SYMCRYPT_ECPOINT_FORMAT is used to support different elliptic curve point formats, including possible point compression. +// + +//======================================================================== +//======================================================================== +// Main schema for object creation, deletion, and management. +// +// Object management is the same for most object types. For an object type XXX we have +// the following functions: +// +// PSYMCRYPT_XXX +// SYMCRYPT_CALL +// SymCryptXxxAllocate( <size parameters> ) +// Allocates an object of type XXX according to the specified size parameters. +// If the allocation fails, NULL is returned. +// If the allocation succeeds, an XXX pointer is returned, and the caller is responsible +// for freeing the result using SymCryptXxxFree(). +// The value of the new object is undefined. +// All the parameters to this function are published. (Object sizes cannot be private information.) +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxFree( _Inout_ PSYMCRYPT_XXX p ) +// Free an XXX object allocated with SymCryptXxxAllocate(). +// Any storage location in the object that might have contained private information is wiped. +// +// UINT32 +// SYMCRYPT_CALL +// SymCryptSizeofXxxFromYyy( <size parameters> ); +// Memory size that is sufficient to store an XXX object with size defined by the <size parameters>. 
+// The Yyy specifies the form of the size parameters, for example Ecurve. +// This is a runtime function as the size of an object is a run-time decision dependent on the CPU stepping. +// The result is always a multiple of the alignment requirements of this object type, so arrays can be built +// using this element size. +// +// SYMCRYPT_SIZEOF_XXX_FROM_YYY( <size parameters> ) +// This is a compile-time macro that computes a value not less than the SymCryptSizeofXxxFromYyy function, and +// is suitable to statically compute the size of a memory buffer for an object. +// (Not defined for all types.) +// +// PSYMCRYPT_XXX +// SYMCRYPT_CALL +// SymCryptXxxCreate( +// _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, +// SIZE_T cbBuffer, +// <size parameters> ); +// Create an XXX object from the provided (pbBuffer, cbBuffer) space. +// This function performs the necessary initializations of the object, but does not assign or set a value. +// The object will be able to store values up to size determined by the <size parameters>. +// Requirement: +// - pbBuffer is aligned to SYMCRYPT_ASYM_ALIGN_VALUE. Note that this can be a stricter requirement than +// SYMCRYPT_ALIGNED, and memory allocation functions might not return pointers that are suitably +// aligned. For some object types and some CPUs, the alignment requirements might be less strict. +// The main purpose of this relaxation is to always allow objects that are spaced +// SymCryptSizeofXxxFromYyy apart. The common usage is to create an array of objects. The array +// starts at a SYMCRYPT_ASYM_ALIGNed location, with each element SymCryptSizeofXxxFromYyy(..) bytes long. +// - cbBuffer >= SymCryptSizeofXxxFromYyy( <size parameters> ) +// - (pbBuffer,cbBuffer) memory must be exclusively used by this object. +// The last requirement ensures that all objects are non-overlapping (except for API functions +// that explicitly create overlapping objects). +// All parameters are published. 
+// It is always safe to choose +// cbBuffer = SymCryptSizeofXxxFromYyy( <size parameters> ) +// The returned object pointer is simply a cast of the pbBuffer pointer. +// Callers that manage arrays of objects can reconstruct the PSYMCRYPT_XXX by casting the buffer pointer +// to the right type. +// An object that is created with this function should be wiped, even if it doesn't contain private data. +// The SymCryptXxxWipe() function also frees any associated data that the library may maintain. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxWipe( _Out_ PSYMCRYPT_XXX Dst ) +// All private information in the Dst object is wiped, and any associated data is freed. +// Unless otherwise specified, the Dst object is left in an undefined state. +// A SymCryptXxxAllocate-d object does not have to be wiped before it is freed +// because the SymCryptXxxFree function will perform the wipe. +// However, SymCryptXxxCreate-d objects should always be wiped even if they don't contain +// secret data, as the wipe also frees any associated data the library may maintain. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCopy( +// _In_ PCSYMCRYPT_XXX pxSrc, +// _Out_ PSYMCRYPT_XXX pxDst ); +// Dst = Src. +// Requirement: The <size parameters> of both objects should be the same. +// Src must be in a defined state, it is not valid to copy an undefined object. +// Src and Dst may be the same object (though that is a no-op). +// + +//======================================================================== +// RSAKEY objects' API +// + +#define SYMCRYPT_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps ) \ + SYMCRYPT_INTERNAL_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps ) +// Return a buffer size large enough to create an RSA key with the specified +// modulus size, # primes, # public exponents, and upper bound for the bitsize of each public exponent. 
+// If the object will only contain a public key, nPrimes can be set to 0 + +PSYMCRYPT_RSAKEY +SYMCRYPT_CALL +SymCryptRsakeyAllocate( + _In_ PCSYMCRYPT_RSA_PARAMS pParams, + _In_ UINT32 flags ); +// +// Allocate and create a new RSAKEY object sized according to the parameters. +// If the SYMCRYPT_RSAKEY object will only be used for a public key, the +// SYMCRYPT_RSA_PARAMS structure may set nPrimes = 0. Use of +// SymCryptRsakeySetValueFromPrivateExponent requires nPrimes = 2. +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptRsakeyGenerate or +// SymCryptRsakeySetValue*. +// +// No flags are specified for this function. +// + +VOID +SYMCRYPT_CALL +SymCryptRsakeyFree( _Out_ PSYMCRYPT_RSAKEY pkObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofRsakeyFromParams( _In_ PCSYMCRYPT_RSA_PARAMS pParams ); +// If the to-be-allocated SYMCRYPT_RSAKEY object will only be used for a public key, the +// SYMCRYPT_RSA_PARAMS structure may set nPrimes = 0. + +PSYMCRYPT_RSAKEY +SYMCRYPT_CALL +SymCryptRsakeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_RSA_PARAMS pParams ); +// +// Create an RSAKEY object from a buffer, but does not initialize it. +// If the SYMCRYPT_RSAKEY object will only be used for a public key, the +// SYMCRYPT_RSA_PARAMS structure may set nPrimes = 0. Use of +// SymCryptRsakeySetValueFromPrivateExponent requires nPrimes = 2. +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptRsakeyGenerate or +// SymCryptRsakeySetValue*. +// + +VOID +SYMCRYPT_CALL +SymCryptRsakeyWipe( _Out_ PSYMCRYPT_RSAKEY pkDst ); + +// +//VOID +//SYMCRYPT_CALL +//SymCryptRsakeyCopy( +// _In_ PCSYMCRYPT_RSAKEY pkSrc, +// _Out_ PSYMCRYPT_RSAKEY pkDst ); +// +// This function is currently not available. 
+// + +//======================================================================== +// DLGROUP objects' API +// + +PSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlgroupAllocate( UINT32 nBitsOfP, UINT32 nBitsOfQ ); +// +// Allocate a Discrete Logarithm group object suitable for the given sizes. +// +// nBitsOfP: Maximum number of bits of the field prime P. Specifying a value larger +// than the actual size is allowed, but inefficient. +// nBitsOfQ: Maximum number of bits of the group order Q. Specify the size of Q, +// or 0 if the size of Q is not (yet) known. +// +// This call does not initialize the DLGROUP. It should be followed +// by a call to SymCryptDlgroupGenerate or SymCryptDlgroupSetValue. +// +// nBitsOfQ is allowed to be equal to 0 and signifies that the size of Q +// is unknown or Q does not exist. This may be used when creating a DLGROUP +// for the DH algorithm which does not use a prime Q. +// +// Setting nBitsOfQ to something bigger than 0 signifies that the size of +// the prime Q is known and if a future caller tries to import a bigger Q then +// the SymCryptDlgroupSetValue call will fail. +// +// Technically nBitsOfQ should always be strictly less than nBitsOfP, as Q divides +// P-1. For simplicity, it is allowed that callers specify nBitsOfQ equal to nBitsOfP +// in this call, but SymCrypt will treat this as setting nBitsOfQ to (nBitsOfP-1). +// +// Setting nBitsOfQ to 0 might result in a bigger size of the DLGROUP object +// compared to setting it to a specific size (see SymCryptSizeofDlgroupFromBitsizes). +// +// Requirements: +// - nBitsOfP >= nBitsOfQ +// + +VOID +SYMCRYPT_CALL +SymCryptDlgroupFree( _Out_ PSYMCRYPT_DLGROUP pgObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDlgroupFromBitsizes( UINT32 nBitsOfP, UINT32 nBitsOfQ ); +// +// This call returns the memory size that is sufficient to store a +// DLGROUP object with primes P,Q of size nBitsOfP and nBitsOfQ, +// respectively (L,N parameters in FIPS 186-3 specs). 
+// +// Requirements: +// - nBitsOfP >= nBitsOfQ +// +// Remarks: +// - The value in nBitsOfQ is allowed to be equal to 0 +// (see SymCryptDlgroupAllocate). +// +// - When nBitsOfQ!=0 this is a monotonic function w.r.t. a partial order on N^2. +// I.e. for all fixed (nBitsOfP_0,nBitsOfQ_0) and (nBitsOfP_1,nBitsOfQ_1) with +// nBitsOfQ_0>0 and nBitsOfQ_1>0, +// +// (nBitsOfP_0<=nBitsOfP_1 AND nBitsOfQ_0<=nBitsOfQ_1) implies that +// F(nBitsOfP_0,nBitsOfQ_0) <= F(nBitsOfP_1,nBitsOfQ_1) +// where F is the function SymCryptSizeofDlgroupFromBitsizes. +// +// - F(nBitsOfP, 0)=F(nBitsOfP, nBitsOfP-1). Thus when nBitsOfQ==0 the +// function takes the maximum value for a fixed nBitsOfP. +// + +PSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlgroupCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nBitsOfP, + UINT32 nBitsOfQ ); +// +// Creates a DL group object, but does not initialize it. It must be followed +// by a call to SymCryptDlgroupGenerate or SymCryptDlgroupSetValue. +// +// - pbBuffer,cbBuffer: memory buffer to create the object out of. The required size +// can be computed with SymCryptSizeofDlgroupFromBitsizes(). +// - nBitsOfP: number of bits of the field prime P. +// - nBitsOfQ: number of bits of the group order Q, or 0 if the size of Q is not (yet) known. +// + +VOID +SYMCRYPT_CALL +SymCryptDlgroupWipe( _Out_ PSYMCRYPT_DLGROUP pgDst ); + +VOID +SYMCRYPT_CALL +SymCryptDlgroupCopy( + _In_ PCSYMCRYPT_DLGROUP pgSrc, + _Out_ PSYMCRYPT_DLGROUP pgDst ); + +//======================================================================== +// DLKEY objects' API +// + +PSYMCRYPT_DLKEY +SYMCRYPT_CALL +SymCryptDlkeyAllocate( _In_ PCSYMCRYPT_DLGROUP pDlgroup ); +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptDlkeyGenerate or +// SymCryptDlkeySetValue. 
+// + +VOID +SYMCRYPT_CALL +SymCryptDlkeyFree( _Out_ PSYMCRYPT_DLKEY pkObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDlkeyFromDlgroup( _In_ PCSYMCRYPT_DLGROUP pDlgroup ); + +PSYMCRYPT_DLKEY +SYMCRYPT_CALL +SymCryptDlkeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_DLGROUP pDlgroup ); + +VOID +SYMCRYPT_CALL +SymCryptDlkeyWipe( _Out_ PSYMCRYPT_DLKEY pkDst ); + +VOID +SYMCRYPT_CALL +SymCryptDlkeyCopy( + _In_ PCSYMCRYPT_DLKEY pkSrc, + _Out_ PSYMCRYPT_DLKEY pkDst ); + +//======================================================================== +// ECURVE objects' API is slightly different than the above API schema because of the close +// relation to multiple parameters, the fact that they contain public information, +// and that they are persisted by the callers. +// Thus, the Allocate function takes in all the curve parameters and there are no Create, +// Wipe, or Copy functions. +// + +PSYMCRYPT_ECURVE +SYMCRYPT_CALL +SymCryptEcurveAllocate( + _In_ PCSYMCRYPT_ECURVE_PARAMS pParams, + _In_ UINT32 flags ); +// +// Allocate memory and create an ECURVE object which is defined +// by the parameters in pParams. +// +// - pParams: parameters that define the curve +// - flags: Not used, must be zero. +// +// Future versions might use the flags to enable different features/tradeoffs. +// There are a number of interesting memory/speed/pre-computation cost trades that can be made. +// For example, pre-computing multiples of the distinguished point, or (parallel?) pre-computation +// of (r, rG) pairs for random r values. +// +// This function applies limited validation of the pParams. The validation is intended to eliminate +// the threat of denial-of-service when hostile parameters are presented. It does not ensure that +// the parameters make sense, define a proper curve, or that any elliptic-curve operations made on +// the curve built from these parameters will fail, succeed or provide any security. 
+// The only guarantee provided for invalid parameters is that all operations on this curve will +// not crash and will return in some reasonable amount of time. +// +// Returns NULL if out of memory or the parameters are deemed invalid. +// If the return value is not NULL, the object must later be freed with SymCryptEcurveFree(). +// + +VOID +SYMCRYPT_CALL +SymCryptEcurveFree( _Out_ PSYMCRYPT_ECURVE pCurve ); + +//======================================================================== +// ECKEY objects' API is slightly different than the above API schema in the sense that they +// take as input an ECURVE object pointer instead of the number of digits. +// + +PSYMCRYPT_ECKEY +SYMCRYPT_CALL +SymCryptEckeyAllocate( _In_ PCSYMCRYPT_ECURVE pCurve ); + +VOID +SYMCRYPT_CALL +SymCryptEckeyFree( _Out_ PSYMCRYPT_ECKEY pkObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEckeyFromCurve( _In_ PCSYMCRYPT_ECURVE pCurve ); + +PSYMCRYPT_ECKEY +SYMCRYPT_CALL +SymCryptEckeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_ECURVE pCurve ); + +VOID +SYMCRYPT_CALL +SymCryptEckeyWipe( _Out_ PSYMCRYPT_ECKEY pkDst ); + +VOID +SymCryptEckeyCopy( + _In_ PCSYMCRYPT_ECKEY pkSrc, + _Out_ PSYMCRYPT_ECKEY pkDst ); + + +//===================================================== +// Flags for asymmetric key generation and import + +// These flags are introduced primarily for FIPS purposes. For FIPS 140-3 rather than expose to the +// caller the specifics of what tests will be run with various algorithms, we are sanitizing flags +// provided on asymmetric key generation and import to enable the caller to indicate their intent, +// and for SymCrypt to perform the required testing. +// Below we define the flags that can be passed and when a caller should set them. +// The specifics of what tests will be run are likely to change over time, as FIPS requirements and +// our understanding of how best to implement them, change over time. 
Callers should not rely on +// specific behavior. + + +// Validation required by FIPS is enabled by default. This flag enables a caller to opt out of this +// validation. +#define SYMCRYPT_FLAG_KEY_NO_FIPS (0x100) + +// When opting out of FIPS, SymCrypt may still perform some sanity checks on key import +// In very performance sensitive situations where a caller strongly trusts the values it is passing +// to SymCrypt and does not care about FIPS (or can statically prove properties about the imported +// keys), a caller may specify SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION in addition to +// SYMCRYPT_FLAG_KEY_NO_FIPS to skip costly checks +#define SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION (0x200) + +// Callers must specify what algorithm(s) a given asymmetric key will be used for. +// This information will be tracked by SymCrypt, and attempting to use the key in an algorithm it +// was not generated or imported for will result in failure. +// If no algorithm is specified then the key generation or import function will fail. +#define SYMCRYPT_FLAG_DLKEY_DSA (0x1000) +#define SYMCRYPT_FLAG_DLKEY_DH (0x2000) + +#define SYMCRYPT_FLAG_ECKEY_ECDSA (0x1000) +#define SYMCRYPT_FLAG_ECKEY_ECDH (0x2000) + +#define SYMCRYPT_FLAG_RSAKEY_SIGN (0x1000) +#define SYMCRYPT_FLAG_RSAKEY_ENCRYPT (0x2000) + +//===================================================== +// RSA key operations + +BOOLEAN +SYMCRYPT_CALL +SymCryptRsakeyHasPrivateKey( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Returns TRUE if the pkRsakey object has private key information. +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofModulus( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Returns the (tight) size in bytes of a byte array big enough to store +// the modulus of the key. 
+// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyModulusBits( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Return the number of bits in the RSA modulus +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofPublicExponent( + _In_ PCSYMCRYPT_RSAKEY pRsakey, + UINT32 index ); +// +// Returns the (tight) size in bytes of a byte array big enough to store +// the public exponent. The index specifies the index +// of the public exponent, starting with 0. +// +// Remarks: +// - Currently, only one public exponent is supported, i.e. the only +// valid index is 0. +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofPrime( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + UINT32 index ); +// +// Returns the (tight) size in bytes of a byte array big enough to store +// the selected prime of the key. The index specifies the index of the +// prime, starting at 0. +// +// Remarks: +// - Currently, only two prime RSA is supported, i.e. the only +// valid indexes are 0 and 1. +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyGetNumberOfPublicExponents( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Returns the number of public exponents stored in the key. +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyGetNumberOfPrimes( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Returns the number of primes stored in the key. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGenerate( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + _In_reads_opt_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + _In_ UINT32 flags ); +// +// Generate a new random RSA key using the information from the +// parameters passed to SymCryptRsakeyAllocate/SymCryptRsakeyCreate. +// pu64PubExp is the array of nPubExp public exponent values, specifying +// the public exponents for the key. +// nPubExp must match the # public exponents in the parameters. +// If pu64PubExp == NULL, nPubExp == 0, and the key requires only one +// public exponent, then the default exponent 2^16 + 1 is used.
+// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - At least one of the flags indicating what the Rsakey is to be used for must be specified: +// SYMCRYPT_FLAG_RSAKEY_SIGN +// SYMCRYPT_FLAG_RSAKEY_ENCRYPT + +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeySetValue( + _In_reads_bytes_( cbModulus ) PCBYTE pbModulus, + SIZE_T cbModulus, + _In_reads_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + _In_reads_opt_( nPrimes ) PCBYTE * ppPrimes, + _In_reads_opt_( nPrimes ) SIZE_T * pcbPrimes, + UINT32 nPrimes, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_RSAKEY pkRsakey ); +// +// Import key material to an RSAKEY object. The arguments are the following: +// - pbModulus is a pointer to a byte buffer of cbModulus bytes. It cannot be NULL. +// - pu64PubExp is a pointer to an array of nPubExp UINT64 exponent values. +// nPubExp must match the RSA parameters used to create the key object. +// - ppPrimes is an array of nPrimes pointers that point to byte buffers storing +// the primes. pcbPrimes is an array of nPrimes sizes such that +// the size of ppPrimes[i] is equal to pcbPrimes[i] for each i in [0, nPrimes-1]. 
+// - numFormat specifies the number format for all inputs +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// - At least one of the flags indicating what the Rsakey is to be used for must be specified: +// SYMCRYPT_FLAG_RSAKEY_SIGN +// SYMCRYPT_FLAG_RSAKEY_ENCRYPT +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// +// Remarks: +// - Modulus and all primes are stored in the same format specified by numFormat. +// - ppPrimes, pcbPrimes, and nPrimes can be NULL, NULL, and 0 respectively, when +// importing a public key. +// - Currently, the only acceptable value of nPubExp is 1. +// - Currently, the only acceptable value of nPrimes is 2 or 0. +// - Elements of ppPrimes must represent prime numbers. +// We allow separate sizes for each prime. This seems redundant because all primes +// are approximately the same size. However, some storage/encoding formats, such as ASN.1, +// strip leading zeroes, or add an additional leading zero depending on the situation. +// Allowing separate sizes avoids the need for the caller to make a copy of the data +// into a possibly slightly larger buffer. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeySetValueFromPrivateExponent( + _In_reads_bytes_( cbModulus ) PCBYTE pbModulus, + SIZE_T cbModulus, + UINT64 u64PubExp, + _In_reads_bytes_( cbPrivateExponent ) PCBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_RSAKEY pkRsakey ); +// +// Import a private key to an RSAKEY object using a private exponent. This is not generally +// recommended - where possible it is more efficient to import a private key using primes +// with SymCryptRsakeySetValue.
+// +// The arguments are the following: +// - pbModulus is a pointer to a byte buffer of cbModulus bytes. It cannot be NULL. +// - u64PubExp is a UINT64 public exponent value. +// - pbPrivateExponent is a pointer to a byte buffer of cbPrivateExponent bytes. It +// cannot be NULL. +// - numFormat specifies the number format for all inputs +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// - At least one of the flags indicating what the Rsakey is to be used for must be specified: +// SYMCRYPT_FLAG_RSAKEY_SIGN +// SYMCRYPT_FLAG_RSAKEY_ENCRYPT +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// +// Remarks: +// +// Modulus and Private exponent are stored in the same format specified by numFormat. +// +// Internally this attempts to recover a pair of primes (p1, p2) that factorize Modulus. +// This procedure has the following assumptions: +// Modulus (n) is the product of two prime factors, p1 and p2 +// e*d == 1 modulo LCM(p1-1, p2-1) +// e*d != 1 modulo 2^64 +// If any of these assumptions are not met, then the method may fail. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGetValue( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _Out_writes_bytes_( cbModulus ) PBYTE pbModulus, + SIZE_T cbModulus, + _Out_writes_opt_( nPubExp ) PUINT64 pu64PubExp, + UINT32 nPubExp, + _Out_writes_opt_( nPrimes ) PBYTE * ppPrimes, + _In_reads_opt_( nPrimes ) SIZE_T * pcbPrimes, + UINT32 nPrimes, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags ); +// +// Export key material from an RSAKEY object. The arguments are the following: +// - pbModulus is a pointer to a byte buffer of cbModulus bytes. +// - pu64PubExp is a pointer to an array of nPubExp elements that receives the public exponent values.
+// nPubExp must match the # public exponents in pkRsakey. +// - ppPrimes is an array of nPrimes pointers that point to byte buffers storing +// the primes. pcbPrimes is an array of nPrimes sizes such that +// the size of ppPrimes[i] is equal to pcbPrimes[i] for each i in [0, nPrimes-1]. +// Remarks: +// - All parameters are stored in the same format specified by numFormat. +// - ppPrimes, pcbPrimes, and nPrimes can be NULL, NULL, and 0 respectively, when +// exporting a public key. +// - Currently, the only acceptable value of nPubExp is 1 or 0. +// - Currently, the only acceptable value of nPrimes is 2 or 0. +// We use separate sizes for each prime. This supports the tight encoding +// used by CNG export blobs, and uses the same format as SymCryptRsakeySetValue. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGetCrtValue( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _Out_writes_opt_(nCrtExponents) PBYTE * ppCrtExponents, + _In_reads_(nCrtExponents) SIZE_T * pcbCrtExponents, + UINT32 nCrtExponents, + _Out_writes_bytes_opt_(cbCrtCoefficient) PBYTE pbCrtCoefficient, + SIZE_T cbCrtCoefficient, + _Out_writes_bytes_opt_(cbPrivateExponent) PBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags); +// +// Export Crt key material from an RSAKEY object. The arguments are the following: +// ppCrtExponents is an array of nCrtExponents pointers that point to byte buffers +// storing the Crt exponents. That is, d mod p-1, d mod q-1. +// pcbCrtExponents is an array of nCrtExponents sizes such that +// the size of ppCrtExponents[i] is equal to pcbCrtExponents[i] for each i in [0, nCrtExponents-1] +// pbCrtCoefficient is a pointer to a byte buffer of cbCrtCoefficient bytes, that is q^{-1} mod p +// pbPrivateExponent is a pointer to a byte buffer of cbPrivateExponent bytes, that is, d. + +// Remarks: +// - All parameters are stored in the same format specified by numFormat.
+// - ppCrtExponents, pcbCrtExponents, and nCrtExponents can be NULL, NULL, and 0 respectively +// - Currently, the only acceptable value of nCrtExponents is 2 or 0. +// pbCrtCoefficient, pbPrivateExponent can be NULL. + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyExtendKeyUsage( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + UINT32 flags ); +// +// Enable an existing key which has been generated or imported to be used in specified algorithms. +// Some callers may not know at key generation or import time what algorithms a key will be used for +// and this API allows the key to be extended for use in additional algorithms. Use of this API may +// not be compliant with FIPS 140-3. +// +// - flags must be some bitwise OR of the following flags: +// SYMCRYPT_FLAG_RSAKEY_SIGN +// SYMCRYPT_FLAG_RSAKEY_ENCRYPT + +#define SYMCRYPT_DLGROUP_FIPS_LATEST (SYMCRYPT_DLGROUP_FIPS_186_3) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGenerate( + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_ SYMCRYPT_DLGROUP_FIPS fipsStandard, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ); +// +// Generate a Discrete Logarithm Group for use in Diffie-Hellman and DSA. +// +// - hashAlgorithm: Hash algorithm to be used for generating the group (if required by the algorithm) +// - fipsStandard: Which FIPS standard algorithm to use for generating the group. +// - pDlgroup: group object that will be initialized with a newly generated group. +// +// pDlgroup must have been created with SymCryptDlgroupAllocate() or SymCryptDlgroupCreate().
+// +// If nBitsOfQ was equal to 0 when the DLGROUP was Allocate-d/Create-d +// (and only in this case), then this function picks a default size +// for the prime Q according to the following table: +// - If nBitsOfP <= 160 then the function fails with SYMCRYPT_FIPS_FAILURE +// - If 160 < nBitsOfP <= 1024 then nBitsOfQ = 160 +// - If 1024 < nBitsOfP <= 2048 then nBitsOfQ = 256 +// - If 2048 < nBitsOfP then nBitsOfQ = 256 +// +// If fipsStandard == SYMCRYPT_DLGROUP_FIPS_NONE then no FIPS compliance is requested. +// The code defaults to SYMCRYPT_DLGROUP_FIPS_LATEST. +// +// The requirements below address the parameter values after the defaults have been substituted +// for nBitsOfQ and fipsStandard. +// +// Requirements: +// - pDlgroup!=NULL. Otherwise it returns SYMCRYPT_INVALID_ARGUMENT. +// +// - If fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2, hashAlgorithm MUST be equal to +// NULL, and nBitsOfQ <= 160 or nBitsOfQ = 0 && nBitsOfP <= 1024. +// +// - If fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_3, then hashAlgorithm MUST NOT be equal +// to NULL. +// +// - If nBitsOfHash is the number of bits of the output block of hashAlgorithm, +// it is required that: +// nBitsOfQ <= nBitsOfHash <= nBitsOfP +// (where nBitsOfQ>0 was either provided by the caller of Allocate/Create +// or it was picked from the above table). +// +// - For FIPS 186-2, we have that nBitsOfHash == 160 (SHA1 output size). Therefore +// this flag can only work with nBitsOfQ up to 160 bits. Anything else will +// return SYMCRYPT_INVALID_ARGUMENT. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupSetValueSafePrime( + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE dhSafePrimeType, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ); +// +// Sets a Discrete Logarithm Group for use in Diffie-Hellman using a named safe-prime group. +// +// - dhSafePrimeType: The type of named safe-prime group to use +// +// pDlGroup must have been created with SymCryptDlgroupAllocate() or SymCryptDlgroupCreate(). 
+// +// Selects the largest named safe-prime group that will fit in the allocated Dlgroup (based on the +// values of nBitsOfP and nBitsOfQ used in allocation). It is recommended that callers set nBitsOfQ +// to 0 in allocation (equivalent to nBitsOfQ = (nBitsOfP-1)) when creating a safe-prime group. +// +// Requirements: +// - pDlgroup was allocated with sufficient bits for the selected P (and Q) to fit. If there is no +// named safe-prime group with bit size <= the allocated size, it returns SYMCRYPT_INVALID_ARGUMENT. +// The minimum currently supported bitsize of named safe-prime groups is nBitsOfP = 2048. +// +// - dhSafePrimeType!=SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_NONE. Otherwise it returns SYMCRYPT_INVALID_ARGUMENT. +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptDlgroupIsSame( + _In_ PCSYMCRYPT_DLGROUP pDlgroup1, + _In_ PCSYMCRYPT_DLGROUP pDlgroup2 ); +// +// Returns true if pDlgroup1 and pDlgroup2 have same set of P and G, false otherwise. +// + +VOID +SYMCRYPT_CALL +SymCryptDlgroupGetSizes( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _Out_ SIZE_T* pcbPrimeP, + _Out_ SIZE_T* pcbPrimeQ, + _Out_ SIZE_T* pcbGenG, + _Out_ SIZE_T* pcbSeed ); +// +// It returns the tight byte-sizes of each parameter of the group: prime P, +// prime Q, generator G, and the FIPS domain_parameter_seed. +// +// If one of the pointers is NULL then the corresponding size is ignored. +// +// Remarks: +// - If the group has no prime Q, then the returned sizes in *pcbPrimeQ and +// *pcbSeed will be 0. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupSetValue( + _In_reads_bytes_( cbPrimeP ) PCBYTE pbPrimeP, + SIZE_T cbPrimeP, + _In_reads_bytes_( cbPrimeQ ) PCBYTE pbPrimeQ, + SIZE_T cbPrimeQ, + _In_reads_bytes_( cbGenG ) PCBYTE pbGenG, + SIZE_T cbGenG, + SYMCRYPT_NUMBER_FORMAT numFormat, + _In_opt_ PCSYMCRYPT_HASH pHashAlgorithm, + _In_reads_bytes_( cbSeed ) PCBYTE pbSeed, + SIZE_T cbSeed, + UINT32 genCounter, + SYMCRYPT_DLGROUP_FIPS fipsStandard, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ); +// +// Import key material to a DLGROUP object. +// - Prime P is NOT optional and should always be imported. +// - Prime Q is an optional parameter that may or may not be imported. If not, +// the group will not have a prime Q. +// - Generator G is an optional parameter. However, if not present, the +// algorithm will generate a random G of order Q. If both Q and G are missing +// the call fails with SYMCRYPT_INVALID_ARGUMENT. +// - The parameters pHashAlgorithm, pbSeed, cbSeed and genCounter are the generation +// parameters of the FIPS standards. If fipsStandard is not equal to +// SYMCRYPT_DLGROUP_FIPS_NONE, the algorithm verifies that the input P,Q,G parameters are properly +// generated by the corresponding standard. +// If there is any discrepancy the function returns SYMCRYPT_AUTHENTICATION_FAILURE. +// Notice that these parameters are imported even if they aren't verified. +// +// Requirements: +// - The numbers stored in pbPrimeP and pbGenG must each have at most nBitsOfP significant bits. +// Otherwise the function returns SYMCRYPT_INVALID_ARGUMENT. +// - The number stored in pbPrimeQ must have at most nBitsOfQ significant bits, where nBitsOfQ is either +// the **non-zero** value input in the call of Allocate/Create or equal to nBitsOfP if +// 0 was input. +// Otherwise the function returns SYMCRYPT_INVALID_ARGUMENT. +// - The size of the seed cbSeed must be **exactly** equal to the byte-size of the imported +// modulus Q. Otherwise the function returns SYMCRYPT_INVALID_ARGUMENT.
+// +// Remarks: +// - The buffers pbPrimeP, pbPrimeQ, pbGenG must all have the same number +// format defined by numFormat. +// - Primes P and (when provided) Q must represent prime numbers. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGetValue( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _Out_writes_bytes_( cbPrimeP ) PBYTE pbPrimeP, + SIZE_T cbPrimeP, + _Out_writes_bytes_( cbPrimeQ ) PBYTE pbPrimeQ, + SIZE_T cbPrimeQ, + _Out_writes_bytes_( cbGenG ) PBYTE pbGenG, + SIZE_T cbGenG, + SYMCRYPT_NUMBER_FORMAT numFormat, + _Out_ PCSYMCRYPT_HASH * ppHashAlgorithm, + _Out_writes_bytes_( cbSeed ) PBYTE pbSeed, + SIZE_T cbSeed, + _Out_ PUINT32 pGenCounter ); + +// +// Retrieve the group parameters from a DLGROUP. The buffers should be +// allocated by the caller. If a pbXXX parameter is NULL (and the cbXXX==0) +// then this parameter is not returned. +// +// Requirements: +// - All the buffers must have size at least equal to the corresponding +// size returned by SymCryptDlgroupGetSizes. For the pbSeed buffer the +// size must be **exactly** equal to the size returned from SymCryptDlgroupGetSizes. +// +// Remarks: +// - If the caller requests a Q but the group does not have one, this function +// will fail with SYMCRYPT_INVALID_BLOB. +// - The return value of *ppHashAlgorithm can be NULL if the group was generated +// by FIPS 186-2. +// + +//===================================================== +// DL flags +// +// Also see Generic key validation flags above + +// SYMCRYPT_FLAG_DLKEY_GEN_MODP: +// When set on SymCryptDlkeyGenerate call, generate a private key between 1 and P-2. 
+// When Q is known, this overrides the default behavior of generating a private key between 1 and Q-1, +// or 1 and min(2^nBitsPriv-1, Q-1) for named safe-prime groups +// When Q is not known, this does not affect the behavior +#define SYMCRYPT_FLAG_DLKEY_GEN_MODP (0x01) + +//===================================================== +// DL key operations + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeySetPrivateKeyLength( _Inout_ PSYMCRYPT_DLKEY pkDlkey, UINT32 nBitsPriv, UINT32 flags ); +// +// Sets the number of bits that this dlkey can have in its private key +// The set value is only used for when the dlkey is a named safe-prime dlgroup, otherwise the value +// is ignored. +// +// Requirements: +// - pkDlkey->pDlgroup->nBitsOfQ >= nBitsPriv >= pkDlkey->pDlgroup->nMinBitsPriv +// Otherwise SYMCRYPT_INVALID_ARGUMENT is returned +// +// Allowed flags: +// - None. + +PCSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlkeyGetGroup( _In_ PCSYMCRYPT_DLKEY pkDlkey ); +// +// Returns a pointer to the dlgroup object associated with the key. +// + +UINT32 +SYMCRYPT_CALL +SymCryptDlkeySizeofPublicKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ); +// +// Returns the size in bytes of a blob big enough to retrieve the public key. +// + +UINT32 +SYMCRYPT_CALL +SymCryptDlkeySizeofPrivateKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ); +// +// Returns the size in bytes of a blob big enough to retrieve the private key. +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptDlkeyHasPrivateKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ); +// +// Returns TRUE if the pkDlkey object has a private key set. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyGenerate( + _In_ UINT32 flags, + _Inout_ PSYMCRYPT_DLKEY pkDlkey ); +// +// Allowed flags: +// - SYMCRYPT_FLAG_DLKEY_GEN_MODP +// When set, generate a private key between 1 and P-2. 
+// When Q is known, this overrides the default behavior of generating a private key between 1 and Q-1, +// or 1 and min(2^nBitsPriv-1, Q-1) for named safe-prime groups +// When Q is not known, this does not affect the behavior +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - At least one of the flags indicating what the Dlkey is to be used for must be specified: +// SYMCRYPT_FLAG_DLKEY_DSA +// SYMCRYPT_FLAG_DLKEY_DH +// +// Note: +// If SYMCRYPT_FLAG_DLKEY_GEN_MODP is specified then SYMCRYPT_FLAG_KEY_NO_FIPS must also be +// specified to avoid SYMCRYPT_INVALID_ARGUMENT, as FIPS requires the default generation behavior +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeySetValue( + _In_reads_bytes_( cbPrivateKey ) PCBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _In_reads_bytes_( cbPublicKey ) PCBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_DLKEY pkDlkey ); +// +// Import key material to a DLKEY object. +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// - At least one of the flags indicating what the Dlkey is to be used for must be specified: +// SYMCRYPT_FLAG_DLKEY_DSA +// SYMCRYPT_FLAG_DLKEY_DH +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyGetValue( + _In_ PCSYMCRYPT_DLKEY pkDlkey, + _Out_writes_bytes_( cbPrivateKey ) + PBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _Out_writes_bytes_( cbPublicKey ) + PBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags ); +// +// Retrieve the public or the private key (or both) from a DLKEY. The buffers should be +// allocated by the caller. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyExtendKeyUsage( + _Inout_ PSYMCRYPT_DLKEY pkDlkey, + UINT32 flags ); +// +// Enable an existing key which has been generated or imported to be used in specified algorithms. +// Some callers may not know at key generation or import time what algorithms a key will be used for +// and this API allows the key to be extended for use in additional algorithms. Use of this API may +// not be compliant with FIPS 140-3. +// +// - flags must be some bitwise OR of the following flags: +// SYMCRYPT_FLAG_DLKEY_DSA +// SYMCRYPT_FLAG_DLKEY_DH + +//===================================================== +// Elliptic curve operations and supported curves +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurvePrivateKeyDefaultFormat( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the private key default format of the input curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveHighBitRestrictionNumOfBits( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bits specified by the high bit restriction +// value of the input curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveHighBitRestrictionPosition( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the position of the high bit restriction +// value of the input curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveHighBitRestrictionValue( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the high bit restriction value of the input curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveBitsizeofFieldModulus( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bits of a field element on which +// the curve is defined. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveBitsizeofGroupOrder( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bits of the order of the subgroup generated by +// the distinguished point of the curve. 
+// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveSizeofFieldElement( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bytes of a field element. It is used to +// construct buffers for setting and getting the value of elliptic curve points (most +// notably the public key of an ECKEY object). +// +// The result is equal to the cbFieldLength field of the parameters that created the curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveSizeofScalarMultiplier( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bytes of a scalar integer that is big enough to +// store a private key (or a multiplier of an elliptic curve point). It is used to +// construct buffers for setting and getting the value of a scalar multiplier (most +// notably the private key of an ECKEY object - see SymCryptEckeySetValue and +// SymCryptEckeyGetValue). +// +// The result is equal to sizeof( subgroupOrder * co-factor ). +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptEcurveIsSame( + _In_ PCSYMCRYPT_ECURVE pCurve1, + _In_ PCSYMCRYPT_ECURVE pCurve2); +// +// Returns true if pCurve1 and pCurve2 have same type, P, A, and B - false otherwise. +// +// Note: This does not check that the curves have the same G set, callers may additionally +// consider calling SymCryptEcpointIsEqual to compare the curves' distinguished points. 
+// + +// Internally supported curves +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP192; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP224; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP256; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP384; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP521; + +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP256t1; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP384t1; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP512t1; + +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsCurve25519; + +typedef enum _SYMCRYPT_ECURVE_ID +{ + SYMCRYPT_ECURVE_ID_NULL = 0, + SYMCRYPT_ECURVE_ID_NIST_P192 = 1, + SYMCRYPT_ECURVE_ID_NIST_P224 = 2, + SYMCRYPT_ECURVE_ID_NIST_P256 = 3, + SYMCRYPT_ECURVE_ID_NIST_P384 = 4, + SYMCRYPT_ECURVE_ID_NIST_P521 = 5, + SYMCRYPT_ECURVE_ID_NUMS_P256T1 = 6, + SYMCRYPT_ECURVE_ID_NUMS_P384T1 = 7, + SYMCRYPT_ECURVE_ID_NUMS_P512T1 = 8, + SYMCRYPT_ECURVE_ID_CURVE25519 = 9 +} SYMCRYPT_ECURVE_ID; + +PCSYMCRYPT_ECURVE_PARAMS +SYMCRYPT_CALL +SymCryptGetEcurveParams( SYMCRYPT_ECURVE_ID ecurveId ); +// +// Returns a pointer to the elliptic curve parameters structure for the specified curve ID. +// Returns NULL if the curve ID is invalid. +// + +//===================================================== +// ECC flags +// +// Also see Generic key validation flags above + +// SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION: This flag applies only to the ECDSA algorithm. When set, the sign +// and verify algorithms will not do hash truncation. The caller can use their own truncation method in such case. 
+// (default: according to the ECDSA standard) +#define SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION (0x08) + +//===================================================== +// EC key operations + +UINT32 +SYMCRYPT_CALL +SymCryptEckeySizeofPublicKey( + _In_ PCSYMCRYPT_ECKEY pkEckey, + _In_ SYMCRYPT_ECPOINT_FORMAT ecPointFormat ); +// +// Returns the size in bytes of a blob big enough to retrieve the public key in +// the specified ECPOINT format. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEckeySizeofPrivateKey( _In_ PCSYMCRYPT_ECKEY pkEckey ); +// +// Returns the size in bytes of a blob big enough to retrieve the private key. +// It is equal to SymCryptEcurveSizeofScalarMultiplier( pCurve ) where pCurve is the +// curve that created the key. +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptEckeyHasPrivateKey( _In_ PCSYMCRYPT_ECKEY pkEckey ); +// +// Returns TRUE if the pkEckey object has a private key set. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeySetValue( + _In_reads_bytes_( cbPrivateKey ) PCBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _In_reads_bytes_( cbPublicKey ) PCBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + SYMCRYPT_ECPOINT_FORMAT ecPointFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_ECKEY pEckey ); +// +// Import key material to an ECKEY object. +// +// Requirements: +// (pbPrivateKey, cbPrivateKey): a buffer that contains the private key, encoded +// in the format specified by the numFormat parameter. +// Note that the integer encoded in (pbPrivateKey, cbPrivateKey) is taken modulo the order of the +// subgroup generated by the curve generator. Callers that want a uniform private key value +// should ensure that the input is uniform in the range [1..GOrd-1]. +// +// Requirements: cbPrivateKey == SymCryptEckeySizeofPrivateKey( pEckey ) +// +// If pbPrivateKey == NULL && cbPrivateKey == 0, then no private key is imported, and the +// resulting ECKEY object will not have a private key. 
+// +// (pbPublicKey, cbPublicKey): buffer that contains the public key, encoded in the format +// specified by the format parameter, the buffer length, and the curve properties. +// +// Requirements: cbPublicKey == SymCryptEckeySizeofPublicKey( pEckey, ecPointFormat ) +// +// If no public key is presented (pbPublicKey == NULL && cbPublicKey == 0) then the public +// key is computed from the provided private key. +// +// At least one of the public and private keys must be provided. +// +// If both are provided, then they must match. +// +// The algorithm always sets the corresponding public key +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// - At least one of the flags indicating what the Eckey is to be used for must be specified: +// SYMCRYPT_FLAG_ECKEY_ECDSA +// SYMCRYPT_FLAG_ECKEY_ECDH +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeySetRandom( + _In_ UINT32 flags, + _Inout_ PSYMCRYPT_ECKEY pEckey ); +// +// Generates a new Eckey public/private key pair using the specified curve. The public key +// is a uniformly random non-zero point of the subgroup generated by the distinguished point +// of the curve. This complies with the FIPS 186-4 standard. +// +// Remarks: +// - In the case that the highbit restrictions on the curve are unsatisfiable, i.e. +// there is no private key smaller than the group order it returns +// SYMCRYPT_INVALID_ARGUMENT. 
+// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - At least one of the flags indicating what the Eckey is to be used for must be specified: +// SYMCRYPT_FLAG_ECKEY_ECDSA +// SYMCRYPT_FLAG_ECKEY_ECDH + +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyGetValue( + _In_ PCSYMCRYPT_ECKEY pEckey, + _Out_writes_bytes_( cbPrivateKey ) + PBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _Out_writes_bytes_( cbPublicKey ) + PBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + SYMCRYPT_ECPOINT_FORMAT ecPointFormat, + UINT32 flags ); +// +// Retrieve the public or the private key (or both) from an ECKEY. The buffers should be +// allocated by the caller. +// +// If (pbPrivateKey != NULL), then the function will return the private key in pbPrivateKey +// in the format specified by the numFormat parameter **as long as** the following two +// requirements are satisfied: +// 1. cbPrivateKey >= SymCryptEckeySizeofPrivateKey( pEckey ) +// 2. pEckey contains a private key part (If this fails the function returns SYMCRYPT_INVALID_BLOB) +// If (pbPrivateKey == NULL) and (cbPrivateKey == 0), then these parameters are ignored +// and no private key is returned. +// +// If (pbPublicKey != NULL), then the function will return the public key in pbPublicKey +// in the format specified by the numFormat and the ecPointFormat parameters +// **as long as** the following requirement is satisfied: +// 1. cbPublicKey >= SymCryptEckeySizeofPublicKey( pEckey, ecPointFormat ) +// If (pbPublicKey == NULL) and (cbPublicKey == 0), then these parameters are ignored +// and no public key is returned. +// +// Allowed flags: +// - None.
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyExtendKeyUsage( + _Inout_ PSYMCRYPT_ECKEY pEckey, + UINT32 flags ); +// +// Enable an existing key which has been generated or imported to be used in specified algorithms. +// Some callers may not know at key generation or import time what algorithms a key will be used for +// and this API allows the key to be extended for use in additional algorithms. Use of this API may +// not be compliant with FIPS 140-3 +// +// - flags must be some bitwise OR of the following flags: +// SYMCRYPT_FLAG_ECKEY_ECDSA +// SYMCRYPT_FLAG_ECKEY_ECDH + +/************************ + * Crypto algorithm API * + ************************/ + +// +// The Crypto algorithm API implements various cryptographic algorithms that use large-integer arithmetic. +// + +// +// RSA Encryption Algorithms +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaRawEncrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// This function encrypts the buffer pbSrc (of size cbSrc bytes) under the pkRsakey key using textbook RSA. +// The output is stored in the pbDst buffer (of size cbDst bytes). +// For in place encryption pbSrc = pbDst. +// +// Both input and output buffers store a number in the number format numFormat. +// +// Requirements: +// - If cbDst is too small for the result then SYMCRYPT_BUFFER_TOO_SMALL is returned. +// Safe size is cbDst = SymCryptRsakeySizeofModulus(pkRsakey). +// - The number stored in the pbSrc buffer must be strictly smaller than the value +// of the public modulus in pkRsakey. 
+// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaRawDecrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// This function decrypts the buffer pbSrc (of size cbSrc bytes) with the pkRsakey key using textbook RSA. +// The output is stored in the pbDst buffer (of size cbDst bytes). +// For in place decryption pbSrc = pbDst. +// +// Both input and output buffers store a number in the number format numFormat. +// +// Requirements: +// - If cbDst is too small for the result then SYMCRYPT_BUFFER_TOO_SMALL is returned. +// Safe size is cbDst = SymCryptRsakeySizeofModulus(pkRsakey). +// - The number stored in the pbSrc buffer must be strictly smaller than the value +// of the public modulus in pkRsakey. +// - The RSAKEY pkRsakey must have a private key part. Otherwise SYMCRYPT_INVALID_ARGUMENT is returned. +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Encrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfDst, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ); +// +// This function encrypts the buffer pbSrc under the pkRsakey key using RSA PKCS1 v1.5. +// The output is stored in the pbDst buffer and the number of bytes written in *pcbDst. +// +// If pbDst == NULL then only the *pcbDst is output. +// +// nfDst is the number format of the ciphertext (i.e. the pbDst buffer). +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Decrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nfSrc, + UINT32 flags, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ); +// +// Perform an RSA-PKCS1 decryption. 
+// - pbSrc/cbSrc: source buffer +// - nfSrc: format of source buffer +// - flags: must be 0 +// - pbDst/cbDst: destination buffer +// - pcbDst: receives the size of the decrypted data. +// +// If the data is improperly formatted, an error is returned. +// If pbDst == NULL, then *pcbDst is set to the decrypted data length, and the function succeeds. +// This is not recommended as retrieving the actual data requires a second RSA decryption, +// which is expensive. We recommend that callers provide a large enough buffer the first time. +// If pbDst != NULL and cbDst is too small, then *pcbDst is set to the required size of pbDst +// and SYMCRYPT_BUFFER_TOO_SMALL is returned. +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepEncrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfDst, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ); +// +// This function encrypts the buffer pbSrc under the pkRsakey key using RSA OAEP. +// The output is stored in the pbDst buffer and the number of bytes written in *pcbDst. +// +// If pbDst == NULL then only the *pcbDst is output. +// +// nfDst is the number format of the ciphertext (i.e. the pbDst buffer). +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepDecrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nfSrc, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ); +// +// This function decrypts the buffer pbSrc with the pkRsakey key using RSA OAEP.
+// The output is stored in the pbDst buffer and the number of bytes written in *pcbDst. +// +// If pbDst == NULL then only the *pcbDst is output. +// +// nfSrc is the number format of the ciphertext (i.e. the pbSrc buffer). +// +// Requirement: +// - cbSrc <= SymCryptRsakeySizeofModulus( pkRsakey ). Otherwise the function +// returns SYMCRYPT_INVALID_ARGUMENT. +// +// Allowed flags: +// None +// + +// +// RSA Signing Algorithms +// + +#define SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1 (0x01) +#define SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID (0x02) + +#define SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT (0x04) + +// +// SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1: For RSA PKCS1 to not use the OID on signing or verifying. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Sign( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_ PCSYMCRYPT_OID pHashOIDs, + _In_ SIZE_T nOIDCount, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _Out_writes_opt_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature, + _Out_ SIZE_T *pcbSignature ); +// +// This function signs a message (its hash value is stored in pbHashValue) with +// the pkRsakey key using RSA PKCS1 v1.5. The signature is stored in the pbSignature +// buffer and the number of bytes written in *pcbSignature. +// +// pHashOIDs points to an array of SYMCRYPT_OID and the array size is nOIDCount +// +// If pbSignature == NULL then only the *pcbSignature is output. +// +// nfSignature is the number format of the signature (i.e. the pbSignature buffer). Currently +// only SYMCRYPT_NUMBER_FORMAT_MSB_FIRST is supported. 
+// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1 +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Verify( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _In_reads_opt_( nOIDCount ) PCSYMCRYPT_OID pHashOID, + _In_ SIZE_T nOIDCount, + UINT32 flags ); +// +// This function verifies the signature of a message (its hash value is input in +// pbHashValue) with the pkRsakey key using RSA PKCS1 v1.5. The signature is input +// in the pbSignature buffer. +// +// pHashOID points to an array of SYMCRYPT_OID and the array size is nOIDCount +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// if it failed. +// +// nfSignature is the number format of the signature (i.e. the pbSignature buffer). Currently +// only SYMCRYPT_NUMBER_FORMAT_MSB_FIRST is supported. +// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID +// +// When the flag is set, this function will do signature verification by not using hash OID when needed +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssSign( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbSalt, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _Out_writes_opt_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature, + _Out_ SIZE_T *pcbSignature ); +// +// Sign a message using RSA-PSS +// - pkRsakey: Key to sign with; must contain a private key +// - pbHashValue/cbHashValue: Value to sign +// - hashAlgorithm: Hash algorithm to use in the MGF of PSS +// - cbSalt: # bytes of salt to use (typically equal to size of hash value) +// - flags: must be 0 +// - nfSignature: Number format of signature.
Typically SYMCRYPT_NUMBER_FORMAT_MSB_FIRST +// - pbSignature/cbSignature: buffer that receives the signature. +// If pbSignature == NULL, only *pcbSignature is returned. +// Note: pbSignature receives an integer, so if the buffer is larger than the modulus size +// it will be padded with zeroes. For MSB-first format the zeroes are at the start of the buffer. +// Typically this buffer is the same size as the RSA modulus. +// - pcbSignature: receives the size of the signature. +// +// Return value: +// If cbHashValue + cbSalt is too large (above modulus size minus 2 or 3 depending on details) then +// signature generation fails. +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssVerify( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbSalt, + UINT32 flags ); +// +// This function verifies the signature of a message (its hash value is input in +// pbHashValue) with the pkRsakey key using RSA PSS. The signature is input +// in the pbSignature buffer. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// if it failed. +// +// nfSignature is the number format of the signature (i.e. the pbSignature buffer). Currently +// only SYMCRYPT_NUMBER_FORMAT_MSB_FIRST is supported. +// +// Requirements: +// - cbHashValue <= SymCryptRsakeySizeofModulus( pkRsakey ) +// - cbSalt <= SymCryptRsakeySizeofModulus( pkRsakey ) +// - cbSignature <= SymCryptRsakeySizeofModulus( pkRsakey ) +// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT +// +// When the flag is set, this function will do signature verification using the cbSalt parameter as +// a minimum value for the salt length, rather than using it as an exact value.
Specifying this and +// setting cbSalt = 0 allows callers to verify a signature which has a valid encoding with any salt +// length using a single call. +// + +VOID +SYMCRYPT_CALL +SymCryptRsaSelftest(void); +// +// FIPS self-test for RSA sign/verify. This function uses a hardcoded key to perform the self-test +// without having to generate a key. If the self-test fails, SymCryptFatal will be called to +// fastfail. +// The self-test will automatically be performed before first operational use of RSA if using a key +// with FIPS validation, so most callers should never use this function. +// + + +// +// DSA +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaSign( + _In_ PCSYMCRYPT_DLKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using the DSA signature algorithm. +// (pbHashValue,cbHashValue) is the output of the hash function that hashed the message to be signed. +// (pbSignature,cbSignature) is the output buffer that receives the signature. +// The signature is encoded as two integers (R,S) mod Q in the format specified by the 'format' parameter. +// +// Allowed flags: +// None +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaVerify( + _In_ PCSYMCRYPT_DLKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags ); +// +// Verifies a DSA signature using the public part of Key. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// if it failed. +// +// Allowed flags: +// None +// + +VOID +SYMCRYPT_CALL +SymCryptDsaSelftest(void); +// +// FIPS self-test for DSA sign/verify. This function uses a hardcoded key to perform the self-test +// without having to generate a key. 
If the self-test fails, SymCryptFatal will be called to +// fastfail. +// The self-test will automatically be performed before first operational use of DSA if using a key +// with FIPS validation, so most callers should never use this function. +// + +// +// DH +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDhSecretAgreement( + _In_ PCSYMCRYPT_DLKEY pkPrivate, + _In_ PCSYMCRYPT_DLKEY pkPublic, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ); +// +// Calculates the agreed secret of a DH key exchange and stores it +// in the pbAgreedSecret buffer under the specified number format. +// +// format is the number format of the agreed secret (pbAgreedSecret buffer). +// +// Allowed flags: +// - None +// + +VOID +SYMCRYPT_CALL +SymCryptDhSecretAgreementSelftest(void); +// +// FIPS self-test for DH secret agreement. This function uses two hardcoded keys and a precalculated +// known answer to perform the self-test without having to generate a key. If the self-test fails, +// SymCryptFatal will be called to fastfail. +// The self-test will automatically be performed before first operational use of DH if using keys +// with FIPS validation, so most callers should never use this function. +// + +// +// For both ECDSA and ECDH algorithms the key generation and management is the same. The main algorithms are: +// - SymCryptEckeyAllocate or SymCryptEckeyCreate for creation of the ECKEY object. +// - SymCryptEckeySetValue or SymCryptEckeySetRandom for filling the key with the preferred key material. +// - SymCryptEckeyFree or SymCryptEckeyWipe for freeing or wiping the key. 
+// + +// +// ECDSA +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaSign( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using the ECDSA signature algorithm. +// (pbHashValue,cbHashValue) is the output of the hash function that hashed the message to be signed. +// (pbSignature,cbSignature) is the output buffer that receives the signature. +// The signature is encoded as two integers in the format specified by the 'format' parameter. +// +// Allowed flags: +// SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION: If set then the hash value will +// not be truncated. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaVerify( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags ); + +// +// Verifies an ECDSA signature using the public part of Key. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// if it failed. +// +// Allowed flags: +// SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION: If set then the hash value will +// not be truncated. + +VOID +SYMCRYPT_CALL +SymCryptEcDsaSelftest(void); +// +// FIPS self-test for ECDSA sign/verify. This function uses a hardcoded key to perform the self-test +// without having to generate a key. If the self-test fails, SymCryptFatal will be called to +// fastfail. +// The self-test will automatically be performed before first operational use of ECDSA if using a +// key with FIPS validation, so most callers should never use this function. 
+// + +// +// ECDH +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDhSecretAgreement( + _In_ PCSYMCRYPT_ECKEY pkPrivate, + _In_ PCSYMCRYPT_ECKEY pkPublic, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ); + +// +// Calculates the agreed secret of a DH key exchange and stores it +// in the pbAgreedSecret buffer under the specified number format. +// +// Allowed flags: +// - None +// + +VOID +SYMCRYPT_CALL +SymCryptEcDhSecretAgreementSelftest(void); +// +// FIPS self-test for ECDH secret agreement. This function uses two hardcoded keys and a +// precalculated known answer to perform the self-test without having to generate a key. If the +// self-test fails, SymCryptFatal will be called to fastfail. +// The self-test will automatically be performed before first operational use of ECDH if using keys +// with FIPS validation, so most callers should never use this function. +// + +//======================================================================== +// +// Stateful Hash-based Signatures +// +// Hash-based signature schemes are digital signature schemes built out of hash +// functions. Stateful hash-based signatures are many-time signature schemes +// composed of a one-time-signature (OTS) scheme and a Merkle-tree representing +// multiple OTS with a public root value. At each signing operation, one of the +// (unused) OTS keys is used to sign the message, and the private key is updated +// so that the same OTS is not used again. Because there is a limited number of +// OTS keys determined at key generation time, signing cannot be performed after +// all OTSs are used. This is an important distinction from other digital signature +// schemes such as RSA or ECDSA. +// +// It is crucial for the security of the *stateful* hash-based signatures that the +// same private key state NOT be used more than once to sign messages, otherwise all +// security is lost. 
+// + + +//======================================================================== +// XMSS API +// +// XMSS is a stateful hash-based signature scheme specified in RFC 8391. The +// multi-tree variant is named XMSS^MT. +// +// XMSS uses WOTS+ as the one-time-signature (OTS) scheme. Public key consists +// of two parts; Merkle-tree hash of OTS public keys called the Root, and a Seed value +// used in hash computations. The private key consists of SK_XMSS which is +// used to deterministically create OTS keys, SK_PRF which is used to generate +// the randomizer for hashing, and an integer Idx which is used to select the next OTS key +// for signing. +// + +typedef enum _SYMCRYPT_XMSS_ALGID +{ + // Hash Fn. RFC-8391 SP800-208 + SYMCRYPT_XMSS_SHA2_10_256 = 0x00000001, // SHA-256 X X + SYMCRYPT_XMSS_SHA2_16_256 = 0x00000002, // SHA-256 X X + SYMCRYPT_XMSS_SHA2_20_256 = 0x00000003, // SHA-256 X X + SYMCRYPT_XMSS_SHA2_10_512 = 0x00000004, // SHA-512 X + SYMCRYPT_XMSS_SHA2_16_512 = 0x00000005, // SHA-512 X + SYMCRYPT_XMSS_SHA2_20_512 = 0x00000006, // SHA-512 X + SYMCRYPT_XMSS_SHAKE_10_256 = 0x00000007, // SHAKE128 X + SYMCRYPT_XMSS_SHAKE_16_256 = 0x00000008, // SHAKE128 X + SYMCRYPT_XMSS_SHAKE_20_256 = 0x00000009, // SHAKE128 X + SYMCRYPT_XMSS_SHAKE_10_512 = 0x0000000A, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE_16_512 = 0x0000000B, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE_20_512 = 0x0000000C, // SHAKE256 X + SYMCRYPT_XMSS_SHA2_10_192 = 0x0000000D, // SHA-256 X + SYMCRYPT_XMSS_SHA2_16_192 = 0x0000000E, // SHA-256 X + SYMCRYPT_XMSS_SHA2_20_192 = 0x0000000F, // SHA-256 X + SYMCRYPT_XMSS_SHAKE256_10_256 = 0x00000010, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_16_256 = 0x00000011, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_20_256 = 0x00000012, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_10_192 = 0x00000013, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_16_192 = 0x00000014, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_20_192 = 0x00000015, // SHAKE256 X + +} SYMCRYPT_XMSS_ALGID; + +typedef enum _SYMCRYPT_XMSSMT_ALGID
+{ + // Hash Fn. RFC-8391 SP800-208 + // SHA-256 X X + SYMCRYPT_XMSSMT_SHA2_20_2_256 = 0x00000001, + SYMCRYPT_XMSSMT_SHA2_20_4_256 = 0x00000002, + SYMCRYPT_XMSSMT_SHA2_40_2_256 = 0x00000003, + SYMCRYPT_XMSSMT_SHA2_40_4_256 = 0x00000004, + SYMCRYPT_XMSSMT_SHA2_40_8_256 = 0x00000005, + SYMCRYPT_XMSSMT_SHA2_60_3_256 = 0x00000006, + SYMCRYPT_XMSSMT_SHA2_60_6_256 = 0x00000007, + SYMCRYPT_XMSSMT_SHA2_60_12_256 = 0x00000008, + + // SHA-512 X + SYMCRYPT_XMSSMT_SHA2_20_2_512 = 0x00000009, + SYMCRYPT_XMSSMT_SHA2_20_4_512 = 0x0000000A, + SYMCRYPT_XMSSMT_SHA2_40_2_512 = 0x0000000B, + SYMCRYPT_XMSSMT_SHA2_40_4_512 = 0x0000000C, + SYMCRYPT_XMSSMT_SHA2_40_8_512 = 0x0000000D, + SYMCRYPT_XMSSMT_SHA2_60_3_512 = 0x0000000E, + SYMCRYPT_XMSSMT_SHA2_60_6_512 = 0x0000000F, + SYMCRYPT_XMSSMT_SHA2_60_12_512 = 0x00000010, + + // SHAKE128 X + SYMCRYPT_XMSSMT_SHAKE_20_2_256 = 0x00000011, + SYMCRYPT_XMSSMT_SHAKE_20_4_256 = 0x00000012, + SYMCRYPT_XMSSMT_SHAKE_40_2_256 = 0x00000013, + SYMCRYPT_XMSSMT_SHAKE_40_4_256 = 0x00000014, + SYMCRYPT_XMSSMT_SHAKE_40_8_256 = 0x00000015, + SYMCRYPT_XMSSMT_SHAKE_60_3_256 = 0x00000016, + SYMCRYPT_XMSSMT_SHAKE_60_6_256 = 0x00000017, + SYMCRYPT_XMSSMT_SHAKE_60_12_256 = 0x00000018, + + // SHAKE256 X + SYMCRYPT_XMSSMT_SHAKE_20_2_512 = 0x00000019, + SYMCRYPT_XMSSMT_SHAKE_20_4_512 = 0x0000001A, + SYMCRYPT_XMSSMT_SHAKE_40_2_512 = 0x0000001B, + SYMCRYPT_XMSSMT_SHAKE_40_4_512 = 0x0000001C, + SYMCRYPT_XMSSMT_SHAKE_40_8_512 = 0x0000001D, + SYMCRYPT_XMSSMT_SHAKE_60_3_512 = 0x0000001E, + SYMCRYPT_XMSSMT_SHAKE_60_6_512 = 0x0000001F, + SYMCRYPT_XMSSMT_SHAKE_60_12_512 = 0x00000020, + + // SHA-256 X + SYMCRYPT_XMSSMT_SHA2_20_2_192 = 0x00000021, + SYMCRYPT_XMSSMT_SHA2_20_4_192 = 0x00000022, + SYMCRYPT_XMSSMT_SHA2_40_2_192 = 0x00000023, + SYMCRYPT_XMSSMT_SHA2_40_4_192 = 0x00000024, + SYMCRYPT_XMSSMT_SHA2_40_8_192 = 0x00000025, + SYMCRYPT_XMSSMT_SHA2_60_3_192 = 0x00000026, + SYMCRYPT_XMSSMT_SHA2_60_6_192 = 0x00000027, + SYMCRYPT_XMSSMT_SHA2_60_12_192 = 0x00000028, + + // SHAKE256 
X + SYMCRYPT_XMSSMT_SHAKE256_20_2_256 = 0x00000029, + SYMCRYPT_XMSSMT_SHAKE256_20_4_256 = 0x0000002A, + SYMCRYPT_XMSSMT_SHAKE256_40_2_256 = 0x0000002B, + SYMCRYPT_XMSSMT_SHAKE256_40_4_256 = 0x0000002C, + SYMCRYPT_XMSSMT_SHAKE256_40_8_256 = 0x0000002D, + SYMCRYPT_XMSSMT_SHAKE256_60_3_256 = 0x0000002E, + SYMCRYPT_XMSSMT_SHAKE256_60_6_256 = 0x0000002F, + SYMCRYPT_XMSSMT_SHAKE256_60_12_256 = 0x00000030, + + // SHAKE256 X + SYMCRYPT_XMSSMT_SHAKE256_20_2_192 = 0x00000031, + SYMCRYPT_XMSSMT_SHAKE256_20_4_192 = 0x00000032, + SYMCRYPT_XMSSMT_SHAKE256_40_2_192 = 0x00000033, + SYMCRYPT_XMSSMT_SHAKE256_40_4_192 = 0x00000034, + SYMCRYPT_XMSSMT_SHAKE256_40_8_192 = 0x00000035, + SYMCRYPT_XMSSMT_SHAKE256_60_3_192 = 0x00000036, + SYMCRYPT_XMSSMT_SHAKE256_60_6_192 = 0x00000037, + SYMCRYPT_XMSSMT_SHAKE256_60_12_192 = 0x00000038, + +} SYMCRYPT_XMSSMT_ALGID; + + +typedef enum _SYMCRYPT_XMSSKEY_TYPE +{ + SYMCRYPT_XMSSKEY_TYPE_NONE = 0, + SYMCRYPT_XMSSKEY_TYPE_PUBLIC = 1, // Key object contains only public key + SYMCRYPT_XMSSKEY_TYPE_PRIVATE = 2, // Key object contains both public key and private key +} SYMCRYPT_XMSSKEY_TYPE; + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssParamsFromAlgId( + SYMCRYPT_XMSS_ALGID id, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams); +// +// Populate SYMCRYPT_XMSS_PARAMS structure for the specified XMSS algorithm identifier +// using the predefined parameter sets from RFC 8391 and NIST SP800-208 +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssMtParamsFromAlgId( + SYMCRYPT_XMSSMT_ALGID id, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams); +// +// Populate SYMCRYPT_XMSS_PARAMS structure for the specified XMSS^MT algorithm identifier +// using the predefined parameter sets from RFC 8391 and NIST SP800-208 +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSetParams( + _Out_ PSYMCRYPT_XMSS_PARAMS pParams, + UINT32 id, // algorithm identifier + _In_ PCSYMCRYPT_HASH pHash, // hash algorithm + UINT32 cbHashOutput, // hash output size + UINT32 nWinternitzWidth, // Winternitz 
parameter (width of digits) + UINT32 nTotalTreeHeight, // total tree height + UINT32 nLayers, // number of levels + UINT32 cbPrefix // domain separator prefix length + ); +// +// Populates SYMCRYPT_XMSS_PARAMS structure by user defined parameters +// +// +// Parameters: +// +// pParams. Pointer to the structure that will be populated with the +// supplied parameters. +// +// id. Algorithm identifier, will be embedded in key and signature objects. +// +// pHash. Pointer to a hash object that implements a hash function which +// will be used in XMSS/XMSS^MT operations. +// +// cbHashOutput. Output size of the hash function in bytes. Leading cbHashOutput +// bytes are taken as hash output if the hash algorithm's actual output size is larger. +// +// nWinternitzWidth. Winternitz parameter, width of digits in byte sequences. +// See remark below for more explanation. +// +// nTotalTreeHeight. Height of the XMSS/XMSS^MT tree. In a multi-tree setting, +// it is the sum of the tree heights of each layer. +// +// nLayers. Number of layers. For XMSS nLayers=1, otherwise nLayers > 1. When nLayers > 1, +// it must divide nTotalTreeHeight without remainder, so that each layer has height +// nTotalTreeHeight/nLayers. +// +// cbPrefix. Number of bytes in the prefix to the hash inputs used to domain separate +// PRF functions. +// +// Requirements: +// +// cbHashOutput must be nonzero, must be less than or equal to pHash->resultSize, +// and must be less than or equal to SYMCRYPT_HASH_MAX_RESULT_SIZE +// +// nWinternitzWidth must be one of 1, 2, 4, or 8 +// +// nTotalTreeHeight must be non-zero, it must be less than or equal to 32 for +// single-tree (nLayers = 1), and must be less than 64 for multi-tree (nLayers > 1) +// +// nLayers must be non-zero and must divide nTotalTreeHeight without remainder +// +// cbPrefix must be non-zero +// +// Remarks: +// +// RFC 8391 specifies w as the length of the Winternitz chains. 
Here, +// it is used as the width of the digits in an octet string, i.e., +// base2 logarithm of the chain length, which is similar to its use +// in LMS/HSS in RFC 8554. +// + + +#define SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT (0x00000001) +// Verifies the public root value when importing a private key + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSizeofKeyBlobFromParams( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + SYMCRYPT_XMSSKEY_TYPE keyType, + _Out_ SIZE_T* pcbKey ); +// +// Return the size of an XMSS/XMSS^MT key blob associated with the provided XMSS parameters +// +// +// Parameters: +// +// pParams. Pointer to an XMSS parameters structure that has been properly +// initialized before this call. +// +// keyType. SYMCRYPT_XMSSKEY_TYPE_PUBLIC (resp. SYMCRYPT_XMSSKEY_TYPE_PRIVATE) to +// retrieve the size of the public key (resp. private key) blob. +// +// pcbKey. Pointer to the variable to store the size of a public/private +// key blob associated with the XMSS parameters. +// + +PSYMCRYPT_XMSS_KEY +SYMCRYPT_CALL +SymCryptXmsskeyAllocate( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + UINT32 flags ); +// +// Allocate an XMSS/XMSS^MT key object and initialize it +// +// After this call, the key object does not contain a key yet. It must be +// followed by a call to SymCryptXmsskeyGenerate or SymCryptXmsskeySetValue. +// +// Allowed flags: +// +// No flags defined for this function +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyGenerate( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + UINT32 flags ); +// +// Generate a public/private XMSS/XMSS^MT key-pair +// +// Parameters: +// +// pKey. Key object to store the public/private key-pair +// +// flags. 
No flags defined for this function +// +// Return values: +// +// - SYMCRYPT_NO_ERROR +// On successful key generation +// +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE +// If there is not enough memory to perform key generation +// +// Remarks: +// +// - Generates a random private key (SK_XMSS, SK_PRF) and a random +// public seed SEED, and computes the public value Root from it. +// - If the function fails, the key object will be in an invalid state. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeySetValue( + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + SYMCRYPT_XMSSKEY_TYPE keyType, + UINT32 flags, + _Inout_ PSYMCRYPT_XMSS_KEY pKey ); +// +// Set an XMSS/XMSS^MT public/private key from key blob +// +// Key formats: +// +// PubKey: algId | Root | Seed +// PrvKey: algId | Root | Seed | Idx | SK_XMSS | SK_PRF +// +// algId and Idx are 32-bit and 64-bit integers respectively, stored in big-endian format. +// Other values are n-bytes where n is the output size (in bytes) of the hash +// algorithm (or the truncated size if the hash output is truncated). +// +// Public-key format is specified in RFC 8391, whereas private-key format is not. +// We define the private-key as an extension of the public-key with the private key +// material. +// +// Parameters: +// +// (pbInput, cbInput). Input key blob to import the key from +// +// keyType. Indicates whether (pbInput, cbInput) contains a public or a private key. +// Must be one of SYMCRYPT_XMSSKEY_TYPE_PUBLIC, or SYMCRYPT_XMSSKEY_TYPE_PRIVATE. +// +// flags. See below +// +// pKey. Pointer to the XMSS key object to be initialized from the key blob +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT +// Can only be specified when importing a private key. Recomputes the +// public root value and compares it to the one that is imported from the +// key blob. 
+// +// Return values: +// +// - SYMCRYPT_NO_ERROR +// On successfully updating the key object from the provided key blob +// +// - SYMCRYPT_INVALID_ARGUMENT +// If cbInput does not match a public/private key size indicated by keyType parameter +// If an invalid flag is specified, or SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT is +// specified when setting a public key +// +// - SYMCRYPT_INVALID_BLOB +// If the XMSS algorithm ID in the key blob does not match the algorithm ID +// used in creating the key object pointed to by pKey +// +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE +// If there is not sufficient memory for public root verification (only if +// SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT is set in flags) +// +// - SYMCRYPT_HBS_PUBLIC_ROOT_MISMATCH +// If public root value in the key blob does not match the recomputed root value +// (only if key blob is for a private key and SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT is +// specified) +// +// Remarks: +// +// - The key blob size cbInput must match the size returned by SymCryptXmssSizeofKeyBlobFromParams +// for the same keyType and XMSS parameters the key object is created with. +// - If the function fails, the key object will be in an invalid state. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyGetValue( + _In_ PCSYMCRYPT_XMSS_KEY pKey, + SYMCRYPT_XMSSKEY_TYPE keyType, + UINT32 flags, + _Out_writes_bytes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput ); +// +// Get public/private key value from an XMSS/XMSS^MT key object +// +// Key formats: +// +// PubKey: algId | Root | Seed +// PrvKey: algId | Root | Seed | Idx | SK_XMSS | SK_PRF +// +// algId and Idx are 32-bit and 64-bit integers respectively, stored in big-endian format. +// Other values are n-bytes where n is the output size (in bytes) of the hash +// algorithm (or the truncated size if the hash output is truncated). +// +// Public-key format is specified in RFC 8391, whereas private-key format is not. 
+// We define the private-key as an extension of the public-key with the private key +// material. +// +// Parameters: +// +// pKey. The key object to export the key material from +// +// keyType. Type of the key (public or private) to get. If the key object +// contains a public key, keyType must be SYMCRYPT_XMSSKEY_TYPE_PUBLIC. If +// the key object contains a private key, keyType can be one of +// SYMCRYPT_XMSSKEY_TYPE_PUBLIC or SYMCRYPT_XMSSKEY_TYPE_PRIVATE +// +// flags. No flags defined for this function +// +// (pbOutput, cbOutput). Buffer to store the exported key blob. cbOutput must match +// the size of the key to be exported, which can be queried by calling +// SymCryptXmssSizeofKeyBlobFromParams. +// +// Return values: +// +// - SYMCRYPT_NO_ERROR +// On successful exporting of the key +// +// - SYMCRYPT_INVALID_ARGUMENT +// If cbOutput does not match the exact size of the key blob for the specified +// keyType +// If the key object does not contain private key material when keyType +// equals SYMCRYPT_XMSSKEY_TYPE_PRIVATE +// If unsupported flags are specified in flags parameter +// + +VOID +SYMCRYPT_CALL +SymCryptXmsskeyFree( + _Inout_ PSYMCRYPT_XMSS_KEY pKey); +// +// Free an allocated XMSS/XMSS^MT key object +// + +SIZE_T +SYMCRYPT_CALL +SymCryptXmssSizeofSignatureFromParams( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams ); +// +// Return the size of the signature for given XMSS parameters +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSign( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using XMSS/XMSS^MT +// +// Parameters: +// +// pKey. Private XMSS/XMSS^MT key used in signing +// +// (pbMessage, cbMessage). Message to be signed +// +// flags. No flags defined for this function +// +// (pbSignature, cbSignature). 
Buffer to store the generated signature +// +// Requirements: +// +// pKey must contain the private key +// +// cbSignature must be equal to the generated signature size +// +// Return values: +// +// - SYMCRYPT_NO_ERROR on successful signature generation +// +// - SYMCRYPT_INVALID_ARGUMENT +// If flags parameter is invalid, +// or if the key object does not contain private key, +// or cbSignature is not of correct size +// +// - SYMCRYPT_HBS_NO_OTS_KEYS_LEFT +// If the key doesn't have any one-time-signatures left for signing +// +// Remarks: +// +// The input pbMessage can be of arbitrary length and its randomized hash will be the actual +// value that is going to be signed with a WOTSP signature. Applications wanting to pass the hash +// value of a message to be signed as opposed to the message itself must make sure to have +// domain separation between the space of messages and the hashes of the messages. +// +// The signature size can be queried with the SymCryptXmssSizeofSignatureFromParams function. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssVerify( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature ); +// +// Verify an XMSS/XMSS^MT signature on a message +// +// Parameters: +// +// pKey. XMSS key used to verify the signature +// +// (pbMessage, cbMessage) Message for which the signature was created +// +// flags. No flags defined for this function +// +// (pbSignature, cbSignature) XMSS or XMSS^MT signature +// +// Return values: +// +// - SYMCRYPT_NO_ERROR +// If signature verification succeeds +// +// - SYMCRYPT_INVALID_ARGUMENT +// If flags is invalid or cbSignature is of incorrect size +// +// - SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// If the signature is not valid +// +// Requirements: +// +// cbSignature must be equal to the exact signature size associated with +// the XMSS parameters. 
+// +// Remarks: +// +// In XMSS, the message can be arbitrarily long and a randomized hash of the message +// will be computed first to be signed by the WOTSP internally. +// + +VOID +SYMCRYPT_CALL +SymCryptXmssSelftest(void); +// +// FIPS self-test for signature verification +// + +//======================================================================== +// Leighton-Micali Hash-Based Signatures (LMS) - LMS external struct definitions - implementing +// RFC8554/NIST Special Publication 800-208 +// +typedef enum _SYMCRYPT_LMS_ALGID +{ + // + // Algorithm IDs for Leighton-Micali Hash-Based Signatures (LMS) + // M equals the output length of the hash function, where H is the tree height. + // The M parameter primarily affects the security and size of the signatures, while the H parameter + // impacts the number of possible signatures and the computational cost for signing and verification. + // Larger M increases security and signature size, but increases the computational cost + // Higher H means more signatures but higher computational cost for signing and verification + // + SYMCRYPT_LMS_SHA256_M32_H5 = 0x00000005, + SYMCRYPT_LMS_SHA256_M32_H10 = 0x00000006, + SYMCRYPT_LMS_SHA256_M32_H15 = 0x00000007, + SYMCRYPT_LMS_SHA256_M32_H20 = 0x00000008, + SYMCRYPT_LMS_SHA256_M32_H25 = 0x00000009, + SYMCRYPT_LMS_SHA256_M24_H5 = 0x0000000A, + SYMCRYPT_LMS_SHA256_M24_H10 = 0x0000000B, + SYMCRYPT_LMS_SHA256_M24_H15 = 0x0000000C, + SYMCRYPT_LMS_SHA256_M24_H20 = 0x0000000D, + SYMCRYPT_LMS_SHA256_M24_H25 = 0x0000000E, + SYMCRYPT_LMS_SHAKE_M32_H5 = 0x0000000F, + SYMCRYPT_LMS_SHAKE_M32_H10 = 0x00000010, + SYMCRYPT_LMS_SHAKE_M32_H15 = 0x00000011, + SYMCRYPT_LMS_SHAKE_M32_H20 = 0x00000012, + SYMCRYPT_LMS_SHAKE_M32_H25 = 0x00000013, + SYMCRYPT_LMS_SHAKE_M24_H5 = 0x00000014, + SYMCRYPT_LMS_SHAKE_M24_H10 = 0x00000015, + SYMCRYPT_LMS_SHAKE_M24_H15 = 0x00000016, + SYMCRYPT_LMS_SHAKE_M24_H20 = 0x00000017, + SYMCRYPT_LMS_SHAKE_M24_H25 = 0x00000018, +} SYMCRYPT_LMS_ALGID; + +typedef enum 
_SYMCRYPT_LMS_OTS_ALGID +{ + // Algorithm IDs for Leighton-Micali Hash-Based Signatures (LMS) One-Time-Signature (OTS) + // N parameter represents the number of bytes in the hash function output. It determines the size of the hash values used in + // the LMS OTS scheme. + // W parameter represents the width of the Winternitz parameter used in LMS OTS. A larger value of w results in shorter + // signatures but requires more computation during key generation, signature generation, and signature verification. + // + SYMCRYPT_LMS_OTS_SHA256_N32_W1 = 0x00000001, + SYMCRYPT_LMS_OTS_SHA256_N32_W2 = 0x00000002, + SYMCRYPT_LMS_OTS_SHA256_N32_W4 = 0x00000003, + SYMCRYPT_LMS_OTS_SHA256_N32_W8 = 0x00000004, + SYMCRYPT_LMS_OTS_SHA256_N24_W1 = 0x00000005, + SYMCRYPT_LMS_OTS_SHA256_N24_W2 = 0x00000006, + SYMCRYPT_LMS_OTS_SHA256_N24_W4 = 0x00000007, + SYMCRYPT_LMS_OTS_SHA256_N24_W8 = 0x00000008, + SYMCRYPT_LMS_OTS_SHAKE_N32_W1 = 0x00000009, + SYMCRYPT_LMS_OTS_SHAKE_N32_W2 = 0x0000000A, + SYMCRYPT_LMS_OTS_SHAKE_N32_W4 = 0x0000000B, + SYMCRYPT_LMS_OTS_SHAKE_N32_W8 = 0x0000000C, + SYMCRYPT_LMS_OTS_SHAKE_N24_W1 = 0x0000000D, + SYMCRYPT_LMS_OTS_SHAKE_N24_W2 = 0x0000000E, + SYMCRYPT_LMS_OTS_SHAKE_N24_W4 = 0x0000000F, + SYMCRYPT_LMS_OTS_SHAKE_N24_W8 = 0x00000010, +} SYMCRYPT_LMS_OTS_ALGID; + +// Verifies the public key root value when importing a private key +#define SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT (0x00000001) + +typedef enum _SYMCRYPT_LMSKEY_TYPE +{ + SYMCRYPT_LMSKEY_TYPE_NONE = 0, // Key object does not contain any key material + SYMCRYPT_LMSKEY_TYPE_PUBLIC = 1, // Key object contains only public key + SYMCRYPT_LMSKEY_TYPE_PRIVATE = 2, // Key object contains both public key and private key +} SYMCRYPT_LMSKEY_TYPE; +// The format of the private key blob is as follows: +// [ Public key parts || Private key parts ] +// [ 4 || 4 || 16 || m || 4 || m ] +// [ LmsAlgId || LmsOtsAlgId || I || RootNode || NextUnusedLeaf || Seed ] +// +// The format of the public key blob is as follows: +// [ 
4 || 4 || 16 || m ] +// [ LmsAlgId || LmsOtsAlgId || I || RootNode ] + +//===================================================== +// LMS operations + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsParamsFromAlgId( + SYMCRYPT_LMS_ALGID lmsAlgID, + SYMCRYPT_LMS_OTS_ALGID lmsOtsAlgID, + _Out_ PSYMCRYPT_LMS_PARAMS pParams); +// +// This function populates a SYMCRYPT_LMS_PARAMS structure with the predefined parameter sets for a given LMS +// algorithm identifier and LMS OTS algorithm identifier. The resulting structure can be used to create LMS key objects. +// The values defined by SYMCRYPT_LMS_OTS_ALGID and SYMCRYPT_LMS_ALGID are all of the NIST SP 800-208 parameter +// sets supported by SymCrypt. +// +// Parameters: +// lmsAlgID: The LMS algorithm identifier to use +// +// lmsOtsAlgID: The LMS OTS algorithm identifier to use +// +// pParams: A pointer to a SYMCRYPT_LMS_PARAMS structure that will be populated with the predefined parameter sets +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR +// If the function fails, it returns SYMCRYPT_INVALID_ARGUMENT +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSetParams( + _Out_ PSYMCRYPT_LMS_PARAMS pParams, + UINT32 lmsAlgID, + UINT32 lmsOtsAlgID, + _In_ PCSYMCRYPT_HASH pLmsHashFunction, + UINT32 cbHashOutput, + UINT32 nTreeHeight, + UINT32 nWinternitzChainWidth); +// +// This function allows for the customization of non-standard parameter sets, which cannot be set using LmsParamsFromAlgId. +// +// Parameters: +// pParams: A pointer to a SYMCRYPT_LMS_PARAMS structure to be initialized. +// +// lmsAlgID: LMS algorithm identifier, will be embedded in key and signature objects. +// +// lmsOtsAlgID: LMS OTS algorithm identifier, will be embedded in key and signature objects. +// +// pLmsHashFunction: A pointer to the hash function to be used for the LMS system. +// +// cbHashOutput: The number of bytes for each tree node, equal to the output length of the hash function. 
+// Must be less than or equal to 32. +// +// nTreeHeight: The height of the LMS tree. Must be < 32; there are (2^nTreeHeight) leaves in the tree. +// +// nWinternitzChainWidth: An integer that specifies the base2 logarithm of Winternitz chain lengths. +// Must be one of 1, 2, 4, or 8 +// +// Return value: +// If the function succeeds, it fills the SYMCRYPT_LMS_PARAMS structure with the user-defined values and returns SYMCRYPT_NO_ERROR. +// Otherwise, it sets the values of the SYMCRYPT_LMS_PARAMS structure to 0 and returns SYMCRYPT_INVALID_ARGUMENT. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSizeofKeyBlobFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + SYMCRYPT_LMSKEY_TYPE keyType, + _Out_ SIZE_T* pcbKey); +// +// Returns the size of an LMS key blob based on the provided LMS parameters and keyType. +// +// Parameters: +// pParams: A pointer to a SYMCRYPT_LMS_PARAMS structure that specifies the parameters of the LMS key. +// +// keyType: Specifies the type of blob for which to retrieve the size. +// Must be one of SYMCRYPT_LMSKEY_TYPE_PUBLIC or SYMCRYPT_LMSKEY_TYPE_PRIVATE. +// +// pcbKey: Pointer to the variable to store the size of a public/private +// key blob associated with the LMS parameters. +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR. In case keyType is not recognized +// it returns SYMCRYPT_INVALID_ARGUMENT. +// + +PSYMCRYPT_LMS_KEY +SYMCRYPT_CALL +SymCryptLmskeyAllocate( + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + UINT32 flags); +// +// This function allocates a new SYMCRYPT_LMS_KEY object, which represents a key for the Leighton-Micali Signature (LMS) +// scheme, based on the given PCSYMCRYPT_LMS_PARAMS. The function allocates memory for the key object, and returns a pointer to it. +// The caller is responsible for freeing the memory when the key is no longer needed, using the SymCryptLmskeyFree function. 
+// +// Parameters: +// pParams: A pointer to a constant SYMCRYPT_LMS_PARAMS structure that describes +// the LMS parameters to be used for the key. +// The structure must be non-null, and must be initialized by one of the initialization functions: +// SymCryptLmsParamsFromAlgId or SymCryptLmsSetParams. +// +// flags: Currently not used. Must be set to 0. +// +// Return value: +// If the function succeeds, it returns a pointer to the newly created SYMCRYPT_LMS_KEY object. +// Otherwise, it returns NULL, indicating an error that should be handled by the caller. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyGenerate( + _Inout_ PSYMCRYPT_LMS_KEY pKey, + UINT32 flags); +// +// This function generates an LMS public/private key pair in the pKey object. +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to be initialized. The structure +// must be valid and non-null, and must have been previously created using the SymCryptLmskeyAllocate +// function. If the key object already contains key values, they will be overwritten by the generated values. +// +// flags: Currently not used. Must be set to 0. +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR. Otherwise, it returns an error code that describes the nature of the +// failure, such as SYMCRYPT_INVALID_ARGUMENT. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeySetValue( + _In_reads_bytes_(cbInput) PCBYTE pbInput, + SIZE_T cbInput, + SYMCRYPT_LMSKEY_TYPE keyType, + UINT32 flags, + _Inout_ PSYMCRYPT_LMS_KEY pKey); +// +// This function imports an LMS key from a buffer, setting the key object with the provided data. +// +// Parameters: +// pbInput: A pointer to a byte buffer containing the key data to be imported into the LMS key object +// The format of the input buffer is specified by the SYMCRYPT_LMSKEY_TYPE enumeration. 
+// +// cbInput: The size, in bytes, of the key data buffer pointed to by pbInput +// +// keyType: Indicates whether (pbInput, cbInput) contains a public or a private key. +// Must be one of SYMCRYPT_LMSKEY_TYPE_PUBLIC, or SYMCRYPT_LMSKEY_TYPE_PRIVATE. +// +// flags: See allowed flags below. +// +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that will receive the imported key from the buffer pbInput +// +// Allowed flags: +// SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT: Can only be specified when importing a private key. Recomputes the +// public root value and compares it to the one that is imported from the key blob. +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR and the SYMCRYPT_LMS_KEY structure is set with the imported key data. +// If the function fails, it returns an error code that describes the nature of the failure, such as SYMCRYPT_INVALID_ARGUMENT. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyGetValue( + _In_ PCSYMCRYPT_LMS_KEY pKey, + SYMCRYPT_LMSKEY_TYPE keyType, + UINT32 flags, + _Out_writes_bytes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// This function retrieves the public or private key value from an LMS key object, depending on the keyType parameter +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to retrieve the key value from. +// The structure must be valid and non-null, and must contain the key values to retrieve +// +// keyType: Type of the key (public or private) to get. If the key object only +// contains a public key, keyType must be SYMCRYPT_LMSKEY_TYPE_PUBLIC. If +// the key object contains a private key, keyType can be one of +// SYMCRYPT_LMSKEY_TYPE_PUBLIC or SYMCRYPT_LMSKEY_TYPE_PRIVATE +// +// flags: Currently not used. Must be set to 0. +// +// pbOutput: A buffer to hold the key value. The buffer must be large enough to hold the key value. +// The format of the output buffer is specified by the SYMCRYPT_LMSKEY_TYPE enumeration. 
+// +// cbOutput: The size of the pbOutput buffer in bytes +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR. Otherwise, it returns SYMCRYPT_INVALID_ARGUMENT. +// + +VOID +SYMCRYPT_CALL +SymCryptLmskeyFree( + _Inout_ PSYMCRYPT_LMS_KEY pKey); +// +// This function frees the memory that was allocated for the given LMS key object, which was previously created using the +// SymCryptLmskeyAllocate function. The function wipes and deallocates the memory. +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to be freed. The structure +// must be valid and non-null, and must have been previously created using the SymCryptLmskeyAllocate function. +// +// Return value: +// The function does not return a value. +// + +SIZE_T +SYMCRYPT_CALL +SymCryptLmsSizeofSignatureFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams); +// +// This function returns the size, in bytes, of the signature that will be generated by the LMS signature scheme, based on the +// specified LMS parameters. +// +// Parameters: +// pParams: A pointer to a SYMCRYPT_LMS_PARAMS structure that represents the parameters associated with the LMS key to +// use for computing the signature size. The structure must be valid and non-null. +// +// Return value: +// Signature size in bytes. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSign( + _Inout_ PSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _Out_writes_bytes_(cbSignature) PBYTE pbSignature, + SIZE_T cbSignature); +// +// This function generates an LMS signature for the given message, using the private key in the given LMS key object. +// The function fills the buffer pointed to by pbSignature with the LMS signature. It uses the LMS parameters +// and key values that were specified when the key object was created to generate the signature. 
+// Stateful hash-based signatures are not approved by FIPS for key generation and signature generation in software +// modules. Special care must be taken to ensure that the same private key state is not used more than once to +// sign messages. This can be done, for instance, by releasing a signature only after verifying that the private +// key has been updated and serialized to physical storage. +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to be used for signing the message. +// The structure must be valid and non-null, and must contain the private key values for the LMS scheme. The private key +// must have been initialized previously using the SymCryptLmskeyGenerate or SymCryptLmskeySetValue function. +// +// pbMessage: A pointer to a buffer that contains the message to be signed. +// +// cbMessage: The length in bytes of the message to be signed. +// +// flags: Currently not used. Must be set to 0. +// +// pbSignature: A pointer to the buffer that receives the computed signature. It must be large enough to hold the +// generated signature. The required size can be retrieved using: SymCryptLmsSizeofSignatureFromParams. +// +// cbSignature: The size of the signature buffer pbSignature. If the passed size is different than the +// required signature size an error will be returned. +// +// Return value: +// SYMCRYPT_NO_ERROR - If the function succeeds +// +// SYMCRYPT_HBS_NO_OTS_KEYS_LEFT - If the key has run out of available OTS keys +// +// SYMCRYPT_INVALID_ARGUMENT - If one of the input parameters is invalid +// +// Remarks: +// The LMS signing process inherits its signature from the LMS OTS, which means that it will always compute a digest of the +// given message before signing, even if a hash value is provided as the message. 
+// Developers should always be consistent with the input to the LMS sign and verify functions and ensure that the input message +// is in the correct format before passing it to these functions +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsVerify( + _In_ PCSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_(cbSignature) PCBYTE pbSignature, + SIZE_T cbSignature); +// +// This function verifies the given LMS signature (pbSignature) for the given message (pbMessage), using the public key +// in the given LMS key object. The function returns SYMCRYPT_NO_ERROR if the signature is valid, and an error code otherwise. +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to be used for verifying +// the signature. The structure must be valid and non-null, and must contain the public or private key values for the LMS scheme. +// The public key must have been generated previously using the SymCryptLmskeyGenerate or SymCryptLmskeySetValue functions, and must match the +// private key that was used to generate the signature. +// +// pbMessage: A pointer to a buffer that contains the message that was signed. The buffer must be valid and non-null, and +// must contain at least cbMessage bytes of data. +// +// cbMessage: The length in bytes of the message that was signed. The length should be set to the actual size of the message. +// If the message is larger than the maximum size allowed by the LMS parameters, the function will return an error. +// +// flags: Currently not used. Must be set to 0. +// +// pbSignature: A pointer to a buffer that contains the signature that was generated for the message. The buffer must +// be valid and non-null, and must contain at least cbSignature bytes. +// +// cbSignature: The length in bytes of the signature buffer that contains the signature. 
The length must be +// equal to the exact signature size associated with the given LMS parameters and key values. +// +// Return value: +// SYMCRYPT_NO_ERROR - If the function succeeds +// +// SYMCRYPT_INVALID_ARGUMENT - If the signature structure is not correct or if there is a mismatch between the +// input parameters. +// +// SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE - If the signature verification fails +// + +VOID +SYMCRYPT_CALL +SymCryptLmsSelftest(void); + + +// MLKEMKEY objects' API +// + +// MLKEM key formats +// ================== +// The below formats apply **only to external formats**: When somebody is importing or exporting +// a key. The internal format of the keys is not visible to the caller. +typedef enum _SYMCRYPT_MLKEMKEY_FORMAT { + SYMCRYPT_MLKEMKEY_FORMAT_NULL = 0, + SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED = 1, + // 64-byte concatenation of d || z from FIPS 203. Smallest representation of a full + // ML-KEM key. + // On its own it is ambiguous which ML-KEM parameter set this represents; callers wanting to + // store this format must track the parameter set alongside the key. + SYMCRYPT_MLKEMKEY_FORMAT_DECAPSULATION_KEY = 2, + // Standard byte encoding of an ML-KEM Decapsulation key, per FIPS 203. + // Size is 1632, 2400, or 3168 bytes for ML-KEM 512, 768, and 1024 respectively. + SYMCRYPT_MLKEMKEY_FORMAT_ENCAPSULATION_KEY = 3, + // Standard byte encoding of an ML-KEM Encapsulation key, per FIPS 203. + // Size is 800, 1184, or 1568 bytes for ML-KEM 512, 768, and 1024 respectively. 
+} SYMCRYPT_MLKEMKEY_FORMAT; + + +typedef enum _SYMCRYPT_MLKEM_PARAMS { + SYMCRYPT_MLKEM_PARAMS_NULL = 0, + SYMCRYPT_MLKEM_PARAMS_MLKEM512 = 1, + SYMCRYPT_MLKEM_PARAMS_MLKEM768 = 2, + SYMCRYPT_MLKEM_PARAMS_MLKEM1024 = 3, +} SYMCRYPT_MLKEM_PARAMS; +// +// Currently supported ML-KEM parameter sets are represented externally only by the enum +// + +PSYMCRYPT_MLKEMKEY +SYMCRYPT_CALL +SymCryptMlKemkeyAllocate( + SYMCRYPT_MLKEM_PARAMS params ); +// +// Allocate and create a new MLKEMKEY object sized according to the specified parameters. +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptMlKemkeyGenerate or +// SymCryptMlKemkeySetValue. +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyFree( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ); + + +// d and z are each 32 bytes +#define SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE (2*32) + +#define SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM512 (800) +#define SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM768 (1184) +#define SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM1024 (1568) + +#define SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM512 (1632) +#define SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM768 (2400) +#define SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM1024 (3168) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemSizeofKeyFormatFromParams( + SYMCRYPT_MLKEM_PARAMS params, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyformat, + _Out_ SIZE_T* pcbKeyFormat ); +// +// Gives the size in bytes of the blob of the given format for the given ML-KEM +// parameters via pcbKeyFormat output. +// Returns SYMCRYPT_INCOMPATIBLE_FORMAT if mlKemkeyFormat is an unsupported value, +// or SYMCRYPT_INVALID_ARGUMENT if other parameters are invalid. 
+// + +#define SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM512 (768) +#define SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM768 (1088) +#define SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM1024 (1568) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemSizeofCiphertextFromParams( + SYMCRYPT_MLKEM_PARAMS params, + _Out_ SIZE_T* pcbCiphertext ); +// +// Gives the size in bytes of the ciphertext for the given ML-KEM parameters. +// Returns SYMCRYPT_INVALID_ARGUMENT if parameters are invalid. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGenerate( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + UINT32 flags ); +// +// Generate a new random ML-KEM key using the information from the +// parameters passed to SymCryptMlKemkeyAllocate. +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeySetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ); +// +// Import key material to an ML-KEM key object. The arguments are the following: +// - (pbSrc, cbSrc): a buffer containing a representation of an ML-KEM key, +// in format specified by mlKemkeyFormat. 
+// - mlKemkeyFormat format of the input +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// Remarks: +// - cbSrc must be equal to the cbKeyFormat returned from +// SymCryptMlKemSizeofKeyFormatFromParams(params, mlKemkeyFormat, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_MLKEMKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGetValue( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + UINT32 flags ); +// +// Export key material from an ML-KEM key object. The arguments are the following: +// - (pbDst, cbDst): a buffer into which a representation of an ML-KEM key is +// written, in the format specified by mlKemkeyFormat. +// - mlKemkeyFormat format of the output +// +// Allowed flags: +// - None. +// +// Remarks: +// - If the key object does not have the information required to export to the format +// specified by mlKemkeyFormat this function will return SYMCRYPT_INCOMPATIBLE_FORMAT. +// - cbDst must be equal to the cbKeyFormat returned from +// SymCryptMlKemSizeofKeyFormatFromParams(params, mlKemkeyFormat, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_MLKEMKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulate( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ); +// +// Performs the Encapsulate operation of ML-KEM. +// This uses the public information of an ML-KEM keypair to generate an agreed secret +// and a ciphertext. 
Only a peer with the private information of an ML-KEM keypair can +// decapsulate the ciphertext to compute the agreed secret. +// +// The arguments are the following: +// - pkMlKemkey: a key which contains public information required for encapsulation. +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of ML-KEM. +// - (pbCiphertext, cbCiphertext): a buffer into which the encapsulated secret is written. +// cbCiphertext must equal cbCiphertext given by SymCryptMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_*). +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemDecapsulate( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _In_reads_bytes_( cbCiphertext ) PCBYTE pbCiphertext, + SIZE_T cbCiphertext, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ); +// +// Performs the Decapsulate operation of ML-KEM. +// This uses the private information of an ML-KEM keypair to generate an agreed +// secret from a ciphertext. +// +// The arguments are the following: +// - pkMlKemkey: a key which contains private information required for decapsulation. +// - (pbCiphertext, cbCiphertext): a buffer containing an encapsulated secret. +// cbCiphertext must equal cbCiphertext given by SymCryptMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_*). +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of ML-KEM. +// +// Note: Given an invalid, but correctly-sized, ciphertext, the ML-KEM Decapsulation operation +// will "implicitly reject" the ciphertext, by returning success in equal time to a valid +// decapsulation operation, with pseudo-random agreed secret output. 
This forces higher +// level protocols to fail later when symmetric keys of peers do not match. +// So decapsulate will only ever return an error if there are programming errors (e.g. incorrect size), +// or something fundamentally goes wrong with the environment (e.g. internal memory allocation fails). +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemSelftest(void); +// +// FIPS self-test for ML-KEM. If the self-test fails, SymCryptFatal will be called to fastfail. +// The self-test will automatically be performed before first operational use of ML-KEM if using +// keys with FIPS validation, so most callers should never use this function. +// + +// +// COMPOSITE MLKEMKEY objects' API +// +// The below formats apply **only to external formats**: When somebody is importing or exporting +// a key. The internal format of the keys is not visible to the caller. +typedef enum _SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT { + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT_NULL = 0, + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT_IRTF_PRIVATE_SEED = 1, + // 32-byte seed for deriving Composite ML-KEM key, per irtf-cfrg-hybrid-kems CG framework + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT_LAMPS_PRIVATE_KEY = 2, + // Standard byte encoding of a Composite ML-KEM private key, per LAMPS composite ML-KEM draft 12. + // Concatenation of ML-KEM private seed and private key of the traditional component: + // mlkemSeed || tradSK + // Size in bytes are MLKEM768_P256: 115, MLKEM768_X25519: 96, MLKEM1024_P384: 128 + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT_PUBLIC_KEY = 3, + // Standard byte encoding of a Composite ML-KEM public key, per irtf-cfrg-hybrid-kems CG framework + // and LAMPS composite ML-KEM draft 12. 
+ // Concatenation of ML-KEM encapsulation key and public key of the traditional component: + // mlkemPK || tradPK + // Size in bytes are MLKEM768_P256: 1249, MLKEM768_X25519: 1216, MLKEM1024_P384: 1665 +} SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT; + + +typedef enum _SYMCRYPT_COMPOSITE_MLKEM_PARAMS { + SYMCRYPT_COMPOSITE_MLKEM_PARAMS_NULL = 0, + SYMCRYPT_COMPOSITE_MLKEM_PARAMS_MLKEM768_P256 = 1, + SYMCRYPT_COMPOSITE_MLKEM_PARAMS_MLKEM768_X25519 = 2, + SYMCRYPT_COMPOSITE_MLKEM_PARAMS_MLKEM1024_P384 = 3, +} SYMCRYPT_COMPOSITE_MLKEM_PARAMS; +// +// Currently supported Composite ML-KEM parameter sets are represented externally only by the enum +// + +PSYMCRYPT_COMPOSITE_MLKEMKEY +SYMCRYPT_CALL +SymCryptCompositeMlKemkeyAllocate( + SYMCRYPT_COMPOSITE_MLKEM_PARAMS params ); +// +// Allocate and create a new COMPOSITE_MLKEMKEY object sized according to the specified parameters. +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptCompositeMlKemkeyGenerate or +// SymCryptCompositeMlKemkeySetValue. 
+// + +VOID +SYMCRYPT_CALL +SymCryptCompositeMlKemkeyFree( + _Inout_ PSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey ); + + +#define SYMCRYPT_COMPOSITE_MLKEM_IRTF_PRIVATE_SEED_SIZE (32) + +#define SYMCRYPT_COMPOSITE_MLKEM_LAMPS_PRIVATE_KEY_SIZE_MLKEM768_P256 (115) +#define SYMCRYPT_COMPOSITE_MLKEM_LAMPS_PRIVATE_KEY_SIZE_MLKEM768_X25519 (96) +#define SYMCRYPT_COMPOSITE_MLKEM_LAMPS_PRIVATE_KEY_SIZE_MLKEM1024_P384 (128) + +#define SYMCRYPT_COMPOSITE_MLKEM_PUBLIC_KEY_SIZE_MLKEM768_P256 (1249) +#define SYMCRYPT_COMPOSITE_MLKEM_PUBLIC_KEY_SIZE_MLKEM768_X25519 (1216) +#define SYMCRYPT_COMPOSITE_MLKEM_PUBLIC_KEY_SIZE_MLKEM1024_P384 (1665) + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemSizeofKeyFormatFromParams( + SYMCRYPT_COMPOSITE_MLKEM_PARAMS params, + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT compositeMlKemkeyformat, + _Out_ SIZE_T* pcbKeyFormat ); +// +// Gives the size in bytes of the blob of the given format for the given Composite ML-KEM +// parameters via pcbKeyFormat output. +// Returns SYMCRYPT_INCOMPATIBLE_FORMAT if compositeMlKemkeyformat is an unsupported value, +// or SYMCRYPT_INVALID_ARGUMENT if other parameters are invalid. +// + +#define SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_MLKEM768_P256 (1153) +#define SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_MLKEM768_X25519 (1120) +#define SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_MLKEM1024_P384 (1665) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemSizeofCiphertextFromParams( + SYMCRYPT_COMPOSITE_MLKEM_PARAMS params, + _Out_ SIZE_T* pcbCiphertext ); +// +// Gives the size in bytes of the ciphertext for the given Composite ML-KEM parameters. +// Returns SYMCRYPT_INVALID_ARGUMENT if parameters are invalid. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemkeyGenerate( + _Inout_ PSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + UINT32 flags ); +// +// Generate a new random Composite ML-KEM key using the information from the +// parameters passed to SymCryptCompositeMlKemkeyAllocate. 
+// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemkeySetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT compositeMlKemkeyFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey ); +// +// Import key material to a Composite ML-KEM key object. The arguments are the following: +// - (pbSrc, cbSrc): a buffer containing a representation of a Composite ML-KEM key, +// in format specified by compositeMlKemkeyFormat. +// - compositeMlKemkeyFormat format of the input +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// Remarks: +// - cbSrc must be equal to the cbKeyFormat returned from +// SymCryptCompositeMlKemSizeofKeyFormatFromParams(params, compositeMlKemkeyFormat, &cbKeyFormat), though +// typically this value can be known statically (see definition of SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemkeyGetValue( + _In_ PCSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT compositeMlKemkeyFormat, + UINT32 flags ); +// +// Export key material from a Composite ML-KEM key object. The arguments are the following: +// - (pbDst, cbDst): a buffer into which a representation of a Composite ML-KEM key is +// written, in the format specified by compositeMlKemkeyFormat. +// - compositeMlKemkeyFormat format of the output +// +// Allowed flags: +// - None. 
+// +// Remarks: +// - If the key object does not have the information required to export to the format +// specified by compositeMlKemkeyFormat this function will return SYMCRYPT_INCOMPATIBLE_FORMAT. +// - cbDst must be equal to the cbKeyFormat returned from +// SymCryptCompositeMlKemSizeofKeyFormatFromParams(params, compositeMlKemkeyFormat, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemEncapsulate( + _In_ PCSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ); +// +// Performs the Encapsulate operation of Composite ML-KEM. +// This uses the public information of a Composite ML-KEM keypair to generate an agreed secret +// and a ciphertext. Only a peer with the private information of a Composite ML-KEM keypair can +// decapsulate the ciphertext to compute the agreed secret. +// +// The arguments are the following: +// - pkCompositeMlKemkey: a key which contains public information required for encapsulation. +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of Composite ML-KEM. +// - (pbCiphertext, cbCiphertext): a buffer into which the encapsulated secret is written. +// cbCiphertext must equal cbCiphertext given by SymCryptCompositeMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_*). 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemDecapsulate( + _In_ PCSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + _In_reads_bytes_( cbCiphertext ) PCBYTE pbCiphertext, + SIZE_T cbCiphertext, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ); +// +// Performs the Decapsulate operation of Composite ML-KEM. +// This uses the private information of a Composite ML-KEM keypair to generate an agreed +// secret from a ciphertext. +// +// The arguments are the following: +// - pkCompositeMlKemkey: a key which contains private information required for decapsulation. +// - (pbCiphertext, cbCiphertext): a buffer containing an encapsulated secret. +// cbCiphertext must equal cbCiphertext given by SymCryptCompositeMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_*). +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of Composite ML-KEM. +// +// Note: Given an invalid, but correctly-sized, ciphertext, the Composite ML-KEM Decapsulation operation +// will "implicitly reject" the ciphertext, by returning success in equal time to a valid +// decapsulation operation, with pseudo-random agreed secret output. This forces higher +// level protocols to fail later when symmetric keys of peers do not match. +// So decapsulate will only ever return an error if there are programming errors (e.g. incorrect size), +// or something fundamentally goes wrong with the environment (e.g. internal memory allocation fails). 
+// + +//////////////////////////////////////////////////////////// +// Module-Lattice-Based Digital Signature Algorithm (ML-DSA) +//////////////////////////////////////////////////////////// + +// Maximum length of the context string used in signing and verification +#define SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH (255) + +// ML-DSA key formats +// ================== +// The below formats apply **only to external formats**: When somebody is importing or exporting +// a key. The internal format of the keys is not visible to the caller. +typedef enum _SYMCRYPT_MLDSAKEY_FORMAT { + SYMCRYPT_MLDSAKEY_FORMAT_NULL = 0, + SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED = 1, + // 32-byte private root seed xi from which all other parameters can be derived. + // On its own it is ambiguous which ML-DSA parameter set this represents; callers wanting to + // store this format must track the parameter set alongside the key. + SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_KEY = 2, + // Standard byte encoding of an ML-DSA private key, per FIPS 204. + // Size is 2560, 4032, or 4896 bytes for ML-DSA 44, 65, and 87 respectively. + SYMCRYPT_MLDSAKEY_FORMAT_PUBLIC_KEY = 3, + // Standard byte encoding of an ML-DSA public key, per FIPS 204. + // Size is 1312, 1952, or 2592 bytes for ML-DSA 44, 65, and 87 respectively. 
+} SYMCRYPT_MLDSAKEY_FORMAT; + +typedef enum _SYMCRYPT_MLDSA_PARAMS { + SYMCRYPT_MLDSA_PARAMS_NULL = 0, + SYMCRYPT_MLDSA_PARAMS_MLDSA44 = 1, + SYMCRYPT_MLDSA_PARAMS_MLDSA65 = 2, + SYMCRYPT_MLDSA_PARAMS_MLDSA87 = 3, +} SYMCRYPT_MLDSA_PARAMS; +// Currently supported ML-DSA parameter sets are represented externally only by the enum + +typedef enum _SYMCRYPT_PQDSA_HASH_ID { + SYMCRYPT_PQDSA_HASH_ID_NULL = 0, + SYMCRYPT_PQDSA_HASH_ID_SHA256 = 1, + SYMCRYPT_PQDSA_HASH_ID_SHA384 = 2, + SYMCRYPT_PQDSA_HASH_ID_SHA512 = 3, + SYMCRYPT_PQDSA_HASH_ID_SHA512_256 = 4, + SYMCRYPT_PQDSA_HASH_ID_SHA3_256 = 5, + SYMCRYPT_PQDSA_HASH_ID_SHA3_384 = 6, + SYMCRYPT_PQDSA_HASH_ID_SHA3_512 = 7, + SYMCRYPT_PQDSA_HASH_ID_SHAKE128 = 8, + SYMCRYPT_PQDSA_HASH_ID_SHAKE256 = 9, +} SYMCRYPT_PQDSA_HASH_ID; +// Supported hash algorithms for use with Hash-ML-DSA + +//======================================================================== +// MLDSAKEY objects' API +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSizeofKeyFormatFromParams( + SYMCRYPT_MLDSA_PARAMS params, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + _Out_ SIZE_T* pcbKeyFormat ); +// +// Gives the size in bytes of the blob of the given format for the given ML-DSA +// parameters and the specified format via pcbKeyFormat output. +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INCOMPATIBLE_FORMAT if mlDsakeyFormat is an unsupported value. +// - SYMCRYPT_INVALID_ARGUMENT if other parameters are invalid. +// + +#define SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA44 (2420) +#define SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA65 (3309) +#define SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA87 (4627) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSizeofSignatureFromParams( + SYMCRYPT_MLDSA_PARAMS params, + _Out_ SIZE_T* pcbSignature ); +// +// Gives the size in bytes of the signature for the given ML-DSA parameters. +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INVALID_ARGUMENT if parameters are invalid. 
+// + +_Success_( return != NULL ) +PSYMCRYPT_MLDSAKEY +SYMCRYPT_CALL +SymCryptMlDsakeyAllocate( + SYMCRYPT_MLDSA_PARAMS params ); +// +// Allocate a new ML-DSA key object sized according to the parameters. +// +// This call does not generate key material. It should be followed by a call to +// SymCryptMlDsakeyGenerate or SymCryptMlDsakeySetValue. +// +// May return NULL if memory allocation fails. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsakeyFree( + _Post_invalid_ PSYMCRYPT_MLDSAKEY pkMlDsakey ); +// +// Free an ML-DSA key object that was allocated with SymCryptMlDsakeyAllocate. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsakeyGenerate( + _Inout_ PSYMCRYPT_MLDSAKEY pkMlDsakey, + UINT32 flags ); +// +// Generate a new random ML-DSA key using the information from the +// parameters passed to SymCryptMlDsakeyAllocate. +// +// Parameters: +// - pkMlDsakey: a pointer to an ML-DSA key object allocated with SymCryptMlDsakeyAllocate +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if memory allocation fails. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsakeySetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_MLDSAKEY pkMlDsakey ); +// +// Import key material to an ML-DSA key object from a byte blob. +// +// Parameters: +// - (pbSrc, cbSrc): a buffer containing a representation of an ML-DSA key, in the format specified +// by the format parameter. +// - mlDsakeyFormat: format of the input +// - pkMlDsakey: a pointer to an ML-DSA key object allocated with SymCryptMlDsakeyAllocate. 
+// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// Remarks: +// - cbSrc must be equal to the cbKeyFormat returned from +// SymCryptMlDsaSizeofKeyFormatFromParams(params, format, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_MLDSAKEY_FORMAT) +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INCOMPATIBLE_FORMAT if the key format is invalid. +// - SYMCRYPT_INVALID_ARGUMENT if other arguments are invalid. +// - SYMCRYPT_WRONG_KEY_SIZE if cbSrc does not match the expected size for the key format. +// - SYMCRYPT_INVALID_BLOB if the encoded key is invalid. +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if memory allocation fails. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsakeyGetValue( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + UINT32 flags ); +// +// Export key material from an ML-DSA key object to a byte blob. +// +// Parameters: +// - pkMlDsakey: pointer to a valid ML-DSA key object. +// - (pbDst, cbDst): buffer for the exported ML-DSA key, in the format specified by the format +// parameter. +// - mlDsakeyFormat: format of the output +// - flags: no flags are currently defined; must be set to 0 +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INCOMPATIBLE_FORMAT if the key object does not have the information required to export +// the format specified by mlDsakeyFormat. +// - SYMCRYPT_INVALID_ARGUMENT if the output buffer size or other arguments are incorrect. 
+// +// Remarks: +// - cbDst must be equal to the cbKeyFormat returned from +// SymCryptMlDsaSizeofKeyFormatFromParams(params, format, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_MLDSAKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSign( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + _In_reads_bytes_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using "pure" ML-DSA. The message can be of arbitrary length. +// +// Parameters: +// - pkMlDsakey: an ML-DSA key object. Must contain the private key material. +// - (pbMessage, cbMessage): the message to sign. May be of arbitrary length. +// - (pbContext, cbContext): an optional context string which will be included in the message +// representative to be signed. Length must be <= SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH. +// - flags: no flags are currently defined; must be set to 0 +// - (pbSignature, cbSignature): the buffer into which the signature is written. +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INVALID_ARGUMENT if the key object does not contain a private key, or if other +// parameters are invalid. +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if memory allocation fails. +// +// Remarks: +// cbSignature must be equal to the cbSignature returned from +// SymCryptMlDsaSizeofSignatureFromParams( params, &cbSignature ), though typically this +// value can be known statically (see definition of SYMCRYPT_MLDSA_SIGNATURE_SIZE_*). 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptExternalMuMlDsaSign( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_bytes_( cbMu ) PCBYTE pbMu, + SIZE_T cbMu, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a precomputed message representative Mu. +// +// Parameters: +// - (pbMu, cbMu): the message representative to sign, +// which must be of size 64 (SYMCRYPT_SHAKE256_RESULT_SIZE). +// - All other parameters are the same as for SymCryptMlDsaSign. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHashMlDsaSign( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + SYMCRYPT_PQDSA_HASH_ID hashAlg, + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using "pre-hash" ML-DSA. The caller precomputes the hash of the message. +// +// Parameters: +// - hashAlg: the ID of the hash algorithm used to compute pbHash. +// - (pbHash, cbHash): the hash of the message to sign. +// - All other parameters are the same as for SymCryptMlDsaSign. +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INVALID_ARGUMENT if the key object does not contain a private key, or if other +// parameters are invalid. +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if memory allocation fails. +// +// Remarks: +// The hash algorithm provided must meet the minimum required collision strength defined for the +// chosen ML-DSA parameter set. This is the lambda parameter in FIPS 204. 
This means that the +// following hash algorithms are supported: +// +// ML-DSA-44 (lambda = 128): SHA-256, SHA-384, SHA-512, SHA-512/256, SHA3-256, SHA3-384, SHA3-512, SHAKE128, SHAKE256 +// ML-DSA-65 (lambda = 192): SHA-384, SHA-512, SHA3-384, SHA3-512, SHAKE256 +// ML-DSA-87 (lambda = 256): SHA-512, SHA3-512, SHAKE256 +// +// Additionally, cbHash must match the output length of the hash algorithm. +// For XOFs, any output length >= the minimum collision strength is acceptable. If this +// requirement is not met, the function returns SYMCRYPT_INVALID_ARGUMENT. +// +// As with SymCryptMlDsaSign, cbSignature must be equal to the cbSignature returned from +// SymCryptMlDsaSizeofSignatureFromParams( params, &cbSignature ), though typically this +// value can be known statically (see definition of SYMCRYPT_MLDSA_SIGNATURE_SIZE_*). +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaVerify( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + _In_reads_bytes_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ); +// +// Verify a signature using "pure" ML-DSA. The message can be of arbitrary length. +// +// Parameters: +// - pkMlDsakey: the ML-DSA key object used to verify the signature. +// - (pbMessage, cbMessage): the message that the signature was generated from. +// - (pbContext, cbContext): an optional context string which will be included in the message +// representative that was signed. Length must be <= SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH. +// - (pbSignature, cbSignature): the signature to verify. +// - flags: no flags are currently defined; must be set to 0 +// +// Return values: +// - SYMCRYPT_NO_ERROR if the signature was verified successfully. +// - SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE if the signature is invalid. 
+// - SYMCRYPT_INVALID_ARGUMENT if the parameters are invalid. + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptExternalMuMlDsaVerify( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_bytes_( cbMu ) PCBYTE pbMu, + SIZE_T cbMu, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ); +// +// Verify a signature of a precomputed message representative Mu. +// +// Parameters: +// - (pbMu, cbMu): the message representative that was signed, +// which must be of size 64 (SYMCRYPT_SHAKE256_RESULT_SIZE). +// - All other parameters are the same as for SymCryptMlDsaVerify. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHashMlDsaVerify( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + SYMCRYPT_PQDSA_HASH_ID hashAlg, + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ); +// +// Verify a signature using "pre-hash" ML-DSA. The caller precomputes the hash of the message. +// +// Parameters: +// - hashAlg: the ID of the hash algorithm used to compute pbHash. +// - (pbHash, cbHash): the hash of the message that the signature was generated from. +// - All other parameters are the same as for SymCryptMlDsaVerify. +// +// Return values: +// - SYMCRYPT_NO_ERROR if the signature was validated successfully. +// - SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE if the signature is invalid. +// - SYMCRYPT_INVALID_ARGUMENT if the parameters are invalid. +// +// Remarks: +// See the remarks for SymCryptHashMlDsaSign regarding the required security strength of the hash +// algorithm. For unsupported hash algorithms, the function will return SYMCRYPT_INVALID_ARGUMENT. 
+ +VOID +SYMCRYPT_CALL +SymCryptMlDsaSelftest( void ); +// +// FIPS selftest for ML-DSA +// + +_Analysis_noreturn_ +VOID +SYMCRYPT_CALL +SymCryptFatal( UINT32 fatalCode ); +// +// Call the Fatal routine passed to the library upon initialization +// We use the SYMCRYPT_ASSERT macro to catch problems in Debug builds +// + + +typedef struct _SYMCRYPT_UINT32_MAP { + UINT32 from; // map this value... + UINT32 to; // ...into this value +} SYMCRYPT_UINT32_MAP, *PSYMCRYPT_UINT32_MAP; +typedef const SYMCRYPT_UINT32_MAP * PCSYMCRYPT_UINT32_MAP; + + +UINT32 +SYMCRYPT_CALL +SymCryptMapUint32( + UINT32 u32Input, + UINT32 u32Default, + _In_reads_( nMap ) PCSYMCRYPT_UINT32_MAP pcMap, + SIZE_T nMap ); +// +// Map values in a side-channel safe way, typically used for mapping error codes. +// +// (pcMap, nMap) point to an array of nMap entries of type SYMCRYPT_UINT32_MAP; +// each entry specifies a single mapping. If u32Input matches the +// 'from' field, the return value will be the 'to' field value. +// If u32Input is not equal to any 'from' field values, the return value is u32Default. +// Both u32Input and the return value are treated as secrets w.r.t. side channels. +// +// If multiple map entries have the same 'from' field value, then the return value +// is one of the several 'to' field values; which one is not defined. +// +// This function is particularly useful when mapping error codes in situations where +// the actual error cannot be revealed through side channels. 
+ +#if SYMCRYPT_DEBUG +#define SYMCRYPT_ASSERT( _x ) \ + {\ + if( !(_x) ){ SymCryptFatal( 'asrt' ); }\ + }\ + _Analysis_assume_( _x ) +#else +#define SYMCRYPT_ASSERT( _x ) \ + _Analysis_assume_( _x ) +#endif + + +#ifdef __cplusplus +} +#endif diff --git a/libs/symcrypt/inc/symcrypt_internal.h b/libs/symcrypt/inc/symcrypt_internal.h new file mode 100644 index 00000000000..0fe5fe313d3 --- /dev/null +++ b/libs/symcrypt/inc/symcrypt_internal.h @@ -0,0 +1,3768 @@ +// +// SymCrypt_internal.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This file contains information that is internal to the symcrypt library, +// but which still needs to be known to the compiler to be able to use the library. +// This includes structure declarations and all support for inline implementations +// of some of the library functions. +// Information in this file is not part of the API and can change at any time. +// + +#pragma GCC diagnostic ignored "-Wunknown-pragmas" + +// +// We use Prefast pragmas, but they are not recognized by the compiler. +// We disable the 'unknown pragma' warning if we are not in prefast mode. +// +#ifndef _PREFAST_ +#pragma warning(disable:4068) +#endif + +//============================================================================================== +// PLATFORM/COMPILER DETECTION +//============================================================================================== + +#define SYMCRYPT_PLATFORM_WINDOWS 0 +#define SYMCRYPT_PLATFORM_APPLE 0 // macOS and other Apple platforms +#define SYMCRYPT_PLATFORM_UNIX 0 // Linux and other Unix-likes, besides macOS. Must support POSIX. 
+ +#if defined(_WIN32) + #undef SYMCRYPT_PLATFORM_WINDOWS + #define SYMCRYPT_PLATFORM_WINDOWS 1 +#elif defined(__APPLE__) + #undef SYMCRYPT_PLATFORM_APPLE + #define SYMCRYPT_PLATFORM_APPLE 1 +#elif (defined(linux) || defined(__unix__)) + #undef SYMCRYPT_PLATFORM_UNIX + #define SYMCRYPT_PLATFORM_UNIX 1 +#endif + +#define SYMCRYPT_MS_VC 0 // Microsoft compiler (cl.exe - Visual Studio/MSBuild) +#define SYMCRYPT_GNUC 0 // GCC and compatible compilers (including Clang) + +#if defined(_MSC_VER) + #undef SYMCRYPT_MS_VC + #define SYMCRYPT_MS_VC 1 +#elif defined(__GNUC__) + #undef SYMCRYPT_GNUC + #define SYMCRYPT_GNUC 1 +#else + #error Unsupported compiler +#endif + +#if SYMCRYPT_MS_VC + +// This should go somewhere else. Same in the other #if branches. +#define SYMCRYPT_ANYSIZE_ARRAY 1 +#define SYMCRYPT_NOINLINE __declspec(noinline) +#define SYMCRYPT_CDECL __cdecl +#define SYMCRYPT_FASTCALL __fastcall + +#define SYMCRYPT_UNALIGNED + +#elif SYMCRYPT_GNUC + +// Ignore the multi-character character constant warnings +#pragma GCC diagnostic ignored "-Wmultichar" +#pragma GCC diagnostic ignored "-Wincompatible-pointer-types" + +#define SYMCRYPT_ANYSIZE_ARRAY 1 +#define SYMCRYPT_NOINLINE __attribute__ ((noinline)) +#define SYMCRYPT_UNALIGNED +#define SYMCRYPT_CDECL +#define SYMCRYPT_FASTCALL __attribute__((fastcall)) + +#endif + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wmultichar" +#pragma clang diagnostic ignored "-Wincompatible-function-pointer-types" +#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers" +#endif + +//============================================================================================== +// PLATFORM SPECIFICS +//============================================================================================== + +// +// SYMCRYPT_CALL & SYMCRYPT_ALIGN +// +// SYMCRYPT_CALL is a macro that selects the calling convention used by the library. 
+// Crypto functions often have to perform very many small operations, and a fast calling convention is +// preferable. We use __fastcall on platforms that support it. +// +// SYMCRYPT_ALIGN is the default alignment for the platform. +// On platforms that have alignment restrictions the default alignment should be large enough that +// an aligned BYTE * can be cast to a pointer to a UINT32 and be used. +// +// +// The SYMCRYPT_IGNORE_PLATFORM macro can be defined to switch off any platform-specific +// optimizations and run just the C implementations. +// The rest of the library uses SYMCRYPT_CPU_* macros to make platform decisions. +// +// +// WARNING: both the library and the calling application must be compiled with the same +// set of flags, as the flags affect things like the structure layout and size and +// the calling convention, both of which need to be in sync between the lib and the caller. +// + +//#define SYMCRYPT_IGNORE_PLATFORM // #defining this flag disables all platform optimizations. 
+ +#define SYMCRYPT_CPU_X86 0 +#define SYMCRYPT_CPU_AMD64 0 +#define SYMCRYPT_CPU_ARM 0 +#define SYMCRYPT_CPU_ARM64 0 +#define SYMCRYPT_CPU_UNKNOWN 0 + +#if (defined( _X86_ ) || defined( _M_IX86 ) || defined( __i386__ )) && !defined ( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_X86 +#define SYMCRYPT_CPU_X86 1 + +#define SYMCRYPT_CALL SYMCRYPT_FASTCALL +#define SYMCRYPT_ALIGN_VALUE 4 + +#ifndef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:4359) // *** Alignment specifier is less than actual alignment +#endif + +#elif (defined( _ARM64_ ) || defined( _ARM64EC_ ) || defined( _M_ARM64 ) || defined( __aarch64__ ) || defined(__arm64ec__)) && !defined( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_CPU_ARM64 1 +#define SYMCRYPT_CALL +#define SYMCRYPT_ALIGN_VALUE 16 + +#elif (defined( _AMD64_ ) || defined( _M_AMD64 ) || defined( __amd64__ )) && !defined ( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_AMD64 +#define SYMCRYPT_CPU_AMD64 1 + +#define SYMCRYPT_CALL +#define SYMCRYPT_ALIGN_VALUE 16 + +#elif (defined( _ARM_ ) || defined( _M_ARM ) || defined( __arm__ )) && !defined( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_ARM +#define SYMCRYPT_CPU_ARM 1 +#define SYMCRYPT_CALL +#define SYMCRYPT_ALIGN_VALUE 8 + +#elif defined( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_UNKNOWN +#define SYMCRYPT_CPU_UNKNOWN 1 +#define SYMCRYPT_CALL +#define SYMCRYPT_ALIGN_VALUE 16 + +#ifndef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:4359) // *** Alignment specifier is less than actual alignment +#endif + +#else + +#error Unknown CPU platform + +#endif // SYMCRYPT_CALL platforms switch + + +// +// Datatypes used by the SymCrypt library. This ensures compatibility +// with multiple environments, such as Windows, iOS, and Android. 
//

#if SYMCRYPT_PLATFORM_WINDOWS

    //
    // Types included in intsafe.h:
    //      BYTE,
    //      INT16, UINT16,
    //      INT32, UINT32,
    //      INT64, UINT64,
    //      UINT_PTR
    // and macro:
    //      UINT32_MAX
    //
#include <intsafe.h>

#else

// Non-Windows build: recreate the Windows-style type names on top of <stdint.h>.
#include <stdint.h>

typedef uint8_t             BYTE;

#ifndef UINT32_MAX
#define UINT32_MAX          (0xffffffff)
#endif

#ifndef TRUE
#define TRUE                0x01
#endif

#ifndef FALSE
#define FALSE               0x00
#endif

// Size_t
// NOTE(review): size_t is used here before <stddef.h> is included further down;
// presumably an earlier include already provides it - confirm.
typedef size_t              SIZE_T;

#ifndef SIZE_T_MAX
#define SIZE_T_MAX          SIZE_MAX
#endif

typedef int                 BOOL;

typedef int8_t              INT8, *PINT8;
typedef int16_t             INT16, *PINT16;
typedef int32_t             INT32, *PINT32;
typedef int64_t             INT64, *PINT64;
typedef uint8_t             UINT8, *PUINT8;
typedef uint16_t            UINT16, *PUINT16;
typedef uint32_t            UINT32, *PUINT32;
typedef uint64_t            UINT64, *PUINT64;

// minwindef.h
typedef char                CHAR;

#endif // SYMCRYPT_PLATFORM_WINDOWS

// Needed for offsetof(), used by SYMCRYPT_FIELD_OFFSET below.
#include <stddef.h>

//
// Pointer types
//
typedef BYTE *              PBYTE;
typedef const BYTE *        PCBYTE;

// On the non-Windows path PUINT16/PUINT32/PUINT64 were already declared above;
// the declarations here repeat the same types.
typedef UINT16 *            PUINT16;
typedef const UINT16 *      PCUINT16;

typedef UINT32 *            PUINT32;
typedef const UINT32 *      PCUINT32;

typedef UINT64 *            PUINT64;
typedef const UINT64 *      PCUINT64;

// Void

#ifndef VOID
#define VOID void
#endif

typedef void *              PVOID;
typedef const void *        PCVOID;

// winnt.h
typedef BYTE                BOOLEAN;

// Useful macros for structs
// SYMCRYPT_FIELD_SIZE only uses the null pointer inside sizeof, so it is never dereferenced.
#define SYMCRYPT_FIELD_OFFSET(type, field)  (offsetof(type, field))
#define SYMCRYPT_FIELD_SIZE(type, field)    (sizeof( ((type *)0)->field ))

// FORCEINLINE: an existing definition is kept on the MSVC path only; the
// non-MSVC path defines it unconditionally.
#if SYMCRYPT_MS_VC

#ifndef FORCEINLINE
#if (_MSC_VER >= 1200)
#define FORCEINLINE __forceinline
#else
#define FORCEINLINE __inline
#endif
#endif

#else

#define FORCEINLINE inline __attribute__((always_inline))

#endif

// SYMCRYPT_ALIGN_UP rounds a pointer up to the next multiple of SYMCRYPT_ALIGN_VALUE.
// The mask arithmetic is only valid for power-of-two alignments, which the assertion enforces.
C_ASSERT( (SYMCRYPT_ALIGN_VALUE & (SYMCRYPT_ALIGN_VALUE - 1 )) == 0 );
#define SYMCRYPT_ALIGN_UP( _p ) ((PBYTE) ( ((SIZE_T) (_p) + SYMCRYPT_ALIGN_VALUE - 1) & ~(SYMCRYPT_ALIGN_VALUE - 1 ) ) )

// Compiler-specific spelling of "align to N bytes" and "weak symbol".
// Unknown compilers get empty expansions (no alignment request, no weak linkage).
#if SYMCRYPT_MS_VC
    #define SYMCRYPT_ALIGN_AT(alignment) __declspec(align(alignment))
    #define SYMCRYPT_WEAK_SYMBOL
#elif SYMCRYPT_GNUC
    #define SYMCRYPT_ALIGN_AT(alignment) __attribute__((aligned(alignment)))
    #define SYMCRYPT_WEAK_SYMBOL __attribute__((weak))
#else
    #define SYMCRYPT_ALIGN_AT(alignment)
    #define SYMCRYPT_WEAK_SYMBOL
#endif
// Shorthands built on SYMCRYPT_ALIGN_AT using the platform's SYMCRYPT_ALIGN_VALUE.
#define SYMCRYPT_ALIGN_TYPE_AT(typename, alignment) typename SYMCRYPT_ALIGN_AT(alignment)
#define SYMCRYPT_ALIGN SYMCRYPT_ALIGN_AT(SYMCRYPT_ALIGN_VALUE)
#define SYMCRYPT_ALIGN_STRUCT SYMCRYPT_ALIGN_TYPE_AT(struct, SYMCRYPT_ALIGN_VALUE)
#define SYMCRYPT_ALIGN_UNION SYMCRYPT_ALIGN_TYPE_AT(union, SYMCRYPT_ALIGN_VALUE)


// Both macros evaluate the selected argument twice; do not pass expressions with side effects.
#define SYMCRYPT_MAX( _a, _b )  ((_a)>(_b)?(_a):(_b))
#define SYMCRYPT_MIN( _a, _b )  ((_a)<(_b)?(_a):(_b))

#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
//
// XMM related declarations, used in data structures.
//
#pragma prefast(push)
#pragma prefast(disable: 28251, "Windows headers define _mm_clflush with SAL annotation, Intel header doesn't have SAL annotation leading to inconsistent annotation errors")
#include <emmintrin.h>
#pragma prefast(pop)
#endif


//
// To provide quick error detection we have magic values in all
// our data structures, but only in CHKed builds.
// Our magic value depends on the address of the structure.
// This has the advantage that we detect blind memcpy's of our data structures.
// Memcpy is not supported as it limits what the library is allowed to do.
// Where needed the library provides for copy functions of its internal data structures.
+// +#if SYMCRYPT_DEBUG + #define SYMCRYPT_MAGIC_ENABLED +#endif + +#if defined(SYMCRYPT_MAGIC_ENABLED ) + +#define SYMCRYPT_MAGIC_FIELD SIZE_T magic; +#define SYMCRYPT_MAGIC_VALUE( p ) ((SIZE_T) p + 'S1mv' + SYMCRYPT_API_VERSION) + + +#define SYMCRYPT_SET_MAGIC( p ) {(p)->magic = SYMCRYPT_MAGIC_VALUE( p );} +#define SYMCRYPT_CHECK_MAGIC( p ) {if((p)->magic!=SYMCRYPT_MAGIC_VALUE(p)) SymCryptFatal('magc');} +#define SYMCRYPT_WIPE_MAGIC( p ) {(p)->magic = 0;} + +#else + +// +// We define the magic field even for FRE builds, because we get too many +// hard-to-debug problems with people who accidentally mix FRE headers with CHKed libraries, +// or the other way around. +// E.g. BitLocker only publishes the FRE version of their library, and building a CHKed binary with +// that FRE lib crashes +// + +#define SYMCRYPT_MAGIC_FIELD SIZE_T magic; +#define SYMCRYPT_SET_MAGIC( p ) +#define SYMCRYPT_CHECK_MAGIC( p ) +#define SYMCRYPT_WIPE_MAGIC( p ) + +#endif + +// +// CPU feature detection infrastructure +// + +#if !SYMCRYPT_PLATFORM_WINDOWS + // Forward declarations for CPUID intrinsic replacements + void __cpuidex(int CPUInfo[4], int InfoType, int ECXValue); +#endif + +#if SYMCRYPT_CPU_ARM || SYMCRYPT_CPU_ARM64 + +#define SYMCRYPT_CPU_FEATURE_NEON 0x01 +#define SYMCRYPT_CPU_FEATURE_NEON_AES 0x02 +#define SYMCRYPT_CPU_FEATURE_NEON_PMULL 0x04 +#define SYMCRYPT_CPU_FEATURE_NEON_SHA256 0x08 + +#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +// +// We keep the most commonly tested bits in the least significant byte, to make it easier for the compiler to optimize +// There is a many to one relationship between CPUID feature flags and SYMCRYPT_CPU_FEATURE_XXX bits +// since a SYMCRYPT_CPU_FEATURE_XXX could require multiple CPUID features. 
+ +#define SYMCRYPT_CPU_FEATURE_SSE2 0x0001 // includes SSE, SSE2 +#define SYMCRYPT_CPU_FEATURE_SSSE3 0x0002 // includes SSE, SSE2, SSE3, SSSE3 +#define SYMCRYPT_CPU_FEATURE_AESNI 0x0004 +#define SYMCRYPT_CPU_FEATURE_PCLMULQDQ 0x0008 +#define SYMCRYPT_CPU_FEATURE_AVX2 0x0010 // includes AVX, AVX2 - also indicates support for saving/restoring Ymm registers +#define SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL 0x0020 // if SymCryptSaveXmm() will never fail +#define SYMCRYPT_CPU_FEATURE_SHANI 0x0040 +#define SYMCRYPT_CPU_FEATURE_BMI2 0x0080 // MULX, RORX, SARX, SHLX, SHRX + +#define SYMCRYPT_CPU_FEATURE_ADX 0x0100 // ADCX, ADOX +#define SYMCRYPT_CPU_FEATURE_RDRAND 0x0200 +#define SYMCRYPT_CPU_FEATURE_RDSEED 0x0400 +#define SYMCRYPT_CPU_FEATURE_VAES 0x0800 // support for VAES and VPCLMULQDQ (may only be supported on Ymm registers (i.e. Zen3)) +#define SYMCRYPT_CPU_FEATURE_AVX512 0x1000 // includes F, VL, DQ, BW (VL allows AVX-512 instructions to be used on Xmm and Ymm registers) + // also indicates support for saving/restoring additional AVX-512 state + +#define SYMCRYPT_CPU_FEATURE_CMPXCHG16B 0x2000 // Compare and Swap 128b value + +#endif + +typedef UINT32 SYMCRYPT_CPU_FEATURES; + +// +// We have two feature fields. +// g_SymCryptCpuFeaturesNotPresent reports with features are not present on the current CPU +// SymCryptCpuFeaturesNeverPresent() is a function that returns a static (compiler-predictable) value, +// and allows the environment to lock out features in a way that the compiler can optimize away all the code that uses these features. +// Using a function allows the environment macro to forward it to an environment-specific function. 
+// + +extern SYMCRYPT_CPU_FEATURES g_SymCryptCpuFeaturesNotPresent; + +SYMCRYPT_CPU_FEATURES +SYMCRYPT_CALL +SymCryptCpuFeaturesNeverPresent(void); + +#define SYMCRYPT_CPU_FEATURES_PRESENT( x ) ( ((x) & SymCryptCpuFeaturesNeverPresent()) == 0 && ( (x) & g_SymCryptCpuFeaturesNotPresent ) == 0 ) + +// +// VOLATILE MEMORY ACCESS +// +// These macros are used to explicitly handle volatile memory access independent of compiler settings. +// If volatile memory is accessed directly without using the appropriate macro, MSVC may emit warning +// C4746, because the volatile semantics depend on the value of the /volatile flag, which can result in +// undesired hardware memory barriers that impact performance. +// +// More info: +// https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compil... +// https://docs.microsoft.com/en-us/cpp/build/reference/volatile-volatile-keywo... +// + +#if SYMCRYPT_MS_VC // Microsoft VC++ Compiler + + #if SYMCRYPT_CPU_ARM || SYMCRYPT_CPU_ARM64 + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( __iso_volatile_load8( (const volatile char*)(_p) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( __iso_volatile_load16( (const volatile short*)(_p) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( __iso_volatile_load32( (const volatile int*)(_p) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( _p ) ( __iso_volatile_load64( (const volatile __int64*)(_p) ) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( __iso_volatile_store8( (volatile char*)(_p), (_v) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( __iso_volatile_store16( (volatile short*)(_p), (_v) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( __iso_volatile_store32( (volatile int*)(_p), (_v) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v ) ( __iso_volatile_store64( (volatile __int64*)(_p), (_v) ) ) + #elif SYMCRYPT_CPU_X86 || SYMCRYPT_CPU_AMD64 + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( *((const volatile BYTE*) 
(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( *((const volatile UINT16*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( *((const volatile UINT32*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( _p ) ( *((const volatile UINT64*)(_p)) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( *((volatile BYTE*) (_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( *((volatile UINT16*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( *((volatile UINT32*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v ) ( *((volatile UINT64*)(_p)) = (_v) ) + #else // Temporary workaround for CMake compilation issues on Windows. Assume X86/ADM64. + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( *((const volatile BYTE*) (_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( *((const volatile UINT16*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( *((const volatile UINT32*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( _p ) ( *((const volatile UINT64*)(_p)) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( *((volatile BYTE*) (_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( *((volatile UINT16*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( *((volatile UINT32*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v ) ( *((volatile UINT64*)(_p)) = (_v) ) + #endif + +#elif SYMCRYPT_GNUC + + #if !SYMCRYPT_CPU_ARM + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( *((const volatile BYTE*) (_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( *((const volatile UINT16*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( *((const volatile UINT32*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( _p ) ( *((const volatile UINT64*)(_p)) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( *((volatile BYTE*) (_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( *((volatile 
UINT16*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( *((volatile UINT32*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v ) ( *((volatile UINT64*)(_p)) = (_v) ) + #else // SYMCRYPT_CPU_ARM + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( *((const volatile BYTE*) (_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( *((const volatile UINT16*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( *((const volatile UINT32*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( p ) ( (UINT64)SYMCRYPT_INTERNAL_VOLATILE_READ32(&((PBYTE)p)[4]) << 32 | SYMCRYPT_INTERNAL_VOLATILE_READ32(&((PBYTE)p)[0]) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( *((volatile BYTE*) (_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( *((volatile UINT16*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( *((volatile UINT32*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( p, x ) { \ + SYMCRYPT_INTERNAL_VOLATILE_WRITE32( &((PBYTE)p)[0], (UINT32)((x) ) );\ + SYMCRYPT_INTERNAL_VOLATILE_WRITE32( &((PBYTE)p)[4], (UINT32)(((UINT64)(x))>>32) );\ + } + #endif + +#else + + #error Unknown compiler + +#endif + +// +// FORCED MEMORY ACCESS +// +// These macros force a memory access. That is, they require that the memory +// read or write takes place, and do not allow the compiler to optimize the access +// away. +// They provide no other memory ordering requirements, so there are no acquire/release +// semantics, memory barriers, etc. +// +// The generic versions are implemented with a volatile access, but that is inefficient on some platforms +// because it might introduce memory ordering requirements. 
//

// Forced accesses are currently plain aliases of the volatile access macros.
#define SYMCRYPT_INTERNAL_FORCE_READ8( _p )    SYMCRYPT_INTERNAL_VOLATILE_READ8( _p )
#define SYMCRYPT_INTERNAL_FORCE_READ16( _p )   SYMCRYPT_INTERNAL_VOLATILE_READ16( _p )
#define SYMCRYPT_INTERNAL_FORCE_READ32( _p )   SYMCRYPT_INTERNAL_VOLATILE_READ32( _p )
#define SYMCRYPT_INTERNAL_FORCE_READ64( _p )   SYMCRYPT_INTERNAL_VOLATILE_READ64( _p )

#define SYMCRYPT_INTERNAL_FORCE_WRITE8( _p, _v )    SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v )
#define SYMCRYPT_INTERNAL_FORCE_WRITE16( _p, _v )   SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v )
#define SYMCRYPT_INTERNAL_FORCE_WRITE32( _p, _v )   SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v )
#define SYMCRYPT_INTERNAL_FORCE_WRITE64( _p, _v )   SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v )

//
// FIXED ENDIANNESS ACCESS
//
// Fixed endianness load and store
// We do this by platform because it is affected by both endianness and alignment requirements
// The p pointer is always a pointer to BYTE
//
// In every branch below the pointer argument is parenthesized before it is cast, so a
// pointer expression passed as p is converted as a whole.
// The multi-statement STORE macros expand to a brace block (not do/while), as before.
//
#if SYMCRYPT_MS_VC  // Microsoft VC++ Compiler
    #define SYMCRYPT_BSWAP16( x ) _byteswap_ushort(x)
    #define SYMCRYPT_BSWAP32( x ) _byteswap_ulong(x)
    #define SYMCRYPT_BSWAP64( x ) _byteswap_uint64(x)
#elif SYMCRYPT_GNUC
    #define SYMCRYPT_BSWAP16( x ) __builtin_bswap16(x)
    #define SYMCRYPT_BSWAP32( x ) __builtin_bswap32(x)
    #define SYMCRYPT_BSWAP64( x ) __builtin_bswap64(x)
#endif

#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64


//
// X86, AMD64, and ARM64 have no alignment restrictions, and are little-endian.
// (32-bit ARM is handled by its own branch below.)
// We do straight store/loads with BSWAPs where required.
// This technically relies upon undefined behavior, as we assume the compiler will translate
// operations on unaligned pointers to 2, 4, and 8 bytes types to appropriately unaligned store/load
// instructions on these platforms (not just in these macros). This works for all compilers we
// currently use.
//
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST16( p ) SYMCRYPT_BSWAP16( *((UINT16 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST16( p )                 ( *((UINT16 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST32( p ) SYMCRYPT_BSWAP32( *((UINT32 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST32( p )                 ( *((UINT32 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST64( p ) SYMCRYPT_BSWAP64( *((UINT64 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST64( p )                 ( *((UINT64 *)(p)) )

#define SYMCRYPT_INTERNAL_STORE_MSBFIRST16( p, x ) ( *(UINT16 *)(p) = SYMCRYPT_BSWAP16(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST16( p, x ) ( *(UINT16 *)(p) = (x) )
#define SYMCRYPT_INTERNAL_STORE_MSBFIRST32( p, x ) ( *(UINT32 *)(p) = SYMCRYPT_BSWAP32(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST32( p, x ) ( *(UINT32 *)(p) = (x) )
#define SYMCRYPT_INTERNAL_STORE_MSBFIRST64( p, x ) ( *(UINT64 *)(p) = SYMCRYPT_BSWAP64(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST64( p, x ) ( *(UINT64 *)(p) = (x) )

#elif SYMCRYPT_CPU_ARM

//
// Only 64 bit accesses need to be aligned.
// 16- and 32-bit accesses are done directly; 64-bit accesses are split into two 32-bit halves.
//
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST16( p ) SYMCRYPT_BSWAP16( *((UINT16 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST16( p )                 ( *((UINT16 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST32( p ) SYMCRYPT_BSWAP32( *((UINT32 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST32( p )                 ( *((UINT32 *)(p)) )

#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST64( p ) ( (UINT64)SYMCRYPT_INTERNAL_LOAD_MSBFIRST32(&((PBYTE)(p))[0]) << 32 | SYMCRYPT_INTERNAL_LOAD_MSBFIRST32(&((PBYTE)(p))[4]) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST64( p ) ( (UINT64)SYMCRYPT_INTERNAL_LOAD_LSBFIRST32(&((PBYTE)(p))[4]) << 32 | SYMCRYPT_INTERNAL_LOAD_LSBFIRST32(&((PBYTE)(p))[0]) )



#define SYMCRYPT_INTERNAL_STORE_MSBFIRST16( p, x ) ( *(UINT16 *)(p) = SYMCRYPT_BSWAP16(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST16( p, x ) ( *(UINT16 *)(p) = (x) )
#define SYMCRYPT_INTERNAL_STORE_MSBFIRST32( p, x ) ( *(UINT32 *)(p) = SYMCRYPT_BSWAP32(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST32( p, x ) ( *(UINT32 *)(p) = (x) )
#define SYMCRYPT_INTERNAL_STORE_MSBFIRST64( p, x ) { \
    SYMCRYPT_INTERNAL_STORE_MSBFIRST32( &((PBYTE)(p))[0],(UINT32)(((UINT64)(x))>>32) );\
    SYMCRYPT_INTERNAL_STORE_MSBFIRST32( &((PBYTE)(p))[4],(UINT32)(x));\
    }

#define SYMCRYPT_INTERNAL_STORE_LSBFIRST64( p, x ) { \
    SYMCRYPT_INTERNAL_STORE_LSBFIRST32( &((PBYTE)(p))[0], (UINT32)((x)                ) );\
    SYMCRYPT_INTERNAL_STORE_LSBFIRST32( &((PBYTE)(p))[4], (UINT32)(((UINT64)(x))>>32) );\
    }
#else // unknown platform

//
// These functions have to handle arbitrary alignments too, so we do them byte-by-byte in the
// generic case.
// So far these macros have not been fully tested
//
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST16( p ) ( ((UINT16)((PBYTE)(p))[0]) << 8 | ((PBYTE)(p))[1] )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST16( p ) ( ((UINT16)((PBYTE)(p))[1]) << 8 | ((PBYTE)(p))[0] )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST32( p ) ( (UINT32)SYMCRYPT_INTERNAL_LOAD_MSBFIRST16(&((PBYTE)(p))[0]) << 16 | SYMCRYPT_INTERNAL_LOAD_MSBFIRST16(&((PBYTE)(p))[2]) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST32( p ) ( (UINT32)SYMCRYPT_INTERNAL_LOAD_LSBFIRST16(&((PBYTE)(p))[2]) << 16 | SYMCRYPT_INTERNAL_LOAD_LSBFIRST16(&((PBYTE)(p))[0]) )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST64( p ) ( (UINT64)SYMCRYPT_INTERNAL_LOAD_MSBFIRST32(&((PBYTE)(p))[0]) << 32 | SYMCRYPT_INTERNAL_LOAD_MSBFIRST32(&((PBYTE)(p))[4]) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST64( p ) ( (UINT64)SYMCRYPT_INTERNAL_LOAD_LSBFIRST32(&((PBYTE)(p))[4]) << 32 | SYMCRYPT_INTERNAL_LOAD_LSBFIRST32(&((PBYTE)(p))[0]) )

#define SYMCRYPT_INTERNAL_STORE_MSBFIRST16( p, x ) { \
    ((PBYTE)(p))[0] = (BYTE)((x)>> 8);\
    ((PBYTE)(p))[1] = (BYTE)((x)    );\
    }

#define SYMCRYPT_INTERNAL_STORE_LSBFIRST16( p, x ) { \
    ((PBYTE)(p))[0] = (BYTE)((x)    );\
    ((PBYTE)(p))[1] = (BYTE)((x)>> 8);\
    }

#define SYMCRYPT_INTERNAL_STORE_MSBFIRST32( p, x ) { \
    ((PBYTE)(p))[0] = (BYTE)((x)>>24);\
    ((PBYTE)(p))[1] = (BYTE)((x)>>16);\
    ((PBYTE)(p))[2] = (BYTE)((x)>> 8);\
    ((PBYTE)(p))[3] = (BYTE)((x)    );\
    }

#define SYMCRYPT_INTERNAL_STORE_LSBFIRST32( p, x ) { \
    ((PBYTE)(p))[0] = (BYTE)((x)    );\
    ((PBYTE)(p))[1] = (BYTE)((x)>> 8);\
    ((PBYTE)(p))[2] = (BYTE)((x)>>16);\
    ((PBYTE)(p))[3] = (BYTE)((x)>>24);\
    }

#define SYMCRYPT_INTERNAL_STORE_MSBFIRST64( p, x ) { \
    SYMCRYPT_INTERNAL_STORE_MSBFIRST32( &((PBYTE)(p))[0],(UINT32)(((UINT64)(x))>>32) );\
    SYMCRYPT_INTERNAL_STORE_MSBFIRST32( &((PBYTE)(p))[4],(UINT32)(x));\
    }

#define SYMCRYPT_INTERNAL_STORE_LSBFIRST64( p, x ) { \
    SYMCRYPT_INTERNAL_STORE_LSBFIRST32( &((PBYTE)(p))[0], (UINT32)((x)                ) );\
    SYMCRYPT_INTERNAL_STORE_LSBFIRST32( &((PBYTE)(p))[4], (UINT32)(((UINT64)(x))>>32) );\
    }

#endif // platform switch for load/store macros


//==============================================================================================
//  INTERNAL DATA STRUCTURES
//==============================================================================================
//
// Note: we do not use the symbolic names like SYMCRYPT_SHA1_INPUT_BLOCK_SIZE as this
// file is included before that name is defined. Fixing that would make the public API header
// file harder to read by moving the constant away from the associated functions, or forcing
// the header file to use the struct name rather than the typedef. The current solution
// works quite well.
//

//-----------------------------------------------------------------
//     Block cipher description table
// Below are the typedefs for the block cipher description table type
// Callers can use this to define their own block cipher and use the block cipher
// modes.
//

// Forward declaration only; the structure body is not defined in this part of the file.
typedef struct _SYMCRYPT_BLOCKCIPHER SYMCRYPT_BLOCKCIPHER, *PSYMCRYPT_BLOCKCIPHER;
typedef const SYMCRYPT_BLOCKCIPHER * PCSYMCRYPT_BLOCKCIPHER;

//
// Note that blockSize must be <= 32 and must be a power of two. This is true for all the block ciphers
// implemented in SymCrypt.
//

//
// HASH STATES
//
// All hash states have the same basic structure. This allows all hash implementations to share
// the same buffer management code. Some algorithms might still have optimized buffer management code
// specific for their algorithm, but most algs use the generic code.
// This is especially important for parallel hashing, where the buffer management & parallel organizational
// code are tightly coupled.
//

//
// Common prefix of the block-based hash states. The MD2 state further down in this
// file repeats these leading fields (bytesInBuffer, magic, dataLengthL/H, buffer)
// in the same order, followed by its chaining state.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_COMMON_HASH_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;                    // lower part of msg length
    UINT64              dataLengthH;                    // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[SYMCRYPT_ANYSIZE_ARRAY]; // Size depends on algorithm
    // ...
    // Chaining state                                   // type/location depends on algorithm
    //
} SYMCRYPT_COMMON_HASH_STATE, *PSYMCRYPT_COMMON_HASH_STATE;


//
// SYMCRYPT_MD2_STATE
//
// Data structure that stores the state of an ongoing MD2 computation.
//
// The field names are from RFC 1319.
// It would be more efficient to store only the first 16 bytes of the X array,
// but that would complicate the code and MD2 isn't important enough to add
// extra complications.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD2_CHAINING_STATE
{
    SYMCRYPT_ALIGN BYTE  C[16];     // State for internal checksum computation
    BYTE                 X[48];     // State for actual hash chaining
} SYMCRYPT_MD2_CHAINING_STATE, *PSYMCRYPT_MD2_CHAINING_STATE;

//
// MD2 hash computation state.
// One 16-byte input block is buffered; the chaining state follows the buffer.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD2_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;    // lower part of msg length
    UINT64              dataLengthH;    // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[16];     // buffer to keep one input block in
    SYMCRYPT_MD2_CHAINING_STATE chain;
} SYMCRYPT_MD2_STATE, *PSYMCRYPT_MD2_STATE;
typedef const SYMCRYPT_MD2_STATE *PCSYMCRYPT_MD2_STATE;

//
// SYMCRYPT_MD4_STATE
//
// Data structure that stores the state of an ongoing MD4 computation.
// The buffer contains dataLength % 64 bytes of data.
//
// MD4 chaining value: four 32-bit words.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD4_CHAINING_STATE
{
    UINT32  H[4];
} SYMCRYPT_MD4_CHAINING_STATE, *PSYMCRYPT_MD4_CHAINING_STATE;

// MD4 hash computation state: length counters, one 64-byte input block, chaining state.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD4_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;    // lower part of msg length
    UINT64              dataLengthH;    // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[64];     // buffer to keep one input block in
    SYMCRYPT_MD4_CHAINING_STATE chain;  // chaining state
} SYMCRYPT_MD4_STATE, *PSYMCRYPT_MD4_STATE;
typedef const SYMCRYPT_MD4_STATE *PCSYMCRYPT_MD4_STATE;


//
// SYMCRYPT_MD5_STATE
//
// Data structure that stores the state of an ongoing MD5 computation.
// The buffer contains dataLength % 64 bytes of data.
//
// MD5 chaining value: four 32-bit words.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD5_CHAINING_STATE
{
    UINT32  H[4];
} SYMCRYPT_MD5_CHAINING_STATE, *PSYMCRYPT_MD5_CHAINING_STATE;


// MD5 hash computation state; same field sequence as the MD4 state above.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD5_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;    // lower part of msg length
    UINT64              dataLengthH;    // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[64];     // buffer to keep one input block in
    SYMCRYPT_MD5_CHAINING_STATE chain;  // chaining state
} SYMCRYPT_MD5_STATE, *PSYMCRYPT_MD5_STATE;
typedef const SYMCRYPT_MD5_STATE *PCSYMCRYPT_MD5_STATE;


//
// SYMCRYPT_SHA1_STATE
//
// Data structure that stores the state of an ongoing SHA1 computation.
// The buffer contains dataLength % 64 bytes of data.
//
// SHA1 chaining value: five 32-bit words.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA1_CHAINING_STATE
{
    UINT32  H[5];
} SYMCRYPT_SHA1_CHAINING_STATE, *PSYMCRYPT_SHA1_CHAINING_STATE;

// SHA1 hash computation state: length counters, one 64-byte input block, chaining state.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA1_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;    // lower part of msg length
    UINT64              dataLengthH;    // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[64];     // buffer to keep one input block in
    SYMCRYPT_SHA1_CHAINING_STATE chain; // chaining state
} SYMCRYPT_SHA1_STATE, *PSYMCRYPT_SHA1_STATE;
typedef const SYMCRYPT_SHA1_STATE *PCSYMCRYPT_SHA1_STATE;


//
// SYMCRYPT_SHA256_STATE
//
// Data structure that stores the state of an ongoing SHA256 computation.
// The buffer contains dataLength % 64 bytes of data.
//
// SHA256 chaining value: eight 32-bit words; the array itself carries an explicit
// SYMCRYPT_ALIGN in addition to the struct-level alignment.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA256_CHAINING_STATE
{
    SYMCRYPT_ALIGN UINT32  H[8];
} SYMCRYPT_SHA256_CHAINING_STATE, * PSYMCRYPT_SHA256_CHAINING_STATE;

// SHA256 hash computation state.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA256_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;        // lower part of msg length
    UINT64              dataLengthH;        // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[64];         // buffer to keep one input block in
    SYMCRYPT_SHA256_CHAINING_STATE chain;   // chaining state
} SYMCRYPT_SHA256_STATE, *PSYMCRYPT_SHA256_STATE;
typedef const SYMCRYPT_SHA256_STATE *PCSYMCRYPT_SHA256_STATE;


//
// SYMCRYPT_SHA224_STATE
//
// This is identical to the SHA256 state.
//
// SHA224 state: same fields as the SHA256 state, including the SHA256 chaining type.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA224_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;        // lower part of msg length
    UINT64              dataLengthH;        // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[64];         // buffer to keep one input block in
    SYMCRYPT_SHA256_CHAINING_STATE chain;   // chaining state
} SYMCRYPT_SHA224_STATE, *PSYMCRYPT_SHA224_STATE;
typedef const SYMCRYPT_SHA224_STATE *PCSYMCRYPT_SHA224_STATE;


//
// SYMCRYPT_SHA512_STATE
//
// Data structure that stores the state of an ongoing SHA512 computation.
// The buffer contains dataLength % 128 bytes of data.
//
// SHA512 chaining value: eight 64-bit words.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA512_CHAINING_STATE
{
    UINT64  H[8];
} SYMCRYPT_SHA512_CHAINING_STATE, *PSYMCRYPT_SHA512_CHAINING_STATE;

// SHA512 hash computation state: length counters, one 128-byte input block, chaining state.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA512_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;        // lower part of msg length
    UINT64              dataLengthH;        // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[128];        // buffer to keep one input block in
    SYMCRYPT_SHA512_CHAINING_STATE chain;   // chaining state
} SYMCRYPT_SHA512_STATE, *PSYMCRYPT_SHA512_STATE;
typedef const SYMCRYPT_SHA512_STATE *PCSYMCRYPT_SHA512_STATE;


//
// SYMCRYPT_SHA384_STATE
//
// This is identical to the SHA512 state.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA384_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;        // lower part of msg length
    UINT64              dataLengthH;        // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[128];        // buffer to keep one input block in
    SYMCRYPT_SHA512_CHAINING_STATE chain;   // chaining state
} SYMCRYPT_SHA384_STATE, *PSYMCRYPT_SHA384_STATE;
typedef const SYMCRYPT_SHA384_STATE *PCSYMCRYPT_SHA384_STATE;


//
// SYMCRYPT_SHA512_224_STATE
//
// This is identical to the SHA512 state.
//
// SHA512/224 state: same fields as the SHA512 state, including the SHA512 chaining type.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA512_224_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;        // lower part of msg length
    UINT64              dataLengthH;        // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[128];        // buffer to keep one input block in
    SYMCRYPT_SHA512_CHAINING_STATE chain;   // chaining state
} SYMCRYPT_SHA512_224_STATE, *PSYMCRYPT_SHA512_224_STATE;
typedef const SYMCRYPT_SHA512_224_STATE *PCSYMCRYPT_SHA512_224_STATE;


//
// SYMCRYPT_SHA512_256_STATE
//
// This is identical to the SHA512 state.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA512_256_STATE
{
    UINT32              bytesInBuffer;
    SYMCRYPT_MAGIC_FIELD
    UINT64              dataLengthL;        // lower part of msg length
    UINT64              dataLengthH;        // upper part of msg length
    SYMCRYPT_ALIGN BYTE buffer[128];        // buffer to keep one input block in
    SYMCRYPT_SHA512_CHAINING_STATE chain;   // chaining state
} SYMCRYPT_SHA512_256_STATE, *PSYMCRYPT_SHA512_256_STATE;
typedef const SYMCRYPT_SHA512_256_STATE *PCSYMCRYPT_SHA512_256_STATE;


//
// SYMCRYPT_KECCAK_STATE
//
// Data structure that stores the state of an ongoing SHA-3 derived algorithm computation.
// Unlike the block-based hash states above, it has no separate input buffer or
// length counters, and no magic field of its own; the wrapper structs that embed it
// add the magic field.
//

typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KECCAK_STATE
{
    SYMCRYPT_ALIGN UINT64   state[25];      // state for Keccak-f[1600] permutation
    UINT32                  inputBlockSize; // rate
    UINT32                  stateIndex;     // position in the state for next merge/extract operation
    UINT8                   paddingValue;   // Keccak padding value
    BOOLEAN                 squeezeMode;    // denotes whether the state is in squeeze mode
} SYMCRYPT_KECCAK_STATE, *PSYMCRYPT_KECCAK_STATE;
typedef const SYMCRYPT_KECCAK_STATE *PCSYMCRYPT_KECCAK_STATE;

//
// SYMCRYPT_SHA3_224_STATE
//
// Data structure that stores the state of an ongoing SHA3-224 computation.
//
// Each of the following states is a Keccak state followed by the magic field.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA3_224_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_SHA3_224_STATE, * PSYMCRYPT_SHA3_224_STATE;
typedef const SYMCRYPT_SHA3_224_STATE* PCSYMCRYPT_SHA3_224_STATE;

//
// SYMCRYPT_SHA3_256_STATE
//
// Data structure that stores the state of an ongoing SHA3-256 computation.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA3_256_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_SHA3_256_STATE, * PSYMCRYPT_SHA3_256_STATE;
typedef const SYMCRYPT_SHA3_256_STATE* PCSYMCRYPT_SHA3_256_STATE;

//
// SYMCRYPT_SHA3_384_STATE
//
// Data structure that stores the state of an ongoing SHA3-384 computation.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA3_384_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_SHA3_384_STATE, * PSYMCRYPT_SHA3_384_STATE;
typedef const SYMCRYPT_SHA3_384_STATE* PCSYMCRYPT_SHA3_384_STATE;

//
// SYMCRYPT_SHA3_512_STATE
//
// Data structure that stores the state of an ongoing SHA3-512 computation.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA3_512_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_SHA3_512_STATE, * PSYMCRYPT_SHA3_512_STATE;
typedef const SYMCRYPT_SHA3_512_STATE* PCSYMCRYPT_SHA3_512_STATE;

//
// SYMCRYPT_SHAKE128_STATE
//
// Data structure that stores the state of an ongoing SHAKE128 computation.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHAKE128_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_SHAKE128_STATE, * PSYMCRYPT_SHAKE128_STATE;
typedef const SYMCRYPT_SHAKE128_STATE* PCSYMCRYPT_SHAKE128_STATE;

//
// SYMCRYPT_SHAKE256_STATE
//
// Data structure that stores the state of an ongoing SHAKE256 computation.
//
// Each of the following structures is a Keccak state followed by the magic field.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHAKE256_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_SHAKE256_STATE, * PSYMCRYPT_SHAKE256_STATE;
typedef const SYMCRYPT_SHAKE256_STATE* PCSYMCRYPT_SHAKE256_STATE;

//
// SYMCRYPT_CSHAKE128_STATE
//
// Data structure that stores the state of an ongoing CSHAKE128 computation.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_CSHAKE128_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_CSHAKE128_STATE, * PSYMCRYPT_CSHAKE128_STATE;
typedef const SYMCRYPT_CSHAKE128_STATE* PCSYMCRYPT_CSHAKE128_STATE;

//
// SYMCRYPT_CSHAKE256_STATE
//
// Data structure that stores the state of an ongoing CSHAKE256 computation.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_CSHAKE256_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_CSHAKE256_STATE, * PSYMCRYPT_CSHAKE256_STATE;
typedef const SYMCRYPT_CSHAKE256_STATE* PCSYMCRYPT_CSHAKE256_STATE;

//
// SYMCRYPT_KMAC128_EXPANDED_KEY
//
// Data structure that stores the expanded key for KMAC128.
// The expanded key is itself held as a Keccak state.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KMAC128_EXPANDED_KEY
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_KMAC128_EXPANDED_KEY, * PSYMCRYPT_KMAC128_EXPANDED_KEY;
typedef const SYMCRYPT_KMAC128_EXPANDED_KEY* PCSYMCRYPT_KMAC128_EXPANDED_KEY;

//
// SYMCRYPT_KMAC128_STATE
//
// Data structure that stores the state of an ongoing KMAC128 computation.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KMAC128_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_KMAC128_STATE, * PSYMCRYPT_KMAC128_STATE;
typedef const SYMCRYPT_KMAC128_STATE* PCSYMCRYPT_KMAC128_STATE;

//
// SYMCRYPT_KMAC256_EXPANDED_KEY
//
// Data structure that stores the expanded key for KMAC256.
//
// The expanded key is itself held as a Keccak state, followed by the magic field.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KMAC256_EXPANDED_KEY
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_KMAC256_EXPANDED_KEY, * PSYMCRYPT_KMAC256_EXPANDED_KEY;
typedef const SYMCRYPT_KMAC256_EXPANDED_KEY* PCSYMCRYPT_KMAC256_EXPANDED_KEY;

//
// SYMCRYPT_KMAC256_STATE
//
// Data structure that stores the state of an ongoing KMAC256 computation.
//
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KMAC256_STATE
{
    SYMCRYPT_KECCAK_STATE   ks;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_KMAC256_STATE, * PSYMCRYPT_KMAC256_STATE;
typedef const SYMCRYPT_KMAC256_STATE* PCSYMCRYPT_KMAC256_STATE;


//
// Generic hashing
//

// One OID: a byte pointer plus its length in bytes (pbOID is annotated as
// having cbOID elements).
typedef struct _SYMCRYPT_OID {
                        UINT32  cbOID;
    _Field_size_( cbOID ) PCBYTE  pbOID;
} SYMCRYPT_OID, *PSYMCRYPT_OID;
typedef const SYMCRYPT_OID *PCSYMCRYPT_OID;

//
// OID lists for the most commonly used hash functions
//
// Each list is declared as an array whose length is the matching *_OID_COUNT macro.
//

#define SYMCRYPT_MD5_OID_COUNT      (2)
extern const SYMCRYPT_OID SymCryptMd5OidList[SYMCRYPT_MD5_OID_COUNT];

#define SYMCRYPT_SHA1_OID_COUNT      (2)
extern const SYMCRYPT_OID SymCryptSha1OidList[SYMCRYPT_SHA1_OID_COUNT];

#define SYMCRYPT_SHA224_OID_COUNT      (2)
extern const SYMCRYPT_OID SymCryptSha224OidList[SYMCRYPT_SHA224_OID_COUNT];

#define SYMCRYPT_SHA256_OID_COUNT      (2)
extern const SYMCRYPT_OID SymCryptSha256OidList[SYMCRYPT_SHA256_OID_COUNT];

#define SYMCRYPT_SHA384_OID_COUNT      (2)
extern const SYMCRYPT_OID SymCryptSha384OidList[SYMCRYPT_SHA384_OID_COUNT];

#define SYMCRYPT_SHA512_OID_COUNT      (2)
extern const SYMCRYPT_OID SymCryptSha512OidList[SYMCRYPT_SHA512_OID_COUNT];

#define SYMCRYPT_SHA512_224_OID_COUNT      (2)
extern const SYMCRYPT_OID SymCryptSha512_224OidList[SYMCRYPT_SHA512_224_OID_COUNT];

#define SYMCRYPT_SHA512_256_OID_COUNT      (2)
extern const SYMCRYPT_OID SymCryptSha512_256OidList[SYMCRYPT_SHA512_256_OID_COUNT];

#define SYMCRYPT_SHA3_224_OID_COUNT      (2)
extern const SYMCRYPT_OID SymCryptSha3_224OidList[SYMCRYPT_SHA3_224_OID_COUNT];

#define SYMCRYPT_SHA3_256_OID_COUNT      (2)
+extern const SYMCRYPT_OID SymCryptSha3_256OidList[SYMCRYPT_SHA3_256_OID_COUNT]; + +#define SYMCRYPT_SHA3_384_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha3_384OidList[SYMCRYPT_SHA3_384_OID_COUNT]; + +#define SYMCRYPT_SHA3_512_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha3_512OidList[SYMCRYPT_SHA3_512_OID_COUNT]; + +#define SYMCRYPT_SHAKE128_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptShake128OidList[SYMCRYPT_SHAKE128_OID_COUNT]; + +#define SYMCRYPT_SHAKE256_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptShake256OidList[SYMCRYPT_SHAKE256_OID_COUNT]; + +typedef enum _SYMCRYPT_OID_LIST_ID +{ + SYMCRYPT_OID_LIST_ID_NULL = 0, + SYMCRYPT_OID_LIST_ID_MD5 = 1, + SYMCRYPT_OID_LIST_ID_SHA1 = 2, + SYMCRYPT_OID_LIST_ID_SHA224 = 3, + SYMCRYPT_OID_LIST_ID_SHA256 = 4, + SYMCRYPT_OID_LIST_ID_SHA384 = 5, + SYMCRYPT_OID_LIST_ID_SHA512 = 6, + SYMCRYPT_OID_LIST_ID_SHA512_224 = 7, + SYMCRYPT_OID_LIST_ID_SHA512_256 = 8, + SYMCRYPT_OID_LIST_ID_SHA3_224 = 9, + SYMCRYPT_OID_LIST_ID_SHA3_256 = 10, + SYMCRYPT_OID_LIST_ID_SHA3_384 = 11, + SYMCRYPT_OID_LIST_ID_SHA3_512 = 12, + SYMCRYPT_OID_LIST_ID_SHAKE128 = 13, + SYMCRYPT_OID_LIST_ID_SHAKE256 = 14 +} SYMCRYPT_OID_LIST_ID; + +PCSYMCRYPT_OID +SYMCRYPT_CALL +SymCryptGetOidList( SYMCRYPT_OID_LIST_ID oidId, _Out_opt_ SIZE_T* pCount ); +// +// Returns a pointer to the OID list for the specified OID list ID. If pCount is non-NULL, the +// pointed-to value will be set to the number of elements in the OID list. +// Returns NULL if the OID list ID is invalid. 
+//
+
+// Union with one member per hash state type listed below.
+// NOTE: it has no SHAKE/CSHAKE/KMAC state members.
+typedef union _SYMCRYPT_HASH_STATE
+{
+    SYMCRYPT_MD2_STATE md2State;
+    SYMCRYPT_MD4_STATE md4State;
+    SYMCRYPT_MD5_STATE md5State;
+    SYMCRYPT_SHA1_STATE sha1State;
+    SYMCRYPT_SHA224_STATE sha224State;
+    SYMCRYPT_SHA256_STATE sha256State;
+    SYMCRYPT_SHA384_STATE sha384State;
+    SYMCRYPT_SHA512_STATE sha512State;
+    SYMCRYPT_SHA512_224_STATE sha512_224State;
+    SYMCRYPT_SHA512_256_STATE sha512_256State;
+    SYMCRYPT_SHA3_224_STATE sha3_224State;
+    SYMCRYPT_SHA3_256_STATE sha3_256State;
+    SYMCRYPT_SHA3_384_STATE sha3_384State;
+    SYMCRYPT_SHA3_512_STATE sha3_512State;
+} SYMCRYPT_HASH_STATE, *PSYMCRYPT_HASH_STATE;
+typedef const SYMCRYPT_HASH_STATE *PCSYMCRYPT_HASH_STATE;
+
+// Defined as the SHA-512 result size; presumably the largest result among the hashes above -- TODO confirm.
+#define SYMCRYPT_HASH_MAX_RESULT_SIZE SYMCRYPT_SHA512_RESULT_SIZE
+
+// Forward declarations; both structs are fully defined further down.
+SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HASH;
+SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH;
+
+typedef struct _SYMCRYPT_HASH SYMCRYPT_HASH, *PSYMCRYPT_HASH;
+typedef const SYMCRYPT_HASH *PCSYMCRYPT_HASH;
+typedef struct _SYMCRYPT_PARALLEL_HASH SYMCRYPT_PARALLEL_HASH, *PSYMCRYPT_PARALLEL_HASH;
+typedef const SYMCRYPT_PARALLEL_HASH *PCSYMCRYPT_PARALLEL_HASH;
+
+// Function-pointer types for the five entries of SYMCRYPT_HASH.
+// All of them take the state (or chaining state) as an untyped pointer.
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_INIT_FUNC) ( PVOID pState );
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_APPEND_FUNC) ( PVOID pState, PCBYTE pbData, SIZE_T cbData );
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_RESULT_FUNC) ( PVOID pState, PVOID pbResult );
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_APPEND_BLOCKS_FUNC) ( PVOID pChain, PCBYTE pbData, SIZE_T cbData, SIZE_T * pcbRemaining );
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_STATE_COPY_FUNC) ( PCVOID pStateSrc, PVOID pStateDst );
+
+// Description of one hash algorithm: five function pointers followed by
+// size and offset information about its state structure.
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HASH
+{
+    PSYMCRYPT_HASH_INIT_FUNC initFunc;
+    PSYMCRYPT_HASH_APPEND_FUNC appendFunc;
+    PSYMCRYPT_HASH_RESULT_FUNC resultFunc;
+    PSYMCRYPT_HASH_APPEND_BLOCKS_FUNC appendBlockFunc;
+    PSYMCRYPT_HASH_STATE_COPY_FUNC stateCopyFunc;
+    UINT32 stateSize;       // sizeof( hash state )
+    UINT32 resultSize;      // size of hash result
+    UINT32 inputBlockSize;  // presumably the size of one input block of the hash -- TODO confirm
+    UINT32 chainOffset;     // offset into state structure of the chaining state
+    UINT32 chainSize;       // size of chaining state
+} SYMCRYPT_HASH, *PSYMCRYPT_HASH;
+
+
+//
+// Parallel hashing
+//
+
+// ARM builds use a SHA-256 parallelism range of 3..4; every other build uses 2..8.
+#if SYMCRYPT_CPU_ARM
+#define SYMCRYPT_PARALLEL_SHA256_MIN_PARALLELISM (3)
+#define SYMCRYPT_PARALLEL_SHA256_MAX_PARALLELISM (4)
+#else
+#define SYMCRYPT_PARALLEL_SHA256_MIN_PARALLELISM (2)
+#define SYMCRYPT_PARALLEL_SHA256_MAX_PARALLELISM (8)
+#endif
+
+typedef enum _SYMCRYPT_HASH_OPERATION_TYPE {
+    SYMCRYPT_HASH_OPERATION_APPEND = 1,
+    SYMCRYPT_HASH_OPERATION_RESULT = 2,
+} SYMCRYPT_HASH_OPERATION_TYPE;
+
+// NOTE: the const-pointer typedef below is spelled PCSYMRYPT_... (no 'C' after "SYM").
+// PSYMCRYPT_PARALLEL_HASH_RESULT_DONE_FUNC further down uses that exact spelling,
+// so it must not be "corrected" in one place only.
+typedef struct _SYMCRYPT_PARALLEL_HASH_OPERATION SYMCRYPT_PARALLEL_HASH_OPERATION, *PSYMCRYPT_PARALLEL_HASH_OPERATION;
+typedef const SYMCRYPT_PARALLEL_HASH_OPERATION *PCSYMRYPT_PARALLEL_HASH_OPERATION;
+
+struct _SYMCRYPT_PARALLEL_HASH_OPERATION {
+    SIZE_T iHash;                                   // index of hash object into the state array
+    SYMCRYPT_HASH_OPERATION_TYPE hashOperation;     // operation to be performed
+    _Field_size_( cbBuffer ) PBYTE pbBuffer;        // data to be hashed, or result buffer
+    SIZE_T cbBuffer;                                // size of the pbBuffer buffer.
+    PSYMCRYPT_PARALLEL_HASH_OPERATION next;         // internal scratch space; do not use.
+};
+
+
+SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH_SCRATCH_OPERATION; // as yet unspecified struct
+typedef struct _SYMCRYPT_PARALLEL_HASH_SCRATCH_OPERATION
+    SYMCRYPT_PARALLEL_HASH_SCRATCH_OPERATION, *PSYMCRYPT_PARALLEL_HASH_SCRATCH_OPERATION;
+
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH_SCRATCH_STATE {
+    PVOID hashState;                            // the actual hash state
+    BYTE processingState;
+    BYTE bytesAlreadyProcessed;                 // of the next Append operation
+    UINT64 bytes;                               // # bytes left to process on this state
+    PSYMCRYPT_PARALLEL_HASH_OPERATION next;     // next operation to be performed.
+    PCBYTE pbData;                              // data/size of ongoing append operation; this op has already been removed from the next linked list
+    SIZE_T cbData;
+}SYMCRYPT_PARALLEL_HASH_SCRATCH_STATE, *PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE;
+
+
+//
+// The scratch space used by parallel SHA-256 consists of three regions:
+// - an array of SYMCRYPT_PARALLEL_HASH_SCRATCH_STATE structures, aligned to SYMCRYPT_ALIGN_VALUE.
+// - the work array, an array of pointers to SYMCRYPT_PARALLEL_HASH_SCRATCH_STATEs.
+// - an array of 4 + 8 + 64 SIMD vector elements, aligned to the size of those elements.
+//
+//
+// SIMD element size per CPU family: 32 for x86/AMD64, 16 for ARM/ARM64, 0 when the
+// CPU is unknown; any other configuration is a compile-time error.
+#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
+#define SYMCRYPT_SIMD_ELEMENT_SIZE 32
+#elif SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64
+#define SYMCRYPT_SIMD_ELEMENT_SIZE 16
+#elif SYMCRYPT_CPU_UNKNOWN
+#define SYMCRYPT_SIMD_ELEMENT_SIZE 0
+#else
+#error Unknown CPU
+#endif
+
+// Fixed scratch: (4 + 8 + N) SIMD elements, with N = 64 for SHA-256 and N = 80 for
+// SHA-384/SHA-512, plus (element size - 1) and (SYMCRYPT_ALIGN_VALUE - 1) extra bytes,
+// presumably as worst-case padding for the two alignments described above -- TODO confirm.
+#define SYMCRYPT_PARALLEL_SHA256_FIXED_SCRATCH ( (4 + 8 + 64) * SYMCRYPT_SIMD_ELEMENT_SIZE + SYMCRYPT_SIMD_ELEMENT_SIZE - 1 + SYMCRYPT_ALIGN_VALUE - 1 )
+#define SYMCRYPT_PARALLEL_SHA384_FIXED_SCRATCH ( (4 + 8 + 80) * SYMCRYPT_SIMD_ELEMENT_SIZE + SYMCRYPT_SIMD_ELEMENT_SIZE - 1 + SYMCRYPT_ALIGN_VALUE - 1 )
+#define SYMCRYPT_PARALLEL_SHA512_FIXED_SCRATCH ( (4 + 8 + 80) * SYMCRYPT_SIMD_ELEMENT_SIZE + SYMCRYPT_SIMD_ELEMENT_SIZE - 1 + SYMCRYPT_ALIGN_VALUE - 1 )
+// Per-state scratch: one scratch-state structure plus one work-array pointer to it.
+#define SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH (sizeof( SYMCRYPT_PARALLEL_HASH_SCRATCH_STATE ) + sizeof( PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE ) )
+
+// Repeats the forward declaration and typedefs of _SYMCRYPT_PARALLEL_HASH that
+// already appear earlier in this section.
+SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH;
+typedef struct _SYMCRYPT_PARALLEL_HASH SYMCRYPT_PARALLEL_HASH, *PSYMCRYPT_PARALLEL_HASH;
+typedef const SYMCRYPT_PARALLEL_HASH *PCSYMCRYPT_PARALLEL_HASH;
+
+// Function-pointer types for the entries of SYMCRYPT_PARALLEL_HASH.
+typedef BOOLEAN (SYMCRYPT_CALL * PSYMCRYPT_PARALLEL_HASH_RESULT_FUNC) (PCSYMCRYPT_PARALLEL_HASH pParHash, PSYMCRYPT_COMMON_HASH_STATE pState, PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch, BOOLEAN *pRes );
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_PARALLEL_HASH_RESULT_DONE_FUNC ) (PCSYMCRYPT_PARALLEL_HASH pParHash, PSYMCRYPT_COMMON_HASH_STATE pState, PCSYMRYPT_PARALLEL_HASH_OPERATION pOp);
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_PARALLEL_APPEND_FUNC) (
+    _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork,
+    SIZE_T nPar,
+    SIZE_T nBytes,
+    _Out_writes_( cbSimdScratch ) PBYTE pbSimdScratch,
+    SIZE_T cbSimdScratch );
+
+// Description of one parallel hash: the underlying SYMCRYPT_HASH plus the
+// fixed scratch size and the parallel result/append callbacks.
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH
+{
+    PCSYMCRYPT_HASH pHash;
+    UINT32 parScratchFixed;     // fixed scratch size for parallel hash
+    PSYMCRYPT_PARALLEL_HASH_RESULT_FUNC parResult1Func;
+    PSYMCRYPT_PARALLEL_HASH_RESULT_FUNC parResult2Func;
+    PSYMCRYPT_PARALLEL_HASH_RESULT_DONE_FUNC parResultDoneFunc;
+
+    PSYMCRYPT_PARALLEL_APPEND_FUNC parAppendFunc;
+} SYMCRYPT_PARALLEL_HASH, *PSYMCRYPT_PARALLEL_HASH;
+
+
+//======================================================================================================
+// MAC
+//
+
+
+//
+// SYMCRYPT_HMAC_MD5_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-MD5.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_MD5_EXPANDED_KEY
+{
+    SYMCRYPT_MD5_CHAINING_STATE innerState;     // presumably the chaining state of the inner HMAC hash -- TODO confirm
+    SYMCRYPT_MD5_CHAINING_STATE outerState;     // presumably the chaining state of the outer HMAC hash -- TODO confirm
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_MD5_EXPANDED_KEY, *PSYMCRYPT_HMAC_MD5_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_MD5_EXPANDED_KEY * PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_MD5_STATE
+//
+// Data structure that encodes an ongoing HMAC-MD5 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_MD5_STATE
+{
+    SYMCRYPT_MD5_STATE hash;                    // full MD5 hash state
+    PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pKey;      // read-only pointer to the expanded key
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_MD5_STATE, *PSYMCRYPT_HMAC_MD5_STATE;
+typedef const SYMCRYPT_HMAC_MD5_STATE *PCSYMCRYPT_HMAC_MD5_STATE;
+
+
+//
+// SYMCRYPT_HMAC_SHA1_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA1.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA1_EXPANDED_KEY
+{
+    SYMCRYPT_SHA1_CHAINING_STATE innerState;    // presumably the chaining state of the inner HMAC hash -- TODO confirm
+    SYMCRYPT_SHA1_CHAINING_STATE outerState;    // presumably the chaining state of the outer HMAC hash -- TODO confirm
+    SYMCRYPT_MAGIC_FIELD                        // macro defined outside this excerpt
+} SYMCRYPT_HMAC_SHA1_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA1_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA1_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA1_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA1 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA1_STATE
+{
+    SYMCRYPT_SHA1_STATE hash;                   // full SHA-1 hash state
+    PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pKey;     // read-only pointer to the expanded key
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA1_STATE, *PSYMCRYPT_HMAC_SHA1_STATE;
+typedef const SYMCRYPT_HMAC_SHA1_STATE *PCSYMCRYPT_HMAC_SHA1_STATE;
+
+
+//
+// SYMCRYPT_HMAC_SHA224_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA224.
+// NOTE: the chaining states are of type SYMCRYPT_SHA256_CHAINING_STATE, the same
+// type the HMAC-SHA256 expanded key uses.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA224_EXPANDED_KEY
+{
+    SYMCRYPT_SHA256_CHAINING_STATE innerState;
+    SYMCRYPT_SHA256_CHAINING_STATE outerState;
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA224_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA224_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA224_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA224_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA224 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA224_STATE
+{
+    SYMCRYPT_SHA224_STATE hash;
+    PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pKey;
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA224_STATE, *PSYMCRYPT_HMAC_SHA224_STATE;
+typedef const SYMCRYPT_HMAC_SHA224_STATE *PCSYMCRYPT_HMAC_SHA224_STATE;
+
+
+//
+// SYMCRYPT_HMAC_SHA256_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA256.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA256_EXPANDED_KEY
+{
+    SYMCRYPT_SHA256_CHAINING_STATE innerState;  // presumably the chaining state of the inner HMAC hash -- TODO confirm
+    SYMCRYPT_SHA256_CHAINING_STATE outerState;  // presumably the chaining state of the outer HMAC hash -- TODO confirm
+    SYMCRYPT_MAGIC_FIELD                        // macro defined outside this excerpt
+} SYMCRYPT_HMAC_SHA256_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA256_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA256_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA256_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA256 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA256_STATE
+{
+    SYMCRYPT_SHA256_STATE hash;                 // full SHA-256 hash state
+    PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pKey;   // read-only pointer to the expanded key
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA256_STATE, *PSYMCRYPT_HMAC_SHA256_STATE;
+typedef const SYMCRYPT_HMAC_SHA256_STATE *PCSYMCRYPT_HMAC_SHA256_STATE;
+
+
+//
+// SYMCRYPT_HMAC_SHA384_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA384.
+// NOTE: the chaining states are of type SYMCRYPT_SHA512_CHAINING_STATE.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA384_EXPANDED_KEY
+{
+    SYMCRYPT_SHA512_CHAINING_STATE innerState;
+    SYMCRYPT_SHA512_CHAINING_STATE outerState;
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA384_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA384_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA384_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA384_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA384 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA384_STATE
+{
+    SYMCRYPT_SHA384_STATE hash;
+    PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pKey;
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA384_STATE, *PSYMCRYPT_HMAC_SHA384_STATE;
+typedef const SYMCRYPT_HMAC_SHA384_STATE *PCSYMCRYPT_HMAC_SHA384_STATE;
+
+//
+// SYMCRYPT_HMAC_SHA512_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA512.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_EXPANDED_KEY
+{
+    SYMCRYPT_SHA512_CHAINING_STATE innerState;  // presumably the chaining state of the inner HMAC hash -- TODO confirm
+    SYMCRYPT_SHA512_CHAINING_STATE outerState;  // presumably the chaining state of the outer HMAC hash -- TODO confirm
+    SYMCRYPT_MAGIC_FIELD                        // macro defined outside this excerpt
+} SYMCRYPT_HMAC_SHA512_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA512_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA512_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA512_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA512 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_STATE
+{
+    SYMCRYPT_SHA512_STATE hash;                 // full SHA-512 hash state
+    PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pKey;   // read-only pointer to the expanded key
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA512_STATE, *PSYMCRYPT_HMAC_SHA512_STATE;
+typedef const SYMCRYPT_HMAC_SHA512_STATE *PCSYMCRYPT_HMAC_SHA512_STATE;
+
+//
+// SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA512_224.
+// NOTE: the chaining states are of type SYMCRYPT_SHA512_CHAINING_STATE, the same
+// type the HMAC-SHA512 expanded key uses.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY
+{
+    SYMCRYPT_SHA512_CHAINING_STATE innerState;
+    SYMCRYPT_SHA512_CHAINING_STATE outerState;
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA512_224_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA512_224 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_224_STATE
+{
+    SYMCRYPT_SHA512_224_STATE hash;
+    PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pKey;
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA512_224_STATE, *PSYMCRYPT_HMAC_SHA512_224_STATE;
+typedef const SYMCRYPT_HMAC_SHA512_224_STATE *PCSYMCRYPT_HMAC_SHA512_224_STATE;
+
+//
+// SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA512_256.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY
+{
+    SYMCRYPT_SHA512_CHAINING_STATE innerState;  // presumably the chaining state of the inner HMAC hash -- TODO confirm
+    SYMCRYPT_SHA512_CHAINING_STATE outerState;  // presumably the chaining state of the outer HMAC hash -- TODO confirm
+    SYMCRYPT_MAGIC_FIELD                        // macro defined outside this excerpt
+} SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA512_256_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA512_256 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_256_STATE
+{
+    SYMCRYPT_SHA512_256_STATE hash;
+    PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pKey;
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_SHA512_256_STATE, *PSYMCRYPT_HMAC_SHA512_256_STATE;
+typedef const SYMCRYPT_HMAC_SHA512_256_STATE *PCSYMCRYPT_HMAC_SHA512_256_STATE;
+
+//
+// SYMCRYPT_HMAC_EXPANDED_KEY
+//
+// Generic HMAC Expanded Key data structure
+// Unlike the per-algorithm HMAC keys, it carries a pointer to the hash description
+// and stores the inner/outer states as full SYMCRYPT_HASH_STATE unions.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_EXPANDED_KEY
+{
+    PCSYMCRYPT_HASH pHash;              // hash algorithm description this key refers to
+    SYMCRYPT_HASH_STATE innerState;
+    SYMCRYPT_HASH_STATE outerState;
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_EXPANDED_KEY, * PSYMCRYPT_HMAC_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_EXPANDED_KEY* PCSYMCRYPT_HMAC_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_STATE
+//
+// Generic HMAC data structure
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_STATE
+{
+    PCSYMCRYPT_HMAC_EXPANDED_KEY pKey;  // read-only pointer to the generic expanded key
+    SYMCRYPT_HASH_STATE hash;
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_HMAC_STATE, * PSYMCRYPT_HMAC_STATE;
+typedef const SYMCRYPT_HMAC_STATE* PCSYMCRYPT_HMAC_STATE;
+
+//
+// SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA3-224
+// Wraps the generic HMAC expanded key; it declares no other member and no
+// SYMCRYPT_MAGIC_FIELD of its own.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY
+{
+    SYMCRYPT_HMAC_EXPANDED_KEY generic;
+
+} SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA3_224_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA3-224 computation.
+// Wraps the generic HMAC state; it declares no other member.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_224_STATE
+{
+    SYMCRYPT_HMAC_STATE generic;
+
+} SYMCRYPT_HMAC_SHA3_224_STATE, *PSYMCRYPT_HMAC_SHA3_224_STATE;
+typedef const SYMCRYPT_HMAC_SHA3_224_STATE *PCSYMCRYPT_HMAC_SHA3_224_STATE;
+
+//
+// SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA3-256
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY
+{
+    SYMCRYPT_HMAC_EXPANDED_KEY generic;
+
+} SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA3_256_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA3-256 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_256_STATE
+{
+    SYMCRYPT_HMAC_STATE generic;
+
+} SYMCRYPT_HMAC_SHA3_256_STATE, *PSYMCRYPT_HMAC_SHA3_256_STATE;
+typedef const SYMCRYPT_HMAC_SHA3_256_STATE *PCSYMCRYPT_HMAC_SHA3_256_STATE;
+
+//
+// SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA3-384
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY
+{
+    SYMCRYPT_HMAC_EXPANDED_KEY generic;
+
+} SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA3_384_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA3-384 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_384_STATE
+{
+    SYMCRYPT_HMAC_STATE generic;    // the only member: a generic HMAC state
+
+} SYMCRYPT_HMAC_SHA3_384_STATE, *PSYMCRYPT_HMAC_SHA3_384_STATE;
+typedef const SYMCRYPT_HMAC_SHA3_384_STATE *PCSYMCRYPT_HMAC_SHA3_384_STATE;
+
+//
+// SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY
+//
+// Data structure to store an expanded key for HMAC-SHA3-512
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY
+{
+    SYMCRYPT_HMAC_EXPANDED_KEY generic;     // the only member: a generic HMAC expanded key
+
+} SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY;
+typedef const SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY;
+
+//
+// SYMCRYPT_HMAC_SHA3_512_STATE
+//
+// Data structure that encodes an ongoing HMAC-SHA3-512 computation.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_512_STATE
+{
+    SYMCRYPT_HMAC_STATE generic;
+
+} SYMCRYPT_HMAC_SHA3_512_STATE, *PSYMCRYPT_HMAC_SHA3_512_STATE;
+typedef const SYMCRYPT_HMAC_SHA3_512_STATE *PCSYMCRYPT_HMAC_SHA3_512_STATE;
+
+//
+// SYMCRYPT_AES_EXPANDED_KEY
+//
+// Expanded key for AES operations.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_AES_EXPANDED_KEY {
+    // 29 round keys, each a 4x4 array of bytes.
+    SYMCRYPT_ALIGN BYTE RoundKey[29][4][4];
+        // Round keys, first the encryption round keys in encryption order,
+        // followed by the decryption round keys in decryption order.
+        // The first decryption round key is the last encryption round key.
+        // AES-256 has 14 rounds and thus 15 round keys for encryption and 15
+        // for decryption. As they share one round key, we need room for 29.
+    // Both pointers below have type "pointer to a 4x4 BYTE array", i.e. they address
+    // one whole round key; presumably they point into RoundKey -- TODO confirm.
+    BYTE (*lastEncRoundKey)[4][4];  // Pointer to last encryption round key
+                                    // also the first round key for decryption
+    BYTE (*lastDecRoundKey)[4][4];  // Pointer to last decryption round key.
+
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_AES_EXPANDED_KEY, *PSYMCRYPT_AES_EXPANDED_KEY;
+typedef const SYMCRYPT_AES_EXPANDED_KEY * PCSYMCRYPT_AES_EXPANDED_KEY;
+
+//
+// AES-CMAC
+//
+// Note: SYMCRYPT_AES_BLOCK_SIZE is not yet defined, so we use
+// literal constants instead.
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_AES_CMAC_EXPANDED_KEY
+{
+    SYMCRYPT_AES_EXPANDED_KEY aesKey;
+    BYTE K1[16];    // presumably the first CMAC subkey; 16 is a literal block size -- TODO confirm
+    BYTE K2[16];    // presumably the second CMAC subkey -- TODO confirm
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_AES_CMAC_EXPANDED_KEY, *PSYMCRYPT_AES_CMAC_EXPANDED_KEY;
+typedef const SYMCRYPT_AES_CMAC_EXPANDED_KEY * PCSYMCRYPT_AES_CMAC_EXPANDED_KEY;
+
+// State of an AES-CMAC computation: two 16-byte buffers, a byte count for buf,
+// and a read-only pointer to the expanded key.
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_AES_CMAC_STATE
+{
+    BYTE chain[16];
+    BYTE buf[16];
+    SIZE_T bytesInBuf;
+    PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pKey;
+
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_AES_CMAC_STATE, *PSYMCRYPT_AES_CMAC_STATE;
+typedef const SYMCRYPT_AES_CMAC_STATE * PCSYMCRYPT_AES_CMAC_STATE;
+
+//
+// POLY1305
+//
+
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_POLY1305_STATE
+{
+    UINT32 r[4];            // R := \sum 2^{32*i} r[i]. R is already clamped.
+    UINT32 s[4];            // S := \sum 2^{32*i} s[i]
+    UINT32 a[5];            // Accumulator := sum 2^{32*i} a[i], a[4] <= approx 8
+    SIZE_T bytesInBuffer;
+    BYTE buf[16];           // Partial block buffer
+
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_POLY1305_STATE, *PSYMCRYPT_POLY1305_STATE;
+
+//
+// XTS-AES
+//
+
+// Two independent AES expanded keys; this structure declares no SYMCRYPT_MAGIC_FIELD of its own.
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_XTS_AES_EXPANDED_KEY
+{
+    SYMCRYPT_AES_EXPANDED_KEY key1;
+    SYMCRYPT_AES_EXPANDED_KEY key2;
+} SYMCRYPT_XTS_AES_EXPANDED_KEY, *PSYMCRYPT_XTS_AES_EXPANDED_KEY;
+typedef const SYMCRYPT_XTS_AES_EXPANDED_KEY * PCSYMCRYPT_XTS_AES_EXPANDED_KEY;
+
+
+//-----------------------------------------------------------------
+// Mac description table
+// Below are the typedefs for the Mac description table type
+// Callers can use this to define Mac algorithm they want to use
+//
+
+#define SYMCRYPT_MAC_MAX_RESULT_SIZE SYMCRYPT_HMAC_SHA512_RESULT_SIZE
+
+// Union of every MAC state type listed below (HMAC variants, AES-CMAC, KMAC).
+typedef union _SYMCRYPT_MAC_STATE
+{
+    SYMCRYPT_HMAC_MD5_STATE md5State;
+    SYMCRYPT_HMAC_SHA1_STATE sha1State;
+    SYMCRYPT_HMAC_SHA224_STATE sha224State;
+    SYMCRYPT_HMAC_SHA256_STATE sha256State;
+    SYMCRYPT_HMAC_SHA384_STATE sha384State;
+    SYMCRYPT_HMAC_SHA512_STATE sha512State;
+    SYMCRYPT_HMAC_SHA512_224_STATE sha512_224State;
+    SYMCRYPT_HMAC_SHA512_256_STATE sha512_256State;
+    SYMCRYPT_HMAC_SHA3_224_STATE sha3_224State;
+    SYMCRYPT_HMAC_SHA3_256_STATE sha3_256State;
+    SYMCRYPT_HMAC_SHA3_384_STATE sha3_384State;
+    SYMCRYPT_HMAC_SHA3_512_STATE sha3_512State;
+    SYMCRYPT_AES_CMAC_STATE aescmacState;
+    SYMCRYPT_KMAC128_STATE kmac128State;
+    SYMCRYPT_KMAC256_STATE kmac256State;
+} SYMCRYPT_MAC_STATE, *PSYMCRYPT_MAC_STATE;
+typedef const SYMCRYPT_MAC_STATE *PCSYMCRYPT_MAC_STATE;
+
+// Union of the expanded-key types matching the members of SYMCRYPT_MAC_STATE, in the same order.
+typedef union _SYMCRYPT_MAC_EXPANDED_KEY
+{
+    SYMCRYPT_HMAC_MD5_EXPANDED_KEY md5Key;
+    SYMCRYPT_HMAC_SHA1_EXPANDED_KEY sha1Key;
+    SYMCRYPT_HMAC_SHA224_EXPANDED_KEY sha224Key;
+    SYMCRYPT_HMAC_SHA256_EXPANDED_KEY sha256Key;
+    SYMCRYPT_HMAC_SHA384_EXPANDED_KEY sha384Key;
+    SYMCRYPT_HMAC_SHA512_EXPANDED_KEY sha512Key;
+    SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY sha512_224Key;
+    SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY sha512_256Key;
+    SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY sha3_224Key;
+    SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY sha3_256Key;
+    SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY sha3_384Key;
+    SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY sha3_512Key;
+    SYMCRYPT_AES_CMAC_EXPANDED_KEY aescmacKey;
+    SYMCRYPT_KMAC128_EXPANDED_KEY kmac128Key;
+    SYMCRYPT_KMAC256_EXPANDED_KEY kmac256Key;
+} SYMCRYPT_MAC_EXPANDED_KEY, *PSYMCRYPT_MAC_EXPANDED_KEY;
+typedef const SYMCRYPT_MAC_EXPANDED_KEY *PCSYMCRYPT_MAC_EXPANDED_KEY;
+
+// Function-pointer types for MAC operations; state and key are passed as untyped pointers.
+// NOTE: PSYMCRYPT_MAC_RESULT_EX is declared here, but no member of SYMCRYPT_MAC below has that type.
+typedef SYMCRYPT_ERROR (SYMCRYPT_CALL * PSYMCRYPT_MAC_EXPAND_KEY)
+    ( PVOID pExpandedKey, PCBYTE pbKey, SIZE_T cbKey );
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_MAC_INIT) ( PVOID pState, PCVOID pExpandedKey );
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_MAC_APPEND)( PVOID pState, PCBYTE pbData, SIZE_T cbData );
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_MAC_RESULT) ( PVOID pState, PVOID pbResult );
+typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_MAC_RESULT_EX) ( PVOID pState, PVOID pbResult, SIZE_T cbResult );
+
+// The MAC description table entry: four callbacks, three sizes, and HMAC-specific information.
+typedef struct _SYMCRYPT_MAC
+{
+    PSYMCRYPT_MAC_EXPAND_KEY expandKeyFunc;
+    PSYMCRYPT_MAC_INIT initFunc;
+    PSYMCRYPT_MAC_APPEND appendFunc;
+    PSYMCRYPT_MAC_RESULT resultFunc;
+    SIZE_T expandedKeySize;
+    SIZE_T stateSize;
+    SIZE_T resultSize;
+    const PCSYMCRYPT_HASH * ppHashAlgorithm;    // NULL for MACs not based on hashes
+    UINT32 outerChainingStateOffset;            // Offset into expanded key of outer chaining state; 0 for non-HMAC algorithms
+} SYMCRYPT_MAC, *PSYMCRYPT_MAC;
+typedef const SYMCRYPT_MAC *PCSYMCRYPT_MAC;
+
+
+
+//
+// 3DES
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_3DES_EXPANDED_KEY {
+    UINT32 roundKey[3][16][2];  // 3 keys, 16 rounds, 2 UINT32s/round
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_3DES_EXPANDED_KEY, *PSYMCRYPT_3DES_EXPANDED_KEY;
+typedef const SYMCRYPT_3DES_EXPANDED_KEY * PCSYMCRYPT_3DES_EXPANDED_KEY;
+
+//
+// DES
+//
+// The DES expanded key is a wrapper around a full 3DES expanded key.
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_DES_EXPANDED_KEY {
+    SYMCRYPT_3DES_EXPANDED_KEY threeDes;
+} SYMCRYPT_DES_EXPANDED_KEY, *PSYMCRYPT_DES_EXPANDED_KEY;
+typedef const SYMCRYPT_DES_EXPANDED_KEY * PCSYMCRYPT_DES_EXPANDED_KEY;
+
+//
+// DESX
+//
+// A DES expanded key plus two 8-byte whitening values.
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_DESX_EXPANDED_KEY {
+    SYMCRYPT_DES_EXPANDED_KEY desKey;
+    BYTE inputWhitening[8];
+    BYTE outputWhitening[8];
+} SYMCRYPT_DESX_EXPANDED_KEY, *PSYMCRYPT_DESX_EXPANDED_KEY;
+typedef const SYMCRYPT_DESX_EXPANDED_KEY * PCSYMCRYPT_DESX_EXPANDED_KEY;
+
+//
+// RC2
+//
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_RC2_EXPANDED_KEY {
+    UINT16 K[64];
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_RC2_EXPANDED_KEY, *PSYMCRYPT_RC2_EXPANDED_KEY;
+typedef const SYMCRYPT_RC2_EXPANDED_KEY * PCSYMCRYPT_RC2_EXPANDED_KEY;
+
+
+//
+// CCM states for incremental computations
+//
+#define SYMCRYPT_CCM_BLOCK_SIZE (16)
+
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_CCM_STATE {
+    PCSYMCRYPT_BLOCKCIPHER pBlockCipher;
+    PCVOID pExpandedKey;
+    UINT64 cbData;              // exact length of data
+    SIZE_T cbTag;
+    SIZE_T cbNonce;
+    SIZE_T cbCounter;           // # bytes in counter field
+    UINT64 bytesProcessed;      // data bytes processed so far
+    _Field_range_( 0, SYMCRYPT_CCM_BLOCK_SIZE-1 ) SIZE_T bytesInMacBlock;
+    SYMCRYPT_ALIGN BYTE counterBlock[SYMCRYPT_CCM_BLOCK_SIZE];      // Current counter block value
+    SYMCRYPT_ALIGN BYTE macBlock[SYMCRYPT_CCM_BLOCK_SIZE];          // Current state of the CBC-MAC part of CCM
+    SYMCRYPT_ALIGN BYTE keystreamBlock[SYMCRYPT_CCM_BLOCK_SIZE];    // Remaining key stream if partial block has been processed
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_CCM_STATE, *PSYMCRYPT_CCM_STATE;
+
+
+//
+// GHash & GCM
+//
+
+// Union of the block-cipher expanded keys GCM can embed; AES is the only member.
+typedef union _SYMCRYPT_GCM_SUPPORTED_BLOCKCIPHER_KEYS
+{
+    SYMCRYPT_AES_EXPANDED_KEY aes;
+} SYMCRYPT_GCM_SUPPORTED_BLOCKCIPHER_KEYS;
+
+#define SYMCRYPT_GCM_BLOCKCIPHER_KEY_SIZE sizeof( union _SYMCRYPT_GCM_SUPPORTED_BLOCKCIPHER_KEYS )
+
+#define SYMCRYPT_GF128_FIELD_SIZE (128)
+#define SYMCRYPT_GF128_BLOCK_SIZE (16)  // # bytes in a field element/block
+#define SYMCRYPT_GCM_BLOCK_SIZE (16)
+#define SYMCRYPT_GCM_MAX_KEY_SIZE (32)
+
+
+// 2^36 - 32; NOTE(review): presumably a byte count matching the GCM plaintext limit
+// of 2^39 - 256 bits in NIST SP 800-38D -- confirm.
+#define SYMCRYPT_GCM_MAX_DATA_SIZE (((UINT64)1 << 36) - 32)
+
+// MOD_MASK is block size minus one (15); ROUND_MASK is its bitwise complement.
+#define SYMCRYPT_GCM_BLOCK_MOD_MASK (SYMCRYPT_GCM_BLOCK_SIZE - 1)
+#define SYMCRYPT_GCM_BLOCK_ROUND_MASK (~SYMCRYPT_GCM_BLOCK_MOD_MASK)
+
+#if SYMCRYPT_CPU_X86
+    //
+    // x86 needs extra alignment of the GHASH expanded key to support
+    // aligned (fast) XMM access. AMD64 has enough natural alignment to
+    // achieve this.
+    //
+    #define SYMCRYPT_GHASH_EXTRA_KEY_ALIGNMENT
+#endif
+
+#define SYMCRYPT_GHASH_ALLOW_XMM (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64)
+#define SYMCRYPT_GHASH_ALLOW_NEON (SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64)
+
+
+// NEON setup. Depending on CPU and compiler this block includes the NEON header,
+// maps the __n128/__n64 names onto arm_neon.h vector types where those names are
+// not otherwise defined, and (ARM64 only) defines the SYMCRYPT_SET_N* constructor macros.
+#if SYMCRYPT_CPU_ARM
+#include <arm_neon.h>
+#if SYMCRYPT_GNUC || defined(__clang__)
+    #define __n128 uint32x4_t
+    #define __n64 uint64x1_t
+#endif
+
+#elif SYMCRYPT_CPU_ARM64
+
+    #if SYMCRYPT_MS_VC && !defined(__clang__)
+        #include <arm64_neon.h>
+
+        // See section 6.7.8 of the C standard for details on this initializer usage.
+        #define SYMCRYPT_SET_N128_U64(d0, d1) \
+            ((__n128) {.n128_u64 = {d0, d1}})
+        #define SYMCRYPT_SET_N64_U64(d0) \
+            ((__n64) {.n64_u64 = {d0}})
+        #define SYMCRYPT_SET_N128_U8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15) \
+            ((__n128) {.n128_u8 = {b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}})
+    #else
+        #include <arm_neon.h>
+
+        #define __n128 uint8x16_t
+        #define __n64 uint8x8_t
+
+        #define SYMCRYPT_SET_N128_U64(d0, d1) \
+            ((__n128) ((uint64x2_t) {d0, d1}))
+        #define SYMCRYPT_SET_N64_U64(d0) \
+            ((__n64) ((uint64x1_t) {d0}))
+        #define SYMCRYPT_SET_N128_U8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15) \
+            ((__n128) ((uint8x16_t) {b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}))
+
+        // vmull_p64 and vmull_high_p64 are defined in terms of their own names. The
+        // preprocessor does not re-expand a macro name inside its own expansion, so the
+        // inner call resolves to the non-macro entity of that name (presumably the
+        // arm_neon.h intrinsic); the macros add casts on the arguments and the result.
+        #define vmullq_p64( a, b ) ((__n128) vmull_p64(vgetq_lane_p64((poly64x2_t)a, 0), vgetq_lane_p64((poly64x2_t)b, 0)))
+        #define vmull_p64( a, b ) ((__n128) vmull_p64( (poly64_t)a, (poly64_t)b ))
+        #define vmull_high_p64( a, b ) ((__n128) vmull_high_p64( (poly64x2_t)a, (poly64x2_t)b ))
+    #endif
+
+#endif
+
+//
+// All platforms use the same in-memory representation:
+// elements of GF(2^128) stored as two 64-bit integers which are best
+// interpreted as a single 128-bit integer, least significant half first.
+// Note: the actual GF(2^128) bit order is reversed in the standard
+// for some reason; the
+// polynomial \sum b_i x^i is represented by integer \sum b_i 2^{127-i})
+// On x86/amd64 the same in-memory byte structure is also accessed as an
+// __m128i, which works as both the UINT64s, UINT32s, and the __m128i use
+// LSBfirst convention.
+//
+// NOTE(review): the member ul[4] is declared in both conditional sections below, so at
+// most one of SYMCRYPT_GHASH_ALLOW_XMM / SYMCRYPT_GHASH_ALLOW_NEON may be non-zero in a
+// given build; presumably the CPU selection macros guarantee this -- confirm.
+//
+typedef SYMCRYPT_ALIGN_UNION _SYMCRYPT_GF128_ELEMENT {
+    UINT64 ull[2];
+#if SYMCRYPT_GHASH_ALLOW_XMM
+    //
+    // The XMM code accesses this both as UINT32[] and __m128i
+    // This is safe as XMM code only runs on little endian machines so the
+    // ordering is known.
+    //
+    __m128i m128i;
+    UINT32 ul[4];
+#endif
+#if SYMCRYPT_GHASH_ALLOW_NEON
+    __n128 n128;
+    UINT32 ul[4];
+#endif
+} SYMCRYPT_GF128_ELEMENT, *PSYMCRYPT_GF128_ELEMENT;
+typedef const SYMCRYPT_GF128_ELEMENT * PCSYMCRYPT_GF128_ELEMENT;
+
+
+
+// GHASH expanded key: a table of SYMCRYPT_GF128_FIELD_SIZE field elements.
+// With SYMCRYPT_GHASH_EXTRA_KEY_ALIGNMENT defined, the structure instead holds raw
+// space for one extra element plus an offset; presumably the table is placed at an
+// aligned position inside tableSpace and tableOffset records where -- TODO confirm.
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_GHASH_EXPANDED_KEY {
+#if defined( SYMCRYPT_GHASH_EXTRA_KEY_ALIGNMENT )
+    UINT32 tableOffset;
+    BYTE tableSpace[ (SYMCRYPT_GF128_FIELD_SIZE + 1) * sizeof( SYMCRYPT_GF128_ELEMENT ) ];
+#else
+    SYMCRYPT_GF128_ELEMENT table[ SYMCRYPT_GF128_FIELD_SIZE ];
+#endif
+} SYMCRYPT_GHASH_EXPANDED_KEY, *PSYMCRYPT_GHASH_EXPANDED_KEY;
+typedef const SYMCRYPT_GHASH_EXPANDED_KEY * PCSYMCRYPT_GHASH_EXPANDED_KEY;
+
+
+// GCM expanded key: the GHASH key, the block cipher description and its expanded key,
+// and a byte buffer of up to SYMCRYPT_GCM_MAX_KEY_SIZE bytes with its length
+// (presumably a copy of the raw key -- TODO confirm).
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_GCM_EXPANDED_KEY {
+    SYMCRYPT_GHASH_EXPANDED_KEY ghashKey;
+    PCSYMCRYPT_BLOCKCIPHER pBlockCipher;
+    SYMCRYPT_GCM_SUPPORTED_BLOCKCIPHER_KEYS blockcipherKey;
+    SIZE_T cbKey;
+    BYTE abKey[SYMCRYPT_GCM_MAX_KEY_SIZE];
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_GCM_EXPANDED_KEY, * PSYMCRYPT_GCM_EXPANDED_KEY;
+typedef const SYMCRYPT_GCM_EXPANDED_KEY * PCSYMCRYPT_GCM_EXPANDED_KEY;
+
+
+typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_GCM_STATE {
+    PCSYMCRYPT_GCM_EXPANDED_KEY pKey;
+    UINT64 cbData;          // Number of data bytes
+    UINT64 cbAuthData;      // Number of AAD bytes
+    _Field_range_( 0, SYMCRYPT_GCM_BLOCK_SIZE-1 ) SIZE_T bytesInMacBlock;
+    SYMCRYPT_GF128_ELEMENT ghashState;
+    SYMCRYPT_ALIGN BYTE counterBlock[SYMCRYPT_GCM_BLOCK_SIZE];
+    SYMCRYPT_ALIGN BYTE macBlock[SYMCRYPT_GCM_BLOCK_SIZE];
+    SYMCRYPT_ALIGN BYTE keystreamBlock[SYMCRYPT_GCM_BLOCK_SIZE];
+    SYMCRYPT_MAGIC_FIELD
+} SYMCRYPT_GCM_STATE, * PSYMCRYPT_GCM_STATE;
+typedef const SYMCRYPT_GCM_STATE * PCSYMCRYPT_GCM_STATE;
+
+
+//
+// Block ciphers
+//
+#define SYMCRYPT_MAX_BLOCK_SIZE (32) // max block length of a block cipher.

//
// Function-pointer types used for the entries of the block cipher descriptor
// (struct _SYMCRYPT_BLOCKCIPHER) declared right below.
//

// Key expansion entry: receives a writable expanded-key buffer and a key of cbKey bytes.
// This is the only entry type that returns a SYMCRYPT_ERROR.
typedef SYMCRYPT_ERROR( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY )
    (PVOID pExpandedKey, PCBYTE pbKey, SIZE_T cbKey);

// Encrypt/decrypt entry without a length parameter (source and destination buffer only).
typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_CRYPT ) (PCVOID pExpandedKey, PCBYTE pbSrc, PBYTE pbDst);

// ECB entry: like the entry above, plus an explicit data length.
typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ) (PCVOID pExpandedKey, PCBYTE pbSrc, PBYTE pbDst, SIZE_T cbData);

// Chaining-mode entry (used below for CBC and CTR): takes a writable chaining value in addition to the data buffers.
typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE ) (PCVOID pExpandedKey, PBYTE pbChainingValue, PCBYTE pbSrc, PBYTE pbDst, SIZE_T cbData);

// MAC-mode entry (used below for CBC-MAC): updates the chaining value only, there is no destination buffer.
typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_MAC_MODE ) (PCVOID pExpandedKey, PBYTE pbChainingValue, PCBYTE pbSrc, SIZE_T cbData);

// AEAD partial-processing entry (used below for GCM): operates on a writable state object instead of an expanded key.
typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE ) (PVOID pState, PCBYTE pbSrc, PBYTE pbDst, SIZE_T cbData);

//
// Block cipher descriptor: a table of entry points plus the block size and expanded key size.
// The first three entries are mandatory; every other function pointer is NULL when no
// optimized version is available.
//
struct _SYMCRYPT_BLOCKCIPHER {
    PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY    expandKeyFunc;      // mandatory
    PSYMCRYPT_BLOCKCIPHER_CRYPT         encryptFunc;        // mandatory
    PSYMCRYPT_BLOCKCIPHER_CRYPT         decryptFunc;        // mandatory
    PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB     ecbEncryptFunc;     // NULL if no optimized version available
    PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB     ecbDecryptFunc;     // NULL if no optimized version available
    PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE    cbcEncryptFunc;     // NULL if no optimized version available
    PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE    cbcDecryptFunc;     // NULL if no optimized version available
    PSYMCRYPT_BLOCKCIPHER_MAC_MODE      cbcMacFunc;         // NULL if no optimized version available
    PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE    ctrMsb64Func;       // NULL if no optimized version available
    PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc; // NULL if no optimized version available
    PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc; // NULL if no optimized version available
    _Field_range_( 1, SYMCRYPT_MAX_BLOCK_SIZE ) SIZE_T blockSize; // = SYMCRYPT_XXX_BLOCK_SIZE, power of 2, 1 <= value <= 32.
    SIZE_T expandedKeySize; // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY )
};



//
// Session structs
//

// Bit in SYMCRYPT_SESSION.flags: set for an encryption session, clear for a decryption session.
#define SYMCRYPT_FLAG_SESSION_ENCRYPT (0x1)

//
// SYMCRYPT_SESSION tracks the Nonces being used in a session. It is used differently depending on
// whether the session is an Encryption session or a Decryption session.
//
// In Encryption sessions, SYMCRYPT_SESSION tracks the Nonce which was used in the most recent
// attempted encryption in the session.
// messageNumber is atomically incremented by each encryption call, and the encryption method uses
// the messageNumber value that is the _result_ of the increment.
//
// In Decryption sessions, SYMCRYPT_SESSION tracks the most recently received Nonces in a series of
// successful decryptions. Nonces used in unsuccessful decryption calls do not update SYMCRYPT_SESSION.
// Information is tracked such that the decryption function can detect repeated Nonce values and
// fail decryption in this case. In order for this to work the message numbers that are provided
// to decrypt calls must be somewhat ordered. Provided message numbers may be arbitrarily far ahead
// of previously successfully decrypted message numbers, but may only be up to 63 behind the highest
// message number successfully decrypted so far.
// messageNumber normally represents the highest message number used in a successful decryption in
// this session.
// (The exception is at initialization, where messageNumber is initialized to 64
// without the corresponding 0th bit in the replayMask being set - this initial state represents
// there have been no successful decryptions yet, and that the earliest messageNumber that can be
// successfully received is 1)
// replayMask represents whether a window of 64 message numbers up to messageNumber have already been
// successfully used;
// bit n of replayMask (from n=0 to n=63) represents message number = (messageNumber-n), 0 means not
// yet used, and 1 means already used in a successful decryption call
//

// Select the update strategy for the replay state: a 128-bit compare-and-swap on AMD64 and ARM64,
// 64-bit atomics everywhere else. The required struct alignment follows from that choice.
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64
#define SYMCRYPT_USE_CAS128 (1)

// For CompareAndSwap128 method, SYMCRYPT_SESSION must be aligned to 16B
#define SYMCRYPT_ALIGN_SESSION SYMCRYPT_ALIGN_TYPE_AT(struct, 16)
#else
#define SYMCRYPT_USE_CAS128 (0)

// For method with only 64-bit atomics, SYMCRYPT_SESSION must be aligned to 8B
#define SYMCRYPT_ALIGN_SESSION SYMCRYPT_ALIGN_TYPE_AT(struct, 8)
#endif

// Nested struct used within SYMCRYPT_SESSION
// (two adjacent UINT64 fields, so that both can be updated together as one 16-byte unit)
typedef SYMCRYPT_ALIGN_SESSION _SYMCRYPT_SESSION_REPLAY_STATE {
    UINT64 replayMask;
    // 64 bit mask representing message numbers previously successfully decrypted up to 63
    // before the most recent message number.

    UINT64 messageNumber;
    // the last 8 bytes of the Nonce (MSB-first)
} SYMCRYPT_SESSION_REPLAY_STATE, * PSYMCRYPT_SESSION_REPLAY_STATE;
typedef const SYMCRYPT_SESSION_REPLAY_STATE * PCSYMCRYPT_SESSION_REPLAY_STATE;

// Per-session nonce tracking state; see the description of Encryption and Decryption sessions above.
typedef SYMCRYPT_ALIGN_SESSION _SYMCRYPT_SESSION {
    SYMCRYPT_SESSION_REPLAY_STATE replayState;
    // nested replayState struct is to improve code clarity in SymCryptSessionDecryptUpdate*

    UINT32 senderId;
    // the first 4 bytes of the Nonce (MSB-first)
    // (set by the caller and constant for the lifetime of a session)

    UINT32 flags;
    // SYMCRYPT_FLAG_SESSION_ENCRYPT indicates the struct is to be used for an encryption session,
    // otherwise the struct is to be used for a decryption session

    PVOID pMutex;
    // Pointer to a fast single-process mutex object used to enable atomic update of replayMask and
    // messageNumber in the absence of support for a 128b CAS operation
} SYMCRYPT_SESSION, * PSYMCRYPT_SESSION;

#define SYMCRYPT_SESSION_MAX_MESSAGE_NUMBER (0xffffffff00000000ull)
// We do not allow messageNumber to go above some maximum value (currently 2^64 - 2^32)
// This gives us a large window to prevent many concurrent encryption threads from updating the
// session such that the messageNumber overflows and the same IV is used in many encryptions
// (i.e. we would only potentially get a spurious success using a repeated IV when there are
// >2^32 concurrent threads!)

// Compile-time layout check: the two replay-state fields must sit at offsets 0 and 8 of the session.
#if SYMCRYPT_USE_CAS128
C_ASSERT(SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SESSION, replayState.replayMask) == 0);
C_ASSERT(SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SESSION, replayState.messageNumber) == 8);
// For CompareAndSwap128 method, replayMask and messageNumber must be tightly packed
#endif

//
// RC4
//

//
// Some CPUs like the S array type to be larger than BYTE. We abstract the data type
// of the S array to accommodate such CPUs in future.
//

typedef BYTE SYMCRYPT_RC4_S_TYPE;

// RC4 state: the 256-entry S array plus the two byte-sized indices i and j.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_RC4_STATE {
    SYMCRYPT_RC4_S_TYPE S[256];
    BYTE i;
    BYTE j;
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_RC4_STATE, *PSYMCRYPT_RC4_STATE;

//
// ChaCha20
//

// ChaCha20 state: key and nonce words, the current stream offset, and a 64-byte keystream buffer.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_CHACHA20_STATE {
    UINT32 key[8];
    UINT32 nonce[3];
    UINT64 offset;                  // offset to use for next operation
    BOOLEAN keystreamBufferValid;   // keystream buffer matches offset value
    BYTE keystream[64];
} SYMCRYPT_CHACHA20_STATE, *PSYMCRYPT_CHACHA20_STATE;


//
// AES_CTR_DRBG
//

typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_RNG_AES_STATE {
    //
    // Key and V value are in one array, to allow fast generation of both of them
    // in a single call.
    //
    BYTE keyAndV[32 + 16];
    BYTE previousBlock[16];
    UINT64 requestCounter;      // called reseed_counter in SP 800-90
    BOOLEAN fips140_2Check;     // set if the FIPS 140-2 continuous self-test is required
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_RNG_AES_STATE, * PSYMCRYPT_RNG_AES_STATE;

// Wrapper type around SYMCRYPT_RNG_AES_STATE; it adds no fields of its own.
typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_RNG_AES_FIPS140_2_STATE {
    SYMCRYPT_RNG_AES_STATE rng;
} SYMCRYPT_RNG_AES_FIPS140_2_STATE, *PSYMCRYPT_RNG_AES_FIPS140_2_STATE;


//
// MARVIN32
//

typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MARVIN32_EXPANDED_SEED
{
    UINT32 s[2];
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_MARVIN32_EXPANDED_SEED, *PSYMCRYPT_MARVIN32_EXPANDED_SEED;
typedef const SYMCRYPT_MARVIN32_EXPANDED_SEED * PCSYMCRYPT_MARVIN32_EXPANDED_SEED;


// The chaining state has the same layout as the expanded seed.
typedef SYMCRYPT_MARVIN32_EXPANDED_SEED SYMCRYPT_MARVIN32_CHAINING_STATE, * PSYMCRYPT_MARVIN32_CHAINING_STATE;

typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MARVIN32_STATE
{
    SYMCRYPT_ALIGN BYTE buffer[8];              // 4 bytes of data, 4 more bytes for final padding
    SYMCRYPT_MARVIN32_CHAINING_STATE chain;     // chaining state
    PCSYMCRYPT_MARVIN32_EXPANDED_SEED pSeed;    //
    UINT32 dataLength;                          // length of the data processed so far, mod 2^32
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_MARVIN32_STATE, *PSYMCRYPT_MARVIN32_STATE;
typedef const SYMCRYPT_MARVIN32_STATE *PCSYMCRYPT_MARVIN32_STATE;


//
// Export blob sizes
//
// Fixed sizes, one constant per hash algorithm. All SHA-3 variants share the Keccak state size.
//

#define SYMCRYPT_MD2_STATE_EXPORT_SIZE          (80)
#define SYMCRYPT_MD4_STATE_EXPORT_SIZE          (116)
#define SYMCRYPT_MD5_STATE_EXPORT_SIZE          (116)
#define SYMCRYPT_SHA1_STATE_EXPORT_SIZE         (120)
#define SYMCRYPT_SHA224_STATE_EXPORT_SIZE       (132)
#define SYMCRYPT_SHA256_STATE_EXPORT_SIZE       (132)
#define SYMCRYPT_SHA384_STATE_EXPORT_SIZE       (236)
#define SYMCRYPT_SHA512_STATE_EXPORT_SIZE       (236)
#define SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE   (236)
#define SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE   (236)

#define SYMCRYPT_KECCAK_STATE_EXPORT_SIZE       (234)
#define SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE     SYMCRYPT_KECCAK_STATE_EXPORT_SIZE
#define SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE     SYMCRYPT_KECCAK_STATE_EXPORT_SIZE
#define SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE     SYMCRYPT_KECCAK_STATE_EXPORT_SIZE
#define SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE     SYMCRYPT_KECCAK_STATE_EXPORT_SIZE


//
// KDF algorithms
//
// Several of the expanded-key types below share one shape: an expanded MAC key
// together with a pointer to the MAC algorithm it belongs to.
//

//
// PBKDF2
//

typedef struct _SYMCRYPT_PBKDF2_EXPANDED_KEY {
    SYMCRYPT_MAC_EXPANDED_KEY macKey;
    PCSYMCRYPT_MAC macAlg;
} SYMCRYPT_PBKDF2_EXPANDED_KEY, *PSYMCRYPT_PBKDF2_EXPANDED_KEY;
typedef const SYMCRYPT_PBKDF2_EXPANDED_KEY *PCSYMCRYPT_PBKDF2_EXPANDED_KEY;

//
// SP 800-108
//

typedef struct _SYMCRYPT_SP800_108_EXPANDED_KEY {
    SYMCRYPT_MAC_EXPANDED_KEY macKey;
    PCSYMCRYPT_MAC macAlg;
} SYMCRYPT_SP800_108_EXPANDED_KEY, *PSYMCRYPT_SP800_108_EXPANDED_KEY;
typedef const SYMCRYPT_SP800_108_EXPANDED_KEY *PCSYMCRYPT_SP800_108_EXPANDED_KEY;

//
// TLS PRF 1.1
//

// Holds two fixed expanded keys (HMAC-MD5 and HMAC-SHA1) instead of a generic MAC key.
typedef struct _SYMCRYPT_TLSPRF1_1_EXPANDED_KEY {
    SYMCRYPT_HMAC_MD5_EXPANDED_KEY macMd5Key;
    SYMCRYPT_HMAC_SHA1_EXPANDED_KEY macSha1Key;
} SYMCRYPT_TLSPRF1_1_EXPANDED_KEY, *PSYMCRYPT_TLSPRF1_1_EXPANDED_KEY;
typedef const SYMCRYPT_TLSPRF1_1_EXPANDED_KEY *PCSYMCRYPT_TLSPRF1_1_EXPANDED_KEY;

//
// TLS PRF 1.2
//

typedef struct _SYMCRYPT_TLSPRF1_2_EXPANDED_KEY {
    SYMCRYPT_MAC_EXPANDED_KEY macKey;
    PCSYMCRYPT_MAC macAlg;
} SYMCRYPT_TLSPRF1_2_EXPANDED_KEY, *PSYMCRYPT_TLSPRF1_2_EXPANDED_KEY;
typedef const SYMCRYPT_TLSPRF1_2_EXPANDED_KEY *PCSYMCRYPT_TLSPRF1_2_EXPANDED_KEY;

//
// SSH-KDF
//
// Hash-based: stores a hash algorithm pointer and a hash state rather than a MAC key.
typedef struct _SYMCRYPT_SSHKDF_EXPANDED_KEY {
    PCSYMCRYPT_HASH pHashFunc;
    SYMCRYPT_HASH_STATE hashState;
} SYMCRYPT_SSHKDF_EXPANDED_KEY, *PSYMCRYPT_SSHKDF_EXPANDED_KEY;
typedef const SYMCRYPT_SSHKDF_EXPANDED_KEY *PCSYMCRYPT_SSHKDF_EXPANDED_KEY;

//
// SRTP-KDF
//
// AES-based: stores only an expanded AES key.
typedef struct _SYMCRYPT_SRTPKDF_EXPANDED_KEY {
    SYMCRYPT_AES_EXPANDED_KEY aesExpandedKey;
} SYMCRYPT_SRTPKDF_EXPANDED_KEY, *PSYMCRYPT_SRTPKDF_EXPANDED_KEY;
typedef const SYMCRYPT_SRTPKDF_EXPANDED_KEY *PCSYMCRYPT_SRTPKDF_EXPANDED_KEY;

//
// HKDF
//

typedef struct _SYMCRYPT_HKDF_EXPANDED_KEY {
    SYMCRYPT_MAC_EXPANDED_KEY macKey;
    PCSYMCRYPT_MAC macAlg;
} SYMCRYPT_HKDF_EXPANDED_KEY, *PSYMCRYPT_HKDF_EXPANDED_KEY;
typedef const SYMCRYPT_HKDF_EXPANDED_KEY *PCSYMCRYPT_HKDF_EXPANDED_KEY;

//
// SSKDF
//
typedef struct _SYMCRYPT_SSKDF_MAC_EXPANDED_SALT {
    SYMCRYPT_MAC_EXPANDED_KEY macKey;
    PCSYMCRYPT_MAC macAlg;
} SYMCRYPT_SSKDF_MAC_EXPANDED_SALT, *PSYMCRYPT_SSKDF_MAC_EXPANDED_SALT;
typedef const SYMCRYPT_SSKDF_MAC_EXPANDED_SALT *PCSYMCRYPT_SSKDF_MAC_EXPANDED_SALT;

//
// Digit & alignment sizes.
//
// WARNING: do not change these without updating all the optimized code,
// including assembler code.
// The FDEF_DIGIT_SIZE is the digit size used by the FDEF format.
//
#if SYMCRYPT_CPU_AMD64

#define SYMCRYPT_FDEF_DIGIT_SIZE    64
#define SYMCRYPT_ASYM_ALIGN_VALUE   32

#elif SYMCRYPT_CPU_ARM64

#define SYMCRYPT_FDEF_DIGIT_SIZE    32
#define SYMCRYPT_ASYM_ALIGN_VALUE   32

#else

#define SYMCRYPT_FDEF_DIGIT_SIZE    16
#define SYMCRYPT_ASYM_ALIGN_VALUE   16  // We have some bugs when ASYM_ALIGN_VALUE > DIGIT_SIZE; need to fix them if we implement AVX2-based x86 code.

#endif

// Rounds the address _p up to the next multiple of SYMCRYPT_ASYM_ALIGN_VALUE and yields it as a PBYTE.
// The mask arithmetic relies on SYMCRYPT_ASYM_ALIGN_VALUE being a power of two.
#define SYMCRYPT_ASYM_ALIGN_UP( _p ) ((PBYTE) ( ((SIZE_T) (_p) + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1 ) ) )


//==============================================================================================
// Object types for low-level API
//
// INT          integer in range 0..N for some N
// DIVISOR      an integer > 0 that can be used to divide with.
// MODULUS      a value M > 1 to use in modulo-M computations
// MODELEMENT   An element in a modulo-M ring.
// ECPOINT      A point on an elliptic curve.
//
// These objects are all aligned to SYMCRYPT_ASYM_ALIGN
//
#define SYMCRYPT_ASYM_ALIGN SYMCRYPT_ALIGN_AT(SYMCRYPT_ASYM_ALIGN_VALUE)
// The alignment specifier goes before 'struct' for MSVC and after it for GNU-style compilers.
#if SYMCRYPT_MS_VC
#define SYMCRYPT_ASYM_ALIGN_STRUCT SYMCRYPT_ASYM_ALIGN struct
#elif SYMCRYPT_GNUC
#define SYMCRYPT_ASYM_ALIGN_STRUCT struct SYMCRYPT_ASYM_ALIGN
#else
#error Unknown compiler
#endif

// Forward declarations; each object type also gets pointer and pointer-to-const typedefs.
SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_INT;
typedef struct _SYMCRYPT_INT SYMCRYPT_INT;
typedef SYMCRYPT_INT * PSYMCRYPT_INT;
typedef const SYMCRYPT_INT * PCSYMCRYPT_INT;

SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_DIVISOR;
typedef struct _SYMCRYPT_DIVISOR SYMCRYPT_DIVISOR;
typedef SYMCRYPT_DIVISOR * PSYMCRYPT_DIVISOR;
typedef const SYMCRYPT_DIVISOR * PCSYMCRYPT_DIVISOR;

SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MODULUS;
typedef struct _SYMCRYPT_MODULUS SYMCRYPT_MODULUS;
typedef SYMCRYPT_MODULUS * PSYMCRYPT_MODULUS;
typedef const SYMCRYPT_MODULUS * PCSYMCRYPT_MODULUS;

SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MODELEMENT;
typedef struct _SYMCRYPT_MODELEMENT SYMCRYPT_MODELEMENT;
typedef SYMCRYPT_MODELEMENT * PSYMCRYPT_MODELEMENT;
typedef const SYMCRYPT_MODELEMENT * PCSYMCRYPT_MODELEMENT;

SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_ECPOINT;
typedef struct _SYMCRYPT_ECPOINT SYMCRYPT_ECPOINT;
typedef SYMCRYPT_ECPOINT * PSYMCRYPT_ECPOINT;
typedef const SYMCRYPT_ECPOINT * PCSYMCRYPT_ECPOINT;


//
// Arithmetic formats
//

#define SYMCRYPT_ANYSIZE 1 // used to mark arrays of arbitrary size

// Number of bits in one FDEF digit (SYMCRYPT_FDEF_DIGIT_SIZE is a byte count).
#define SYMCRYPT_FDEF_DIGIT_BITS    (8 * SYMCRYPT_FDEF_DIGIT_SIZE)

// Number of digits needed to hold _nBits bits, i.e. _nBits / DIGIT_BITS rounded up.
// The remainder is rounded up separately from the quotient so that no intermediate
// value exceeds _nBits itself.
#define SYMCRYPT_FDEF_DIGITS_FROM_BITS( _nBits ) \
    ( ((_nBits) / SYMCRYPT_FDEF_DIGIT_BITS) + \
      ((((_nBits) & (SYMCRYPT_FDEF_DIGIT_BITS - 1)) + (SYMCRYPT_FDEF_DIGIT_BITS - 1)) / SYMCRYPT_FDEF_DIGIT_BITS) )

// Number of bytes needed to hold _nBits bits, rounded up.
#define SYMCRYPT_BYTES_FROM_BITS( _nBits )  (((_nBits) + 7) / 8)

// The maximum number of bits in any integer value that the library supports. If the
// caller's input exceeds this bound then the integer object will not be created.
// The caller either must ensure the bound is not exceeded, or check for NULL before
// using created SymCrypt objects.
// The primary purpose of this limit is to avoid integer overflows in size computations.
// Having a reasonable upper bound avoids all size overflows, even on 32-bit CPUs
#define SYMCRYPT_INT_MAX_BITS   ((UINT32) (1 << 20))

//
// Upper bound for the number of digits: this MUST be enforced on runtime
// on all Allocate, SizeOf, and Create calls which take as input a digit number.
//
// Using this upper bound and the SYMCRYPT_INT_MAX_BITS upper bound we can argue
// that no integer overflow on 32-bit sizes can happen. Note that the computed upper
// bounds are very loose and the actual values are much smaller.
//
#define SYMCRYPT_FDEF_UPB_DIGITS    (SYMCRYPT_FDEF_DIGITS_FROM_BITS( SYMCRYPT_INT_MAX_BITS ))




//
// All of the following SYMCRYPT_FDEF_SIZEOF_XXX_FROM_YYY computations for the four
// main SymCrypt objects (INT, DIVISOR, MODULUS, MODELEMENT) return a value not
// larger than 2^19 if the inputs _nDigits and _bits are not larger than
// SYMCRYPT_FDEF_UPB_DIGITS and SYMCRYPT_INT_MAX_BITS respectively (For MODELEMENT this bound
// is 2^17). The latter bounds must be enforced on runtime for all calculations taking as inputs
// number of digits or bits.
+// +// The 2^19 upper bound is derived from: +// - the maximum (byte) size of an "integer": 2^20 bits / 8 = 2^17 bytes +// - "sizeof" computations add up to less than 2^18 bytes ~ 262 Kb +// - the modulus object contains two "integers" +// + +// +// Type fields contain the following: +// lower 16 bits: offset into virtual table (if any) +// upper 16 bits: bits 16-23: 1-character object type. Bits 24-31: 1 char implementation type +// The upper bits allow objects to be recognized in memory, making debugging easier. +// + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_INT { + UINT32 type; + _Field_range_( 1, SYMCRYPT_FDEF_UPB_DIGITS ) UINT32 nDigits; // digit size depends on run-time decisions... + UINT32 cbSize; + + SYMCRYPT_MAGIC_FIELD + SYMCRYPT_ASYM_ALIGN union { + struct { + UINT32 uint32[SYMCRYPT_ANYSIZE]; // FDEF: array UINT32[nDigits * # uint32 per digit] + } fdef; + } ti; // we must have a name here. 'ti' stands for 'Type-Int', it helps catch type errors when type-casting macros are used. +}; + +#define SYMCRYPT_FDEF_INT_PUINT32( p ) (&(p)->ti.fdef.uint32[0]) + + +#define SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ) ((_nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE + sizeof( SYMCRYPT_INT ) ) +#define SYMCRYPT_FDEF_SIZEOF_INT_FROM_BITS( _bits ) SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( SYMCRYPT_FDEF_DIGITS_FROM_BITS( _bits )) + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_DIVISOR { + UINT32 type; + _Field_range_( 1, SYMCRYPT_FDEF_UPB_DIGITS ) UINT32 nDigits; // digit size depends on run-time decisions... + UINT32 cbSize; + + UINT32 nBits; // # bits in divisor + + SYMCRYPT_MAGIC_FIELD + union{ + struct { + UINT64 W; // approximate inverse of the divisor. Some implementations will use 64 bits, others 32 bits. + } fdef; + } td; + SYMCRYPT_INT Int; // Having a full Int here uses more space, but allows any Divisor to still be used as an Int. 
+ // This structure is directly followed by the Int extension +}; + +#define SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( _nDigits ) ((_nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE + sizeof( SYMCRYPT_DIVISOR ) ) +#define SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_BITS( _bits ) SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( SYMCRYPT_FDEF_DIGITS_FROM_BITS( _bits )) + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MODULUS { + UINT32 type; + _Field_range_( 1, SYMCRYPT_FDEF_UPB_DIGITS ) UINT32 nDigits; // digit size depends on run-time decisions... + UINT32 cbSize; // Size of modulus object + + UINT32 flags; // The flags the modulus was created with + UINT32 cbModElement; // Size of one modElement + UINT64 inv64; // -1/modulus mod 2^64 (always set but only to a useful value when the modulus is odd) + + SYMCRYPT_MAGIC_FIELD + union{ + struct { + //UINT32 nUint32Used; // # 32-bit words used in representing numbers. modulus < 2^{32*nUint32Used}. + // only values used are nDigits * uint32-per-digit or specific smaller values for optimized implementations + PCUINT32 Rsqr; // R^2 mod modulus, in uint32 form, nUint32Used words. Stored after Divisor. R = 2^{32*nUint32Used} + } montgomery; + struct { + UINT32 k; // modulus = 2^<bitsize of modelement> - k + } pseudoMersenne; + } tm; // type specific data. Every Modulus can be used as a generic modulus, so no type-specific data for generic. 
+ + SYMCRYPT_DIVISOR Divisor; + // This structure is directly followed by: + // The extensions of the Divisor object + // and after that: + // FDEF: Rsqr as an array of UINT32, size = nDigits * digitsize + // FDEF: negDivisor as an array of UINT32, size = nDigits * digitsize +}; + +#define SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_DIGITS( _nDigits ) (sizeof( SYMCRYPT_MODULUS ) + SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( _nDigits ) + (2 * _nDigits * SYMCRYPT_FDEF_DIGIT_SIZE) ) +#define SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_BITS( _bits ) SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_DIGITS(SYMCRYPT_FDEF_DIGITS_FROM_BITS( _bits )) + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MODELEMENT { + // ModElements just store the information without any header. This union makes this well-defined, and allows easy access. + union{ + UINT32 uint32[SYMCRYPT_ANYSIZE]; + } d; +}; + +#define SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nDigits ) ((_nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE) +#define SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_BITS( _bits ) SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( SYMCRYPT_FDEF_DIGITS_FROM_BITS( _bits ) ) + +// +// Upper bound for scratch size computations for FDEF objects depending only on digits +// +// The following 14 scratch size computation macros are all of the form: +// Some SIZEOF macros + max( some other scratch macros ) +// and all depend on some number of digits. (Slight exceptions are +// INT_TO_MODULUS and INT_PRIME_GEN but they can fit into the below +// rationale.) 
//
// One can see that the deepest recursion in these macros and the biggest
// return value is for
//      INT_PRIME_GEN -> INT_MILLER_RABIN -> MODEXP ->
//          COMMON_MOD_OPERATIONS -> SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD
//
// Using the 2^19 (2^17) bound on the sizeof computations the biggest contribution on the above chain is for MODEXP:
//      ((1 << SYMCRYPT_FDEF_MAX_WINDOW_MODEXP) + 2) * SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nModDigits )
// which is bounded above by
//      (2^6 + 2) * 2^17 < 2^24
//
// By doubling on each subsequent recursive call we get the conservative
// upper bound for all scratch size computation macros of 2^26.
//
// Every macro parameter is parenthesized where it is used in arithmetic, so callers may pass
// expressions (e.g. a + b) as digit counts.
//

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) (16 * (_nDigits)) // unused currently, but this catches errors

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( _nDigits ) (16 * (_nDigits)) // unused currently, but nonzero size catches errors

// _nDivisorDigits is accepted for interface symmetry but does not contribute to the size.
#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) ( ((_nSrcDigits) + 1) * SYMCRYPT_FDEF_DIGIT_SIZE )

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) ( \
    4 * SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ) + \
    SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( 2 * (_nDigits) ) + \
    2 * SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( _nDigits ) + \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2 * (_nDigits), _nDigits ), \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( 2 * (_nDigits) ), \
                  SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) )) )

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) \
    ( (2*(_nModDigits) * SYMCRYPT_FDEF_DIGIT_SIZE) + \
      SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2*(_nModDigits), _nModDigits )) // for mult: tmp product + divmod scratch

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) ( \
    2*SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ) + \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ), \
                  SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nDigits ) ))

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) ( \
    SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ) + \
    SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nDigits ) + \
    SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( 2*(_nDigits) ) + \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nDigits ), \
                  SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( 2*(_nDigits) ) ))

// The divisor-digits argument of INT_DIVMOD is now the macro's own parameter; it previously named
// an identifier (nDigits) that is not a parameter of this macro.
#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) ( \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ),\
         (2*(_nDigits)+1) * SYMCRYPT_FDEF_DIGIT_SIZE + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2*(_nDigits) + 1, _nDigits )) )

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODINV( _nModDigits ) ( \
    4 * SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nModDigits ) + \
    3 * SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nModDigits ) + \
    SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) )

#define SYMCRYPT_FDEF_MAX_WINDOW_MODEXP     (6)

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODEXP( _nModDigits ) ( \
    ((1 << SYMCRYPT_FDEF_MAX_WINDOW_MODEXP) + 2) * SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nModDigits ) + \
    SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) )

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_POTENTIAL_PRIME( _nDigits ) (0)

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MILLER_RABIN( _nDigits ) ( \
    SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_DIGITS(_nDigits) + \
    3*SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS(_nDigits) + \
    SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS(_nDigits) + \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_MODULUS(_nDigits), \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(_nDigits), \
                  SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODEXP( _nDigits ) )) )

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) ( \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_POTENTIAL_PRIME( _nDigits ), \
                  SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MILLER_RABIN( _nDigits ) ))

#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) ( \
    SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS * SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( 1 ) + \
    SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( 1 ) + \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( 1 ), \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( _nDigits, 1 ), \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ), \
    SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_POTENTIAL_PRIME( _nDigits ), \
                  SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MILLER_RABIN( _nDigits ) ))))) 

//
// Upper bound for SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODMULTIEXP
//
// _nBase and _nBitsExp are bounded by SYMCRYPT_MODMULTIEXP_MAX_NBASES = 8 and
// SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP = 2^20. Therefore the upper bound on this computation
// is
//      2^21 + 2^3*(2^6+4)*2^17 + 2^3*2^20*4 < 2^27
//
#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODMULTIEXP( _nModDigits, _nBases, _nBitsExp ) ( \
    SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) + \
    ((_nBases)*(1<<SYMCRYPT_FDEF_MAX_WINDOW_MODEXP) + 4)*SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nModDigits ) + \
    (((_nBases)*(_nBitsExp)*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1)) )
// Note: We need +4 multiplied with SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS so that SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODMULTIEXP
// is always at least 2 modelements bigger than SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODEXP (see modexp.c)

//
// Support for masked operations
//
// Each macro yields a UINT32 that is either all ones or all zeros, computed without a branch:
// the operand is widened to 64 bits and the upper half of a subtraction carries the result.
// The operands are therefore expected to fit in 32 bits.
//

#define SYMCRYPT_MASK32_SET             ((UINT32)-1)
#define SYMCRYPT_MASK32_NONZERO( _v )   ((UINT32)(((UINT64)0 - (_v)) >> 32))            // all ones iff _v != 0
#define SYMCRYPT_MASK32_ZERO( _v )      (~SYMCRYPT_MASK32_NONZERO( _v ))                // all ones iff _v == 0
#define SYMCRYPT_MASK32_EQ( _a, _b )    (~SYMCRYPT_MASK32_NONZERO( (_a) ^ (_b) ))       // all ones iff _a == _b
#define SYMCRYPT_MASK32_LT( _a, _b )    ((UINT32)( ((UINT64)(_a) - (_b)) >> 32 ))       // all ones iff _a < _b


//
// Dispatch definitions
// When multiple formats are supported, this is where the information of the multiple formats is combined.
//
// See the comments in SYMCRYPT_FDEF_SCRATCH_XXX regarding 32 bit overflow protection. All results
// are bounded above by 2^27.
//

#define SYMCRYPT_INTERNAL_SIZEOF_INT_FROM_BITS( _bitsize )          SYMCRYPT_FDEF_SIZEOF_INT_FROM_BITS( _bitsize )
#define SYMCRYPT_INTERNAL_SIZEOF_DIVISOR_FROM_BITS( _bitsize )      SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_BITS( _bitsize )
#define SYMCRYPT_INTERNAL_SIZEOF_MODULUS_FROM_BITS( _bitsize )      SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_BITS( _bitsize )
#define SYMCRYPT_INTERNAL_SIZEOF_MODELEMENT_FROM_BITS( _bitsize )   SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_BITS( _bitsize )

#define SYMCRYPT_INTERNAL_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps ) SYMCRYPT_FDEF_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps )
// For now we don't need the pubExpBits so we drop them, but we might use them later.

+ +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_MUL( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODINV( _nModDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODINV( _nModDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODEXP( _nModDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODEXP( _nModDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) + +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODMULTIEXP( _nModDigits, _nBases, _nBitsExp ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODMULTIEXP( _nModDigits, _nBases, _nBitsExp ) + +// +// Forward declarations for MlKemkey types +// +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEMKEY; +typedef struct _SYMCRYPT_MLKEMKEY SYMCRYPT_MLKEMKEY; +typedef SYMCRYPT_MLKEMKEY * PSYMCRYPT_MLKEMKEY; +typedef const SYMCRYPT_MLKEMKEY * 
PCSYMCRYPT_MLKEMKEY; + +// +// Forward declarations for MlDsakey types +// +struct _SYMCRYPT_MLDSAKEY; +typedef struct _SYMCRYPT_MLDSAKEY SYMCRYPT_MLDSAKEY; +typedef SYMCRYPT_MLDSAKEY * PSYMCRYPT_MLDSAKEY; +typedef const SYMCRYPT_MLDSAKEY * PCSYMCRYPT_MLDSAKEY; + +// +// Forward declarations for CompositeMlKemkey types +// +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_COMPOSITE_MLKEMKEY; +typedef struct _SYMCRYPT_COMPOSITE_MLKEMKEY SYMCRYPT_COMPOSITE_MLKEMKEY; +typedef SYMCRYPT_COMPOSITE_MLKEMKEY * PSYMCRYPT_COMPOSITE_MLKEMKEY; +typedef const SYMCRYPT_COMPOSITE_MLKEMKEY * PCSYMCRYPT_COMPOSITE_MLKEMKEY; + +// +// RSA padding scratch definitions +// +// The maximum sizes of the state and the result for all hash algorithms are +// sizeof(SYMCRYPT_HASH_STATE) and SYMCRYPT_HASH_MAX_RESULT_SIZE, both not bigger +// 2^20. All the nBytes inputs are bounded by 2^17 (the maximum byte-size +// of the RSA modulus). +// +// Thus a total upper bound on these results is 2^20. +// +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_OAEP( _hashAlgorithm, _nBytesOAEP ) ( SymCryptHashStateSize( _hashAlgorithm ) + \ + SymCryptHashResultSize( _hashAlgorithm ) + \ + 2*(_nBytesOAEP - 1) ) + +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_PKCS1( _nBytesPKCS1 ) ( _nBytesPKCS1 ) + +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_PSS( _hashAlgorithm, _nBytesMessage, _nBytesPSS ) ( SymCryptHashStateSize( _hashAlgorithm ) + \ + _nBytesMessage + \ + 3*(_nBytesPSS) + 5 ) + +// +// RSAKEY Type +// + +#define SYMCRYPT_FDEF_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps ) \ + sizeof( SYMCRYPT_RSAKEY ) + \ + (nPrimes + 1) * SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_BITS( modBits ) + \ + nPrimes * SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_BITS( modBits ) + \ + (nPrimes + 1) * nPubExps * SYMCRYPT_FDEF_SIZEOF_INT_FROM_BITS( modBits ) +// 1 modulus object per prime + 1 for the RSA modulus +// 1 modelement for every crtInverse +// 1 int per pubexp for each privexp + 1 int per prime*pubexp for each crtprivexp + +#define 
SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES (2) +#define SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS (1) + +#define SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS (256) // Some of our SCS code requires at least 32 bytes... +#define SYMCRYPT_RSAKEY_MAX_BITSIZE_MODULUS (1 << 16) // Avoid any integer overflows in size calculations + +// RSA FIPS self-tests require at least 496 bits to avoid fatal +// Require caller to specify NO_FIPS for up to 1024 bits as running FIPS tests on too-small keys +// does not make it FIPS certifiable and gives the wrong impression to callers +#define SYMCRYPT_RSAKEY_FIPS_MIN_BITSIZE_MODULUS (1024) + +#define SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME (128) +#define SYMCRYPT_RSAKEY_MAX_BITSIZE_PRIME (SYMCRYPT_RSAKEY_MAX_BITSIZE_MODULUS / 2) + +// Minimum allowable bit sizes for generated and imported parameters for +// the RSA modulus and each prime. + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_RSAKEY { + UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in + // Also tracks which per-key selftests have been performed on this key + // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_RSAKEY_*, and + // SYMCRYPT_PCT_* values + + UINT32 cbTotalSize; // Total size of the rsa key + BOOLEAN hasPrivateKey; // Set to true if there is private key information set + + UINT32 nSetBitsOfModulus; // Bits of modulus specified during creation + + UINT32 nBitsOfModulus; // Number of bits of the value of the modulus (not the object's size) + UINT32 nDigitsOfModulus; // Number of digits of the modulus object (always equal to SymCryptDigitsFromBits(nSetBitsOfModulus)) + + UINT32 nPubExp; // Number of public exponents + + UINT32 nPrimes; // Number of primes, can be 0 if the object only supports public keys + UINT32 nBitsOfPrimes[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Number of bits of the value of each prime (not the object's size) + UINT32 nDigitsOfPrimes[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Number of digits of each prime object + UINT32 nMaxDigitsOfPrimes; // Maximum 
number of digits in nDigitsOfPrimes + + UINT64 au64PubExp[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS]; + // SYMCRYPT_ASYM_ALIGN'ed buffers that point to memory allocated for each object + PBYTE pbPrimes[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + PBYTE pbCrtInverses[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + PBYTE pbPrivExps[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS]; + PBYTE pbCrtPrivExps[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS * SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + + // SymCryptObjects + PSYMCRYPT_MODULUS pmModulus; // The modulus N=p*q + PSYMCRYPT_MODULUS pmPrimes[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Pointers to the secret primes + PSYMCRYPT_MODELEMENT peCrtInverses[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Pointers to the CRT inverses of the primes + PSYMCRYPT_INT piPrivExps[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS]; + // Pointers to the corresponding private exponents + PSYMCRYPT_INT piCrtPrivExps[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS * SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Pointers to the private exponents modulo each prime minus 1 (for CRT) + + SYMCRYPT_MAGIC_FIELD + // Followed by: + // Modulus + // Primes + // CrtInverses + // PrivExps + // CrtPrivExps +} SYMCRYPT_RSAKEY; +typedef SYMCRYPT_RSAKEY * PSYMCRYPT_RSAKEY; +typedef const SYMCRYPT_RSAKEY * PCSYMCRYPT_RSAKEY; + +// +// The following definitions relating to trial division are not needed by normal callers +// but are used by the test program to measure performance of components. +// + +typedef struct _SYMCRYPT_TRIALDIVISION_PRIME { + UINT64 invMod2e64; // Inverse of prime modulo 2^64 + UINT64 compareLimit; // floor( (2^{64}-1)/ prime ) +} SYMCRYPT_TRIALDIVISION_PRIME, *PSYMCRYPT_TRIALDIVISION_PRIME; +typedef const SYMCRYPT_TRIALDIVISION_PRIME * PCSYMCRYPT_TRIALDIVISION_PRIME; +// +// This structure is used to test whether a UINT64 is a multiple of a (small) prime. +// Let V be the input value, P the small prime, and W the inverse of P modulo 2^64. +// If V = k*P then V * M mod 2^64 = V/P mod 2^64 = k. 
+// This holds for k = 0, 1, ..., floor( (2^{64}-1)/P ). +// If V is not a multiple of P then the result of the multiplication must be larger than that. +// + +typedef struct _SYMCRYPT_TRIALDIVISION_GROUP { + UINT32 nPrimes; // # primes are in this group (use the next ones) + UINT32 factor[9]; // factor[i] = 2^{32*(i+1)} mod Prod where Prod = product of the primes + // It is guaranteed that Prod <= (2^{32}-1)/9 +} SYMCRYPT_TRIALDIVISION_GROUP, *PSYMCRYPT_TRIALDIVISION_GROUP; +typedef const SYMCRYPT_TRIALDIVISION_GROUP * PCSYMCRYPT_TRIALDIVISION_GROUP; + + +typedef struct _SYMCRYPT_TRIALDIVISION_CONTEXT { + SIZE_T nBytesAlloc; + UINT32 maxTrialPrime; + PSYMCRYPT_TRIALDIVISION_GROUP pGroupList; // terminated with 0 record + PSYMCRYPT_TRIALDIVISION_PRIME pPrimeList; // terminated with 0 record + PUINT32 pPrimes; // terminated with a 0. + SYMCRYPT_TRIALDIVISION_PRIME Primes3_5_17[3]; // Structures for 3, 5 and 17 in that order +} SYMCRYPT_TRIALDIVISION_CONTEXT, *PSYMCRYPT_TRIALDIVISION_CONTEXT; +typedef const SYMCRYPT_TRIALDIVISION_CONTEXT * PCSYMCRYPT_TRIALDIVISION_CONTEXT; + +UINT32 +SymCryptTestTrialdivisionMaxSmallPrime( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ); // Expose small prime limit to help test code + +// +// DLGROUP type +// + +#define SYMCRYPT_DLGROUP_MIN_BITSIZE_P (32) +#define SYMCRYPT_DLGROUP_MIN_BITSIZE_Q (31) // Q must always be at least 1 bit shorter than P +// Minimum allowable bit sizes for generated and imported parameters for both P and +// Q primes. 
+ +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_DLGROUP { + UINT32 cbTotalSize; // Total size of the dl group object + BOOLEAN fHasPrimeQ; // Flag that specifies whether the object has a Q parameter + + UINT32 nBitsOfP; // Number of bits of the value of P (not the object's size) + UINT32 cbPrimeP; // Number of bytes of the value of P (not the object's size), equal to ceil(nBitsOfP/8) + UINT32 nDigitsOfP; // Number of digits of the object of prime P + UINT32 nMaxBitsOfP; // Maximum number of bits of the value of P + + UINT32 nBitsOfQ; // Number of bits of the value of Q (not the object's bits) + UINT32 cbPrimeQ; // Number of bytes of the value of Q (not the object's size), equal to ceil(nBitsOfQ/8) + UINT32 nDigitsOfQ; // Number of digits of the object of prime Q + UINT32 nMaxBitsOfQ; // Maximum number of bits of the value of Q + + BOOLEAN isSafePrimeGroup; // Boolean indicating if this is a Safe Prime group + UINT32 nMinBitsPriv; // Minimum number of bits to be used in private keys for this group + // This only applies to named Safe Prime groups where this is related to the security strength + // i.e. this corresponds to 2s in SP800-56arev3 5.6.1.1.1 / 5.6.2.1.2 + UINT32 nDefaultBitsPriv; // Default number of bits used in private keys for this group + // Normally equals nBitsOfQ, but may be further restricted (i.e. for named Safe Prime groups) + // i.e. this corresponds to a default value of N in SP800-56arev3 5.6.1.1.1 / 5.6.2.1.2 + + UINT32 nBitsOfSeed; // Number of bits of the seed used for generation (seedlen in FIPS 186-3) + UINT32 cbSeed; // Number of bytes of the seed, equal to ceil(nBitsOfSeed/8) + + SYMCRYPT_DLGROUP_FIPS eFipsStandard; // Code specifying the FIPS standard used to create the keys. If 0 the group is unverified. 
+ + PCSYMCRYPT_HASH pHashAlgorithm; // Hash algorithm used for the generation of parameters + UINT32 dwGenCounter; // Number of iterations used for the generation of parameters + BYTE bIndexGenG; // Index for the generation of generator G (FIPS 186-3) (Always 1 for now) + + PBYTE pbQ; // SYMCRYPT_ASYM_ALIGN'ed buffer that points to the memory allocated for modulus Q + + PSYMCRYPT_MODULUS pmP; // Pointer to the prime P + PSYMCRYPT_MODULUS pmQ; // Pointer to the prime Q + + PSYMCRYPT_MODELEMENT peG; // Pointer to the generator G + + PBYTE pbSeed; // Buffer that will hold the seed (this is padded at the end so that the entire structure + // has size a multiple of SYMCRYPT_ASYM_ALIGN_VALUE) + + SYMCRYPT_MAGIC_FIELD + + // P + // Q + // G + // Seed +} SYMCRYPT_DLGROUP; +typedef SYMCRYPT_DLGROUP * PSYMCRYPT_DLGROUP; +typedef const SYMCRYPT_DLGROUP * PCSYMCRYPT_DLGROUP; + +// +// DLKEY type +// +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_DLKEY { + UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in + // Also tracks which per-key selftests have been performed on this key + // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_DLKEY_*, and + // SYMCRYPT_PCT_* values + + BOOLEAN fHasPrivateKey; // Set to true if there is a private key set + BOOLEAN fPrivateModQ; // Set to true if the private key is at most Q-1, otherwise it is at most P-2 + UINT32 nBitsPriv; // Number of bits used in private keys + + PCSYMCRYPT_DLGROUP pDlgroup; // Handle to the group which created the key + + PBYTE pbPrivate; // SYMCRYPT_ASYM_ALIGN'ed buffer that points to the memory allocated for the private key + + PSYMCRYPT_MODELEMENT pePublicKey; // Public key (modelement modulo P) + PSYMCRYPT_INT piPrivateKey; // Private key (integer up to 2^nBitsPriv-1, Q-1 or P-2) + + SYMCRYPT_MAGIC_FIELD + + // PublicKey + // PrivateKey // The size of this must always be the same as the size of P +} SYMCRYPT_DLKEY; +typedef SYMCRYPT_DLKEY * PSYMCRYPT_DLKEY; +typedef const SYMCRYPT_DLKEY * 
PCSYMCRYPT_DLKEY; + +// +// Elliptic Curve Function Types +// + +#define SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH 4 // Number of MODELEMENTs for the largest ECPOINT format + +// Coordinate representations for ECPOINTs +// NOTE: The value masked with 0xf gives you the number of coordinates +typedef enum _SYMCRYPT_ECPOINT_COORDINATES { + SYMCRYPT_ECPOINT_COORDINATES_INVALID = 0x00, // Invalid point representation + SYMCRYPT_ECPOINT_COORDINATES_SINGLE = 0x11, // Representation with only X + SYMCRYPT_ECPOINT_COORDINATES_AFFINE = 0x22, // Affine representation (X,Y) + SYMCRYPT_ECPOINT_COORDINATES_PROJECTIVE = 0x33, // Three equally-sized values where the triple (X,Y,Z) represents the affine point (X/Z, Y/Z) + SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN = 0x43, // Three equally-sized values where the triple (X,Y,Z) represents the affine point (X/Z^2, Y/Z^3) + SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE = 0x54, // Four equally-sized values where (X,Y,Z,T) represents the affine point (X/Z, Y/Z) with T=X*Y*Z + SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE = 0x62, // Two equally-sized values where (X,Z) represents the point (X/Z) +} SYMCRYPT_ECPOINT_COORDINATES; + +#define SYMCRYPT_INTERNAL_NUMOF_COORDINATES( _eCoordinates ) ((_eCoordinates) & 0xf) + + +// +// Curve-type-dependent information +// + +// Short-Weierstrass + +#define SYMCRYPT_ECURVE_SW_DEF_WINDOW (6) // Default window size for the windowed methods + +#define SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS (64) // Maximum number of precomputed points + +typedef struct _SYMCRYPT_ECURVE_INFO_PRECOMP { + UINT32 window; // Window size + UINT32 nPrecompPoints; // Number of precomputed points + UINT32 nRecodedDigits; // Number of recoded digits + PSYMCRYPT_ECPOINT poPrecompPoints[SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS]; + // Table of pointers to precomputed powers of the distinguished point +} SYMCRYPT_ECURVE_INFO_PRECOMP; + +// +// ECURVE object +// + +#define SYMCRYPT_ECURVE_MIN_BITSIZE_FMOD (32) +#define 
SYMCRYPT_ECURVE_MIN_BITSIZE_GORD (32) +#define SYMCRYPT_ECURVE_MAX_COFACTOR_POWER (8) +// Minimum (maximum for cofactor) allowable bit sizes for imported +// parameters for field modulus, group order of curve (and cofactor). + +#define SYMCRYPT_INTERNAL_ECURVE_VERSION_LATEST 1 + +typedef enum _SYMCRYPT_INTERNAL_ECURVE_TYPE { + SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS = 1, + SYMCRYPT_INTERNAL_ECURVE_TYPE_TWISTED_EDWARDS = 2, + SYMCRYPT_INTERNAL_ECURVE_TYPE_MONTGOMERY = 3, + SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS_AM3 = 4,// This type is a specialization of Short-Weierstrass when A == -3 + // This condition is detected and used for all NIST prime curves +} SYMCRYPT_INTERNAL_ECURVE_TYPE; + +C_ASSERT((UINT32)SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS == (UINT32)SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS ); +C_ASSERT((UINT32)SYMCRYPT_INTERNAL_ECURVE_TYPE_TWISTED_EDWARDS == (UINT32)SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS ); +C_ASSERT((UINT32)SYMCRYPT_INTERNAL_ECURVE_TYPE_MONTGOMERY == (UINT32)SYMCRYPT_ECURVE_TYPE_MONTGOMERY ); + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_ECURVE { + UINT32 version; // Version # + SYMCRYPT_INTERNAL_ECURVE_TYPE + type; // Internal type of the curve + SYMCRYPT_ECPOINT_COORDINATES + eCoordinates; // Default representation of the EC points + + UINT32 FModBitsize; // Bitsize of the field modulus + UINT32 FModDigits; // Number of digits of the field modulus + UINT32 FModBytesize; // Bytesize of the field modulus (specified in the curve parameters as cbFieldLength) + + UINT32 GOrdBitsize; // Bitsize of the (sub)group order + UINT32 GOrdDigits; // Number of digits of the (sub)group order + UINT32 GOrdBytesize; // Bytesize of the (sub)group order (specified in the curve parameters as cbSubgroupOrder) + + UINT32 cbModElement; // (Internal) bytesize of one mod element + + UINT32 cbAlloc; // Bytesize of the total curve blob + + UINT32 cbScratchCommon; // Size of scratch space for common ecurve operations + UINT32 cbScratchScalar; // 
Size of constant scratch space for scalar ecurve operations (without the nPoints dependence) + UINT32 cbScratchScalarMulti; // Dependence of scratch space for scalar ecurve operations from nPoints + UINT32 cbScratchGetSetValue; // Size of scratch space for get set value ecpoint operations + UINT32 cbScratchEckey; // Size of scratch space for eckey operations + + UINT32 coFactorPower; // The cofactor of the curve will be equal to 2^coFactorPower + + // Parameters V2 Extensions + UINT32 PrivateKeyDefaultFormat; + UINT32 HighBitRestrictionNumOfBits; + UINT32 HighBitRestrictionPosition; + UINT32 HighBitRestrictionValue; + + union { + + SYMCRYPT_ECURVE_INFO_PRECOMP sw; // Info for short Weierstrass curves (only the precomputation parameters are needed now) + + } info; // Precomputed information related to each curve + + PSYMCRYPT_MODULUS FMod; // Field modulus + PSYMCRYPT_MODULUS GOrd; // Order of the subgroup + + PSYMCRYPT_MODELEMENT A; // Parameter A + PSYMCRYPT_MODELEMENT B; // Parameter B + PSYMCRYPT_ECPOINT G; // Distinguished point (generator of the subgroup) + PSYMCRYPT_INT H; // Cofactor of the curve + + SYMCRYPT_MAGIC_FIELD + + // FMod + // A + // B + // GOrd + // H + // G +} SYMCRYPT_ECURVE; +typedef SYMCRYPT_ECURVE * PSYMCRYPT_ECURVE; +typedef const SYMCRYPT_ECURVE * PCSYMCRYPT_ECURVE; + +#define SYMCRYPT_INTERNAL_ECPOINT_COORDINATE_OFFSET( _pCurve, _ord ) ( sizeof(SYMCRYPT_ECPOINT) + (_ord) * (_pCurve)->cbModElement ) +#define SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( _ord, _pCurve, _pEcpoint ) (PSYMCRYPT_MODELEMENT)( (PBYTE)(_pEcpoint) + SYMCRYPT_INTERNAL_ECPOINT_COORDINATE_OFFSET( (_pCurve), _ord ) ) + +// Convenience macros to make adding internal specializations easier +#define SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE( _pCurve ) \ + ( _pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS || \ + _pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS_AM3 ) + +#define SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE( _pCurve ) \ + ( _pCurve->type == 
SYMCRYPT_INTERNAL_ECURVE_TYPE_TWISTED_EDWARDS ) + +#define SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE( _pCurve ) \ + ( _pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_MONTGOMERY ) + +// +// Scratch space sizes for ECURVE operations +// +// Overflow protection is enforced when creating the ECURVE objects on +// the cbScratchCommon, cbScratchScalar, cbScratchScalarMulti, and cbScratchEckey fields. +// +// All of them are upper bounded by 2^26 (see SymCrypt<CurveType>FillScratchSpaces functions) +// and since _nPoints is bounded by SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS = 2, all +// the macros are bounded by 2^27. +// + +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( _pCurve ) ( (_pCurve)->cbScratchCommon) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( _pCurve, _nPoints ) ( (_pCurve)->cbScratchScalar + \ + (_nPoints) * (_pCurve)->cbScratchScalarMulti ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( _pCurve ) ( (_pCurve)->cbScratchGetSetValue) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( _pCurve ) ( (_pCurve)->cbScratchEckey) + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_ECPOINT { + BOOLEAN normalized; // A flag specifying whether the point is normalized or not. This flag + // makes sense only for PROJECTIVE, JACOBIAN, EXTENDED_PROJECTIVE, and + // SINGLE_PROJECTIVE coordinates. If set to TRUE (non-zero), it means + // that the Z coordinate of the point is equal to 1. + PCSYMCRYPT_ECURVE pCurve; // Handle to the curve which the point is on. Only used in CHKed builds for ASSERTs + SYMCRYPT_MAGIC_FIELD + // An array of MODELEMENTs. The total size will depend on the MODELEMENT size and the number of MODELEMENTs. 
+} SYMCRYPT_ECPOINT, *PSYMCRYPT_ECPOINT; +typedef const SYMCRYPT_ECPOINT * PCSYMCRYPT_ECPOINT; + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_ECKEY { + UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in + // Also tracks which per-key selftests have been performed on this key + // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_ECKEY_*, and + // SYMCRYPT_PCT_* values + BOOLEAN hasPrivateKey; // Set to true if there is a private key set + PCSYMCRYPT_ECURVE pCurve; // Handle to the curve which created the key + + PSYMCRYPT_ECPOINT poPublicKey; // Public key (ECPOINT) + PSYMCRYPT_INT piPrivateKey; // Private key + + SYMCRYPT_MAGIC_FIELD + + // PublicKey + // PrivateKey +} SYMCRYPT_ECKEY; +typedef SYMCRYPT_ECKEY * PSYMCRYPT_ECKEY; +typedef const SYMCRYPT_ECKEY * PCSYMCRYPT_ECKEY; + +SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_802_11_SAE_CUSTOM_STATE { + PSYMCRYPT_ECURVE pCurve; + PCSYMCRYPT_MAC macAlgorithm; + PSYMCRYPT_MODELEMENT peRand; + PSYMCRYPT_MODELEMENT peMask; + PSYMCRYPT_ECPOINT poPWE; + BYTE counter; +}; + +// +// XMSS +// + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_XMSS_PARAMS +{ + PCSYMCRYPT_HASH hash; // hash function + UINT32 id; // algorithm identifier + UINT32 cbHashOutput; // hash function output size, must be less than or equal to hash->resultSize + UINT32 nWinternitzWidth;// Winternitz coefficient, width of digits in bits (chain length = 2^nWinternitzWidth) + UINT32 nTotalTreeHeight;// number of layers times the tree height of one layer (each layer has the same height) + UINT32 nLayers; // hyper-tree layers, 1 for single tree + UINT32 cbPrefix; // length of the domain separator prefix in PRFs + + // + // The following are derived from the above + // + UINT32 len1; // number of w-bit digits in the hash output to be signed ( len1 = ceil(8n / w) ) + UINT32 len2; // number of w-bit digits in the checksum + UINT32 len; // len1 + len2 + UINT32 nLayerHeight; // tree height of a single layer (h / d) + UINT32 cbIdx; // size of leaf counter in 
bytes (for single trees cbIdx = 4) + UINT32 nLeftShift32; // left shift count to align the checksum digits to MSB of a 32-bit word + + BYTE Reserved[16]; // Reserved for future use +} SYMCRYPT_XMSS_PARAMS; + +typedef SYMCRYPT_XMSS_PARAMS* PSYMCRYPT_XMSS_PARAMS; +typedef const SYMCRYPT_XMSS_PARAMS* PCSYMCRYPT_XMSS_PARAMS; + +struct _SYMCRYPT_XMSS_KEY; +typedef struct _SYMCRYPT_XMSS_KEY SYMCRYPT_XMSS_KEY; +typedef SYMCRYPT_XMSS_KEY* PSYMCRYPT_XMSS_KEY; +typedef const SYMCRYPT_XMSS_KEY* PCSYMCRYPT_XMSS_KEY; + + +//========================================================================== +// LMS internal structures +//========================================================================== + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_LMS_PARAMS +{ + // algorithm ID of the LMS signature scheme + UINT32 lmsAlgID; + + // algorithm ID of the LM-OTS signature scheme + UINT32 lmsOtsAlgID; + + // hash function pointer to be used as part of the LMS operations + PCSYMCRYPT_HASH pLmsHashFunction; + + // the height of the LMS tree. 
There are 2^h leaves in the tree - h + UINT32 nTreeHeight; + + // the number of bytes for each tree node, equals to the output length of the hash function - m, n + UINT32 cbHashOutput; + + // Winternitz coefficient, width of digits in bits (chain length = 2^w) - w + UINT32 nWinternitzChainWidth; + + // the number of n-byte string elements that make up the LM-OTS signature - p + UINT32 nByteStringCount; + + // the number of left-shift bits used in the checksum function Cksm - ls + UINT32 nChecksumLShiftBits; +} SYMCRYPT_LMS_PARAMS; +typedef SYMCRYPT_LMS_PARAMS* PSYMCRYPT_LMS_PARAMS; +typedef const SYMCRYPT_LMS_PARAMS* PCSYMCRYPT_LMS_PARAMS; + +struct _SYMCRYPT_LMS_KEY; +typedef struct _SYMCRYPT_LMS_KEY SYMCRYPT_LMS_KEY; +typedef SYMCRYPT_LMS_KEY* PSYMCRYPT_LMS_KEY; +typedef const SYMCRYPT_LMS_KEY* PCSYMCRYPT_LMS_KEY; + +#ifndef _PREFAST_ +#if SYMCRYPT_CPU_X86 +#pragma warning(pop) +#endif +#endif + + + +////////////////////////////////////////////////////////// +// +// Environment macros +// + +#ifdef __cplusplus +#define SYMCRYPT_EXTERN_C extern "C" { +#define SYMCRYPT_EXTERN_C_END } +#else +#define SYMCRYPT_EXTERN_C +#define SYMCRYPT_EXTERN_C_END +#endif + +// +// Callers of SymCrypt should NOT depend on the function names in these macros. +// The definition of these macros can change in future releases of the library. 
+// + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 +typedef struct _SYMCRYPT_EXTENDED_SAVE_DATA SYMCRYPT_EXTENDED_SAVE_DATA, *PSYMCRYPT_EXTENDED_SAVE_DATA; + +#define SYMCRYPT_ENVIRONMENT_DEFS_SAVEYMM( envName ) \ + SYMCRYPT_ERROR SYMCRYPT_CALL SymCryptSaveYmmEnv##envName( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ); \ + SYMCRYPT_ERROR SYMCRYPT_CALL SymCryptSaveYmm( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) \ + { return SymCryptSaveYmmEnv##envName( pSaveArea ); } \ + \ + VOID SYMCRYPT_CALL SymCryptRestoreYmmEnv##envName( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ); \ + VOID SYMCRYPT_CALL SymCryptRestoreYmm( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) \ + { SymCryptRestoreYmmEnv##envName( pSaveArea ); } \ + +#define SYMCRYPT_ENVIRONMENT_DEFS_SAVEXMM( envName ) \ + SYMCRYPT_ERROR SYMCRYPT_CALL SymCryptSaveXmmEnv##envName( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ); \ + SYMCRYPT_ERROR SYMCRYPT_CALL SymCryptSaveXmm( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) \ + { return SymCryptSaveXmmEnv##envName( pSaveArea ); } \ + \ + VOID SYMCRYPT_CALL SymCryptRestoreXmmEnv##envName( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ); \ + VOID SYMCRYPT_CALL SymCryptRestoreXmm( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) \ + { SymCryptRestoreXmmEnv##envName( pSaveArea ); } \ + + +#else + +#define SYMCRYPT_ENVIRONMENT_DEFS_SAVEYMM( envName ) +#define SYMCRYPT_ENVIRONMENT_DEFS_SAVEXMM( envName ) + +#endif + +// Environment forwarding functions. +// CPUIDEX is only forwarded on CPUs that have it. 
+#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 +#define SYMCRYPT_ENVIRONMENT_FORWARD_CPUIDEX( envName ) \ + VOID SYMCRYPT_CALL SymCryptCpuidExFuncEnv##envName( int cpuInfo[4], int function_id, int subfunction_id ); \ + VOID SYMCRYPT_CALL SymCryptCpuidExFunc( int cpuInfo[4], int function_id, int subfunction_id ) \ + { SymCryptCpuidExFuncEnv##envName( cpuInfo, function_id, subfunction_id ); } +#else +#define SYMCRYPT_ENVIRONMENT_FORWARD_CPUIDEX( envName ) +#endif + +#define SYMCRYPT_ENVIRONMENT_DEFS( envName ) \ +SYMCRYPT_EXTERN_C \ + VOID SYMCRYPT_CALL SymCryptInitEnv##envName( UINT32 version ); \ + VOID SYMCRYPT_CALL SymCryptInit(void) \ + { SymCryptInitEnv##envName( SYMCRYPT_API_VERSION ); } \ + \ + _Analysis_noreturn_ VOID SYMCRYPT_CALL SymCryptFatalEnv##envName( UINT32 fatalCode ); \ + _Analysis_noreturn_ VOID SYMCRYPT_CALL SymCryptFatal( UINT32 fatalCode ) \ + { SymCryptFatalEnv##envName( fatalCode ); } \ + SYMCRYPT_CPU_FEATURES SYMCRYPT_CALL SymCryptCpuFeaturesNeverPresentEnv##envName(void); \ + SYMCRYPT_CPU_FEATURES SYMCRYPT_CALL SymCryptCpuFeaturesNeverPresent(void) \ + { return SymCryptCpuFeaturesNeverPresentEnv##envName(); } \ + \ + SYMCRYPT_ENVIRONMENT_DEFS_SAVEXMM( envName ) \ + SYMCRYPT_ENVIRONMENT_DEFS_SAVEYMM( envName ) \ + \ + VOID SYMCRYPT_CALL SymCryptTestInjectErrorEnv##envName( PBYTE pbBuf, SIZE_T cbBuf ); \ + VOID SYMCRYPT_CALL SymCryptInjectError( PBYTE pbBuf, SIZE_T cbBuf ) \ + { SymCryptTestInjectErrorEnv##envName( pbBuf, cbBuf ); } \ + SYMCRYPT_ENVIRONMENT_FORWARD_CPUIDEX( envName ) \ +SYMCRYPT_EXTERN_C_END + +// +// To avoid hard-to-diagnose mistakes, we skip defining environment macros in those cases where we +// know they cannot or should not be used. 
+// + +#define SYMCRYPT_ENVIRONMENT_GENERIC SYMCRYPT_ENVIRONMENT_DEFS( Generic ) + +#if defined(EFI) | defined(PCAT) | defined(DIRECT) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_BOOTLIBRARY SYMCRYPT_ENVIRONMENT_DEFS( WindowsBootlibrary ) +#endif + +// +// There are no defined symbols that we can use to detect that we are in debugger code +// But this is unlikely to be misused. +// +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELDEBUGGER SYMCRYPT_ENVIRONMENT_DEFS( WindowsKernelDebugger ) + + + +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_LEGACY SYMCRYPT_ENVIRONMENT_GENERIC + +#ifdef NTDDI_VERSION +#if (NTDDI_VERSION >= NTDDI_WIN7) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN7_N_LATER SYMCRYPT_ENVIRONMENT_DEFS( WindowsKernelmodeWin7nLater ) +#endif + +#if (NTDDI_VERSION >= NTDDI_WINBLUE) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN8_1_N_LATER SYMCRYPT_ENVIRONMENT_DEFS( WindowsKernelmodeWin8_1nLater ) +#endif + +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_LATEST SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN8_1_N_LATER + + + +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_LEGACY SYMCRYPT_ENVIRONMENT_GENERIC + +#if (NTDDI_VERSION >= NTDDI_WIN7) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN7_N_LATER SYMCRYPT_ENVIRONMENT_DEFS( WindowsUsermodeWin7nLater ) +#endif + +#if (NTDDI_VERSION >= NTDDI_WINBLUE) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN8_1_N_LATER SYMCRYPT_ENVIRONMENT_DEFS( WindowsUsermodeWin8_1nLater ) +#endif + +#if (NTDDI_VERSION >= NTDDI_WIN10) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN10_SGX SYMCRYPT_ENVIRONMENT_DEFS( Win10Sgx ) +#endif +#endif // NTDDI_VERSION + +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_LATEST SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN8_1_N_LATER + + +#define SYMCRYPT_ENVIRONMENT_POSIX_USERMODE SYMCRYPT_ENVIRONMENT_DEFS( PosixUsermode ) + +// For backwards compatibility with previous macro name +#define SYMCRYPT_ENVIRONMENT_LINUX_USERMODE SYMCRYPT_ENVIRONMENT_POSIX_USERMODE + + +#define 
SYMCRYPT_ENVIRONMENT_OPTEE_TA SYMCRYPT_ENVIRONMENT_DEFS( OpteeTa ) + +////////////////////////////////////////////////////////// +// +// SymCryptWipe & SymCryptWipeKnownSize +// + +VOID +SYMCRYPT_CALL +SymCryptWipe( + _Out_writes_bytes_(cbData) PVOID pbData, + SIZE_T cbData); + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + +// +// If the known size is large we call the generic wipe function anyway. +// For small known sizes we perform the wipe inline. +// This is a tradeoff between speed and code size and there are diminishing returns to supporting +// increasingly large sizes. +// We currently put the limit at ~8 native writes, which varies by platform. +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM +#define SYMCRYPT_WIPE_FUNCTION_LIMIT (32) // If this is increased beyond 127 the code below must be updated. +#elif SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_WIPE_FUNCTION_LIMIT (64) // If this is increased beyond 127 the code below must be updated. +#else +#error ?? +#endif + +// +// The buffer analysis code doesn't understand our optimized in-line wiping code +// well enough to conclude it is safe. +// +#pragma prefast(push) +#pragma prefast( disable: 26001 ) + +// +// SymCryptWipeKnownSize writes 0 over cbData bytes starting at pbData. +// Sizes above SYMCRYPT_WIPE_FUNCTION_LIMIT are forwarded to SymCryptWipe. Smaller sizes are wiped +// inline through the SYMCRYPT_INTERNAL_FORCE_WRITE* macros: each set bit of cbData (1, 2, 4, ...) +// selects one chunk of that many bytes, taken from the current end of the buffer, so the +// remaining length shrinks towards the front after every chunk. +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +#pragma prefast( suppress: 6101, "Logic why this properly initializes the pbData buffer is too complicated for prefast" ) +SymCryptWipeKnownSize(_Out_writes_bytes_(cbData) PVOID pbData, SIZE_T cbData) +{ + volatile BYTE * pb = (volatile BYTE *)pbData; + + if (cbData > SYMCRYPT_WIPE_FUNCTION_LIMIT) + { + SymCryptWipe(pbData, cbData); + } + else + { + // + // We assume that pb is aligned, so we wipe from the end to the front to keep alignment. 
+ // + if (cbData & 1) + { + cbData--; + SYMCRYPT_INTERNAL_FORCE_WRITE8((volatile BYTE *)&pb[cbData], 0); + } + if (cbData & 2) + { + cbData -= 2; + SYMCRYPT_INTERNAL_FORCE_WRITE16((volatile UINT16 *)&pb[cbData], 0); + } + if (cbData & 4) + { + cbData -= 4; + SYMCRYPT_INTERNAL_FORCE_WRITE32((volatile UINT32 *)&pb[cbData], 0); + } + if (cbData & 8) + { + cbData -= 8; + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData], 0); + } + if (cbData & 16) + { + cbData -= 16; + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 8], 0); + } + if (cbData & 32) + { + cbData -= 32; + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 8], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 16], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 24], 0); + } +#if SYMCRYPT_WIPE_FUNCTION_LIMIT >= 64 + if (cbData & 64) + { + cbData -= 64; + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 8], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 16], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 24], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 32], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 40], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 48], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 56], 0); + } +#endif + } +} + +#pragma prefast(pop) + +#else // Platform switch for SymCryptWipeKnownSize + +// +// On all other platforms SymCryptWipeKnownSize simply forwards to SymCryptWipe. +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptWipeKnownSize(_Out_writes_bytes_(cbData) PVOID pbData, SIZE_T cbData) +{ + SymCryptWipe(pbData, cbData); +} + +#endif // Platform switch for SymCryptWipeKnownSize + +#define SYMCRYPT_FIPS_ASSERT(x) { if(!(x)){ 
SymCryptFatal('FIPS'); } } + +// Flags for FIPS on-demand selftests. When an on-demand selftest succeeds, the corresponding flag +// will be set in g_SymCryptFipsSelftestsPerformed. Other selftests are performed automatically +// when the module is loaded, so they don't have a corresponding flag. +typedef enum _SYMCRYPT_SELFTEST_ALGORITHM { + SYMCRYPT_SELFTEST_ALGORITHM_NONE = 0x0, + SYMCRYPT_SELFTEST_ALGORITHM_STARTUP = 0x1, + SYMCRYPT_SELFTEST_ALGORITHM_DSA = 0x2, + SYMCRYPT_SELFTEST_ALGORITHM_ECDSA = 0x4, + SYMCRYPT_SELFTEST_ALGORITHM_RSA = 0x8, + SYMCRYPT_SELFTEST_ALGORITHM_DH = 0x10, + SYMCRYPT_SELFTEST_ALGORITHM_ECDH = 0x20, + SYMCRYPT_SELFTEST_ALGORITHM_MLKEM = 0x40, + SYMCRYPT_SELFTEST_ALGORITHM_XMSS = 0x80, + SYMCRYPT_SELFTEST_ALGORITHM_LMS = 0x100, + SYMCRYPT_SELFTEST_ALGORITHM_MLDSA = 0x200, +} SYMCRYPT_SELFTEST_ALGORITHM; + +// Takes values which are some bitwise OR combination of SYMCRYPT_SELFTEST_ALGORITHM values +// Specified as UINT32 as we will update with 32 bit atomics, and compilers may choose to make enum +// types smaller than 32 bits. +extern UINT32 g_SymCryptFipsSelftestsPerformed; + +UINT32 +SYMCRYPT_CALL +SymCryptFipsGetSelftestsPerformed(void); +// Returns current value of g_SymCryptFipsSelftestsPerformed so callers may inspect which FIPS +// algorithm selftests have run + +// Flags for per-key selftests. +// When an asymmetric key is generated or imported, and SYMCRYPT_FLAG_KEY_NO_FIPS is not specified, +// some selftests must be performed on the key, before its operational use in an algorithm, to +// comply with FIPS. +// The algorithms the key may be used in will be tracked in the key's fAlgorithmInfo field, as a +// bitwise OR of SYMCRYPT_FLAG_<keytype>_<algorithm> (e.g. SYMCRYPT_FLAG_DLKEY_DH). 
+// This field will also track which per-key selftests have been run on the key using the below flags +// We want to track which selftests have been run independently of which algorithms the key may be +// used in as in some scenarios at key generation / import time we may not know what algorithm the +// key will actually be used in. Tracking the run per-key selftests in fAlgorithmInfo allows us to +// defer running expensive tests until we know they are required (e.g. if we generate an Eckey which +// may be used in ECDH or ECDSA, and only use it for ECDH, the ECDSA PCT is deferred until we first +// attempt to use the key in ECDSA, or export the private key). +// +// For clarity, SYMCRYPT_PCT_* should be used instead of SYMCRYPT_SELFTEST_KEY_* going forward. +// The latter is retained for compatibility with existing code, but may be removed in a future +// breaking change. + +// Dlkey selftest flags +// DSA Pairwise Consistency Test to be run on generated keys +#define SYMCRYPT_SELFTEST_KEY_DSA (0x1) +#define SYMCRYPT_PCT_DSA SYMCRYPT_SELFTEST_KEY_DSA + +// Eckey selftest flags +// ECDSA Pairwise Consistency Test to be run on generated keys +#define SYMCRYPT_SELFTEST_KEY_ECDSA (0x1) +#define SYMCRYPT_PCT_ECDSA SYMCRYPT_SELFTEST_KEY_ECDSA + +// Rsakey selftest flags +// RSA Pairwise Consistency Test to be run on generated keys +#define SYMCRYPT_SELFTEST_KEY_RSA_SIGN (0x1) +#define SYMCRYPT_PCT_RSA_SIGN SYMCRYPT_SELFTEST_KEY_RSA_SIGN + +UINT32 +SYMCRYPT_CALL +SymCryptDeprecatedStatusIndicator(PBYTE pbOutput, UINT32 cbOutput); +// +// Returns the FIPS Approved Services Status Indicator as an ASCII string. +// This API is required to satisfy FIPS 140-3 requirements, but is *not* recommended +// to be used in production code. It should be considered unstable, +// and may be removed at any time. +// +// The output string will be copied to pbOutput if the size of the buffer +// cbOutput is large enough. 
The function returns the required buffer size +// when pbOutput is passed as NULL. If pbOutput is not NULL, the function +// returns the number of bytes copied to pbOutput. +// + + + +typedef enum _SYMCRYPT_SI_TYPE { + + // Algorithm types (specific algorithms are represented as a bitmask of a type) + SYMCRYPT_SI_TYPE_CIPHER = 0x01, + SYMCRYPT_SI_TYPE_HASH = 0x02, + SYMCRYPT_SI_TYPE_MAC = 0x03, + SYMCRYPT_SI_TYPE_KDF = 0x04, + SYMCRYPT_SI_TYPE_DRBG = 0x05, + SYMCRYPT_SI_TYPE_ASYM_ALG = 0x06, + SYMCRYPT_SI_TYPE_KAS = 0x07, + SYMCRYPT_SI_TYPE_KEM = 0x08, + + // Other types where elements are a bitmask + SYMCRYPT_SI_TYPE_ECURVE = 0x40, + SYMCRYPT_SI_TYPE_KAS_SCHEME = 0x41, + SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP = 0x42, + + // Non-bitmask types + SYMCRYPT_SI_TYPE_INTRANGE = 0x80, + SYMCRYPT_SI_TYPE_INTPAIR = 0x81, + SYMCRYPT_SI_TYPE_SIZERANGE = 0x82, + + SYMCRYPT_SI_TYPE_MAX = 0xFF +} SYMCRYPT_SI_TYPE; + +// SYMCRYPT_SI_CREATE_ID places the SYMCRYPT_SI_TYPE value in the top 8 bits of a UINT64 and sets +// bit number `index` below it. +#define SYMCRYPT_SI_CREATE_ID(type, index) (((UINT64)(type) << 56) + (1ULL << (index))) + +#define SYMCRYPT_SI_INTBITS ((64 - 8) / 2) // 8-bits for type, remaining bits shared by two integers +#define SYMCRYPT_SI_INTMASK ((1ULL << SYMCRYPT_SI_INTBITS) - 1) // typically should be 0x0FFFFFFF with 28 1s +// SYMCRYPT_SI_INTPACK masks High and Low to SYMCRYPT_SI_INTBITS bits each and stores High in the +// field directly above Low. The parameters are parenthesized so that an expression argument is +// cast and masked as a whole rather than only its first operand. +#define SYMCRYPT_SI_INTPACK(High, Low) (((((UINT64)(High)) & SYMCRYPT_SI_INTMASK) << SYMCRYPT_SI_INTBITS) | (((UINT64)(Low)) & SYMCRYPT_SI_INTMASK)) +#define SYMCRYPT_SI_INTUNPACKLO(X) ((X) & SYMCRYPT_SI_INTMASK) +#define SYMCRYPT_SI_INTUNPACKHI(X) (((X) >> SYMCRYPT_SI_INTBITS) & SYMCRYPT_SI_INTMASK) + +#define SYMCRYPT_SI_INTRANGE(Low, High) (((UINT64)SYMCRYPT_SI_TYPE_INTRANGE << 56) | SYMCRYPT_SI_INTPACK(High, Low)) +#define SYMCRYPT_SI_INTPAIR(X, Y) (((UINT64)SYMCRYPT_SI_TYPE_INTPAIR << 56) | SYMCRYPT_SI_INTPACK(Y, X)) +#define SYMCRYPT_SI_SIZERANGE(Low, High) (((UINT64)SYMCRYPT_SI_TYPE_SIZERANGE << 56) | SYMCRYPT_SI_INTPACK(High, Low)) + +// Compile-time check that L fits in one SYMCRYPT_SI_INTBITS-wide field; L is parenthesized so an +// expression argument is compared as a whole. +#define SYMCRYPT_SI_CHECK_INT(L) C_ASSERT((L) <= SYMCRYPT_SI_INTMASK) + +#define SYMCRYPT_SI_KEYBITS(L) SYMCRYPT_SI_SIZERANGE(L, L) 
+#define SYMCRYPT_SI_MODULUS(L) SYMCRYPT_SI_SIZERANGE(L, L) +#define SYMCRYPT_SI_DSAPARAMS(N, L) SYMCRYPT_SI_INTPAIR(N, L) + + +// Services +#define SYMCRYPT_SI_SVC_ENCRYPTION 0x00000001 +#define SYMCRYPT_SI_SVC_DECRYPTION 0x00000002 +#define SYMCRYPT_SI_SVC_HASHING 0x00000004 +#define SYMCRYPT_SI_SVC_MESSAGE_AUTHENTICATION 0x00000008 +#define SYMCRYPT_SI_SVC_KEY_DERIVATION 0x00000010 +#define SYMCRYPT_SI_SVC_ASYMMETRIC_KEY_GENERATION 0x00000020 +#define SYMCRYPT_SI_SVC_ASYMMETRIC_KEY_VERIFICATION 0x00000080 +#define SYMCRYPT_SI_SVC_RANDOM_NUMBER_GENERATION 0x00000400 +#define SYMCRYPT_SI_SVC_SECRET_AGREEMENT 0x00000800 +#define SYMCRYPT_SI_SVC_SIGNATURE_GENERATION 0x00001000 +#define SYMCRYPT_SI_SVC_SIGNATURE_VERIFICATION 0x00002000 +#define SYMCRYPT_SI_SVC_KEY_ENCAPSULATION 0x00004000 +#define SYMCRYPT_SI_SVC_KEY_DECAPSULATION 0x00008000 + +// Ciphers +#define SYMCRYPT_SI_AES_CBC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 0) +#define SYMCRYPT_SI_AES_CCM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 1) +#define SYMCRYPT_SI_AES_CFB128 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 2) +#define SYMCRYPT_SI_AES_CFB8 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 3) +#define SYMCRYPT_SI_AES_CTR SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 4) +#define SYMCRYPT_SI_AES_ECB SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 5) +#define SYMCRYPT_SI_AES_GCM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 6) +#define SYMCRYPT_SI_AES_XTS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 7) +#define SYMCRYPT_SI_RC2 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 8) +#define SYMCRYPT_SI_RC4 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 9) +#define SYMCRYPT_SI_CHACHA SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 10) +#define SYMCRYPT_SI_DES SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 11) +#define SYMCRYPT_SI_TRIPLEDES SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 12) +#define SYMCRYPT_SI_CHACHA20 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 13) +#define 
SYMCRYPT_SI_CHACHA20_POLY1305 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 14) +#define SYMCRYPT_SI_AES_KW SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 15) +#define SYMCRYPT_SI_AES_KWP SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 16) + +// Hash Functions +#define SYMCRYPT_SI_MD2 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 0) +#define SYMCRYPT_SI_MD4 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 1) +#define SYMCRYPT_SI_MD5 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 2) +#define SYMCRYPT_SI_SHA1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 3) +#define SYMCRYPT_SI_SHA2_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 4) +#define SYMCRYPT_SI_SHA2_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 5) +#define SYMCRYPT_SI_SHA2_384 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 6) +#define SYMCRYPT_SI_SHA2_512 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 7) +#define SYMCRYPT_SI_SHA2_512_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 8) +#define SYMCRYPT_SI_SHA2_512_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 9) +#define SYMCRYPT_SI_SHA3_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 10) +#define SYMCRYPT_SI_SHA3_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 11) +#define SYMCRYPT_SI_SHA3_384 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 12) +#define SYMCRYPT_SI_SHA3_512 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 13) +#define SYMCRYPT_SI_SHAKE128 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 14) +#define SYMCRYPT_SI_SHAKE256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 15) +#define SYMCRYPT_SI_CSHAKE128 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 16) +#define SYMCRYPT_SI_CSHAKE256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 17) +#define SYMCRYPT_SI_MARVIN32 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 18) + +// MAC +#define SYMCRYPT_SI_HMAC_MD2 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 0) +#define SYMCRYPT_SI_HMAC_MD4 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 1) +#define SYMCRYPT_SI_HMAC_MD5 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 2) +#define 
SYMCRYPT_SI_HMAC_SHA1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 3) +#define SYMCRYPT_SI_HMAC_SHA2_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 4) +#define SYMCRYPT_SI_HMAC_SHA2_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 5) +#define SYMCRYPT_SI_HMAC_SHA2_384 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 6) +#define SYMCRYPT_SI_HMAC_SHA2_512 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 7) +#define SYMCRYPT_SI_HMAC_SHA2_512_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 8) +#define SYMCRYPT_SI_HMAC_SHA2_512_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 9) +#define SYMCRYPT_SI_HMAC_SHA3_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 10) +#define SYMCRYPT_SI_HMAC_SHA3_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 11) +#define SYMCRYPT_SI_HMAC_SHA3_384 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 12) +#define SYMCRYPT_SI_HMAC_SHA3_512 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 13) +#define SYMCRYPT_SI_KMAC128 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 14) +#define SYMCRYPT_SI_KMAC256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 15) +#define SYMCRYPT_SI_AES_GMAC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 16) +#define SYMCRYPT_SI_AES_CMAC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 17) +#define SYMCRYPT_SI_AES_CBCMAC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 18) +#define SYMCRYPT_SI_POLY1305 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 19) + +// KDF +#define SYMCRYPT_SI_HKDF SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 0) +#define SYMCRYPT_SI_PBKDF SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 1) +#define SYMCRYPT_SI_KDA_ONESTEP SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 2) +#define SYMCRYPT_SI_KDF_IKEV1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 3) +#define SYMCRYPT_SI_KDF_IKEV2 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 4) +#define SYMCRYPT_SI_KDF_SP800_108_CTR SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 5) +#define SYMCRYPT_SI_KDF_SRTP SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 6) +#define SYMCRYPT_SI_KDF_SSH SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 
7) +#define SYMCRYPT_SI_KDF_TLS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 8) +#define SYMCRYPT_SI_KDF_TLS_V12 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 9) + +// DRBG +#define SYMCRYPT_SI_CTR_DRBG_AES256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_DRBG, 0) + +// Asymmetric Algorithms +#define SYMCRYPT_SI_SAFE_PRIME_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 0) +#define SYMCRYPT_SI_DSA_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 1) +#define SYMCRYPT_SI_DSA_PQGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 2) +#define SYMCRYPT_SI_DSA_PQGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 3) +#define SYMCRYPT_SI_DSA_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 4) + +#define SYMCRYPT_SI_ECDSA_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 5) +#define SYMCRYPT_SI_ECDSA_KEYVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 6) +#define SYMCRYPT_SI_ECDSA_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 7) +#define SYMCRYPT_SI_ECDSA_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 8) +#define SYMCRYPT_SI_ECDSA_SIGGEN_COMP SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 9) + +#define SYMCRYPT_SI_RSA_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 10) +#define SYMCRYPT_SI_RSA_DEC_PRIM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 12) +#define SYMCRYPT_SI_RSA_SIG_PRIM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 13) +#define SYMCRYPT_SI_RSA_SIGGEN_PKCS15 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 14) +#define SYMCRYPT_SI_RSA_SIGGEN_PKCSPSS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 15) +#define SYMCRYPT_SI_RSA_SIGVER_PKCS15 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 16) +#define SYMCRYPT_SI_RSA_SIGVER_PKCSPSS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 17) + +#define SYMCRYPT_SI_KAS_ECC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 18) +#define SYMCRYPT_SI_KAS_ECC_SSC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 19) +#define SYMCRYPT_SI_KAS_FFC 
SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 20) +#define SYMCRYPT_SI_KAS_FFC_SSC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 21) + +// PQ Algorithms + +// Asym Alg IDs for PQC algorithms in range 22-26 are replaced with more granular +// algorithms as below. +// Keeping this range reserved until there's a need to use it in the future. + +#define SYMCRYPT_SI_MLDSA_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 27) +#define SYMCRYPT_SI_MLDSA_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 28) +#define SYMCRYPT_SI_MLDSA_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 29) +#define SYMCRYPT_SI_LMS_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 30) +#define SYMCRYPT_SI_LMS_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 31) +#define SYMCRYPT_SI_LMS_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 32) +#define SYMCRYPT_SI_XMSS_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 33) +#define SYMCRYPT_SI_XMSS_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 34) +#define SYMCRYPT_SI_XMSS_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 35) +#define SYMCRYPT_SI_XMSS_MT_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 36) +#define SYMCRYPT_SI_XMSS_MT_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 37) +#define SYMCRYPT_SI_XMSS_MT_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 38) + +#define SYMCRYPT_SI_MLKEM_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KEM, 0) +#define SYMCRYPT_SI_MLKEM_ENCAPS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KEM, 1) +#define SYMCRYPT_SI_MLKEM_DECAPS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KEM, 2) + + +// Elliptic Curves +#define SYMCRYPT_SI_ECURVE_NISTP192 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 0) +#define SYMCRYPT_SI_ECURVE_NISTP224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 1) +#define SYMCRYPT_SI_ECURVE_NISTP256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 2) +#define SYMCRYPT_SI_ECURVE_NISTP384 
SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 3) +#define SYMCRYPT_SI_ECURVE_NISTP521 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 4) +#define SYMCRYPT_SI_ECURVE_NUMSP256T1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 5) +#define SYMCRYPT_SI_ECURVE_NUMSP384T1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 6) +#define SYMCRYPT_SI_ECURVE_NUMSP512T1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 7) +#define SYMCRYPT_SI_ECURVE_CURVE25519 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 8) + +// Safe Prime Groups +#define SYMCRYPT_SI_SPG_FFDHE_2048 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 0) +#define SYMCRYPT_SI_SPG_FFDHE_3072 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 1) +#define SYMCRYPT_SI_SPG_FFDHE_4096 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 2) +#define SYMCRYPT_SI_SPG_FFDHE_6144 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 3) +#define SYMCRYPT_SI_SPG_FFDHE_8192 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 4) +#define SYMCRYPT_SI_SPG_MODP_2048 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 5) +#define SYMCRYPT_SI_SPG_MODP_3072 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 6) +#define SYMCRYPT_SI_SPG_MODP_4096 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 7) +#define SYMCRYPT_SI_SPG_MODP_6144 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 8) +#define SYMCRYPT_SI_SPG_MODP_8192 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 9) + +// KAS Schemes +#define SYMCRYPT_SI_SCHEME_EPHEM_UNIFIED SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KAS_SCHEME, 0) +#define SYMCRYPT_SI_SCHEME_DH_EPHEM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KAS_SCHEME, 1) +#define SYMCRYPT_SI_SCHEME_DH_ONEFLOW SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KAS_SCHEME, 2) +#define SYMCRYPT_SI_SCHEME_DH_STATIC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KAS_SCHEME, 3) + + +UINT32 +SYMCRYPT_CALL +SymCryptDeprecatedServiceIndicator( + UINT32 Service, + UINT64 Alg, + UINT64 Param1, + UINT64 Param2, + 
UINT64 Param3); +// +// Returns FIPS 140 Approved Services Indicator for an algorithm. +// +// Parameters: +// - Service. Service identifier, one of SYMCRYPT_SI_SVC_XXX. +// - Alg. Identifier of the algorithm for which the status is being queried. This must be +// exactly one of the algorithm identifiers defined above. +// - Param1, Param2, Param3. Depending on the Alg parameter, these parameters provide +// additional information about the capabilities and parameters associated with an +// algorithm. For each algorithm, the number and type of the parameters must be provided +// as specified below. Any unused parameters must be passed as 0. The algorithms that require +// parameters to be specified are listed below, the remaining algorithms do not have any parameters. +// +// Alg Id Param1 Param2 +// ----------------------------- -------------------------------- --------------- +// SYMCRYPT_SI_AES_XTS SYMCRYPT_SI_KEYBITS(int) - +// SYMCRYPT_SI_DSA_PQGVER SYMCRYPT_SI_DSAPARAMS(int, int) - +// SYMCRYPT_SI_DSA_SIGVER SYMCRYPT_SI_DSAPARAMS(int, int) - +// SYMCRYPT_SI_ECDSA_KEYGEN SYMCRYPT_SI_ECURVE_XXX - +// SYMCRYPT_SI_ECDSA_KEYVER SYMCRYPT_SI_ECURVE_XXX - +// SYMCRYPT_SI_ECDSA_SIGGEN SYMCRYPT_SI_ECURVE_XXX Hash Alg Id +// SYMCRYPT_SI_ECDSA_SIGGEN_COMP SYMCRYPT_SI_ECURVE_XXX Hash Alg Id +// SYMCRYPT_SI_ECDSA_SIGVER SYMCRYPT_SI_ECURVE_XXX Hash Alg Id +// SYMCRYPT_SI_RSA_DEC_PRIM SYMCRYPT_SI_MODULUS(int) - +// SYMCRYPT_SI_RSA_KEYGEN SYMCRYPT_SI_MODULUS(int) - +// SYMCRYPT_SI_RSA_SIGGEN_PKCS15 SYMCRYPT_SI_MODULUS(int) Hash Alg Id +// SYMCRYPT_SI_RSA_SIGVER_PKCS15 SYMCRYPT_SI_MODULUS(int) Hash Alg Id +// SYMCRYPT_SI_RSA_SIGGEN_PKCSPSS SYMCRYPT_SI_MODULUS(int) Hash Alg Id +// SYMCRYPT_SI_RSA_SIGVER_PKCSPSS SYMCRYPT_SI_MODULUS(int) Hash Alg Id +// SYMCRYPT_SI_SAFE_PRIME_KEYGEN SYMCRYPT_SI_SPG_XXX Hash Alg Id +// SYMCRYPT_SI_HMAC_XXX SYMCRYPT_SI_KEYBITS(int) - +// SYMCRYPT_SI_KDA_ONESTEP Hash Alg Id or MAC alg Id - +// SYMCRYPT_SI_PBKDF MAC Alg Id - +// 
SYMCRYPT_SI_KDF_SP800_108_CTR MAC Alg Id - +// SYMCRYPT_SI_KDF_SSH Hash Alg Id - +// SYMCRYPT_SI_KDF_TLS_V12 Hash Alg Id - +// SYMCRYPT_SI_KAS_ECC SYMCRYPT_SI_ECURVE_XXX Hash Alg Id +// SYMCRYPT_SI_KAS_ECC_SSC SYMCRYPT_SI_ECURVE_XXX SYMCRYPT_SI_SCHEME_XXX +// SYMCRYPT_SI_KAS_FFC SYMCRYPT_SI_SPG_XXX Hash Alg Id +// SYMCRYPT_SI_KAS_FFC_SSC SYMCRYPT_SI_SPG_XXX SYMCRYPT_SI_SCHEME_XXX +// SYMCRYPT_SI_LMS_SIGVER SYMCRYPT_LMS_XXX - +// SYMCRYPT_SI_XMSS_SIGVER SYMCRYPT_XMSS_XXX - +// SYMCRYPT_SI_XMSS_MT_SIGVER SYMCRYPT_XMSSMT_XXX - +// +// +// Return value: +// For the specified service and algorithm (and parameters if any), the function +// returns 0 if SymCrypt implements the algorithm in an approved manner. A non-zero +// value indicates either the algorithm is non-approved or the parameters were invalid. +// +// Remarks: +// - For parameters that contain integer values, the callers must ensure that the values +// are within the acceptable limits by using the SYMCRYPT_SI_CHECK_INT(L) macro. diff --git a/libs/symcrypt/inc/symcrypt_internal_shared.inc b/libs/symcrypt/inc/symcrypt_internal_shared.inc new file mode 100644 index 00000000000..03eae5bf7c0 --- /dev/null +++ b/libs/symcrypt/inc/symcrypt_internal_shared.inc @@ -0,0 +1,33 @@ +// +// symcrypt_internal_shared.inc +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This is the file that contains the SymCrypt version information and defines SYMCRYPT_DEBUG. +// It is included in both C and ASM such that the values are the same on both sides. +// We use the C preprocessor to set ASM constants, as we already need to use the C preprocessor for +// symcryptasm processing (see scripts/symcryptasm_processor.py). +// +// In previous releases we had a numbering system with major/minor version number. +// This worked well with the sequential servicing imposed by SourceDepot. +// With the switch to Git this no longer works due to having multiple branches.
+// We move to having the version here only specify the API and minor version number +// These will NOT be changed for every build. The API version only changes when there are +// breaking changes to the API in symcrypt.h. (Note: symcrypt_low_level.h is not stable and can change +// at any time.) The minor version is changed at regular intervals, but not necessarily at +// every build of the library. +// +// Separate from these numbers the build system includes information about the branch, +// last commit, build time, etc. +// +// The API numbering starts at 100 to avoid number conflicts with the old system. +// + +#define SYMCRYPT_CODE_VERSION_API 103 +#define SYMCRYPT_CODE_VERSION_MINOR 11 +#define SYMCRYPT_CODE_VERSION_PATCH 0 + +#if defined(DBG) +#define SYMCRYPT_DEBUG 1 +#else +#define SYMCRYPT_DEBUG 0 +#endif diff --git a/libs/symcrypt/inc/symcrypt_low_level.h b/libs/symcrypt/inc/symcrypt_low_level.h new file mode 100644 index 00000000000..296fbd48bf5 --- /dev/null +++ b/libs/symcrypt/inc/symcrypt_low_level.h @@ -0,0 +1,3137 @@ +// +// SymCrypt_low_level.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#pragma once + + +#ifdef __cplusplus +extern "C" { +#endif + +//======================================================================================= +// WARNING: The low-level APIs are not stable, and can change from release to release. +// The low-level APIs are only provided for certain exceptional use cases. +// All aspects of the low-level API can change in any release. +// Users are strongly advised to only rely on the API surface defined in symcrypt.h +//======================================================================================= + + +// +// Low level asymmetric algorithm API. This is not to be used by external callers. 
+// + +/************************************************************************************************** + Low-level Integer API + ************************************************************************************************** +The low-level API allows manipulation of arbitrarily large integers. + +The internal representation of large integers is not fixed. It depends on CPU architecture and +on the CPU features available on the exact CPU stepping the current software is running on. +In other words, it can change between different executions of the same binary. +Therefore it is critical that callers refrain from making assumptions about the internal +data format used. SymCrypt numbers should only be manipulated through the SymCrypt +API. + +The low-level API allows the caller to allocate the necessary memory for all objects. +This is typically necessary for high-IRQL level callers, callers running in low-memory +environments, and high-performance scenarios where memory has to be pre-allocated. +SymCrypt also provides routines for allocating objects, which makes the API easier +to use. The caller has to provide the allocation functions that SymCrypt uses. + +Internal data representation, and consequently the size of objects, can depend on the +exact CPU stepping the code is running on. +For robustness, the allocation size requirements are compile-time properties; +they vary per CPU architecture but do not depend on the exact available CPU features. + +General rules: +The functions in the low-level API can impose requirements on their inputs. It is imperative that +these requirements are satisfied for every call; failing to satisfy the requirements leads to undefined +behaviour, including bugchecks, access violations, wrong results, or sometimes even the right result. 
+CHKed versions of the library add more low-level consistency checks; all binaries should be tested +with a CHKed version of the library to detect any errors that might go unnoticed on FRE versions. + +Scratch space: +Many functions in the API require temporary storage for intermediate results. +Some functions simply allocate the necessary memory using the caller-provided allocation routines. +Other low-level functions are so fast that the overhead of the allocations would significantly slow +down the computations. These functions require the caller to allocate the memory; this memory +is called the scratch space. + +For each function that requires scratch space, there is a macro that determines how much scratch space +must be provided. This macro is a compile-time function of its arguments; if the parameters to the macro +are compile-time constants, then the result is also a compile-time constant. Therefore, +the macro can be used for statically sizing arrays. +The scratch space macros are all non-decreasing in each argument. +Callers that perform multiple operations can use a single scratch space, sized for the largest +argument(s) used. Note that the SYMCRYPT_C_MAX macro implements a compile-time MAX function suitable +for combining different scratch space sizes at compile time. + +The scratch space is always passed as a pair of arguments: (pbScratch, cbScratch). +cbScratch needs to be at least as large as the macro definition requires, but may be larger. + +Functions that take scratch parameters do not require memory allocation, and will not fail due to +low-memory conditions. +All functions that use memory allocation will return an error indication if a necessary memory +allocation fails. These functions return an error code, or an object pointer which will be NULL if +the allocation fails. +Functions that do not return an error code or object pointer do not use memory allocation.
+ +SymCrypt uses several implementation techniques to minimize the cost of the scratch space parameters. +This is necessary because the cost of the parameter passing by itself is significant in scenarios +such as elliptic-curve operations. +In a FRE build, some functions will ignore the cbScratch parameter and simply assume they get enough space; +in this case SymCrypt may provide an inline-able function that allows the compiler to optimize the cbScratch parameter +away, completely removing it from the actual code. +In environments where some functions don't need any scratch space, similar optimizations are possible for the +pbScratch parameter. + +Scratch and object buffers must all be aligned to SYMCRYPT_ALIGN. + +*/ + +// +// General flags +// +// SYMCRYPT_FLAG_DATA_PUBLIC is used to signal that the data being processed is public, and does not have +// to be protected from side-channel attacks. +#define SYMCRYPT_FLAG_DATA_PUBLIC (0x01) + +/* +INTEGERS + +Integers are internally represented as a sequence of Digits. An INT object with n digits can store +numbers up to (but not including) R^n where R is the _radix_ of the representation. + +The radix R, as well as the size and format of a Digit, are internal to the library, +and can depend on CPU architecture, CPU stepping and other run-time decisions. Therefore, callers +need to be especially careful not to make any assumptions about the size of a digit, or the number +of digits needed for any particular computation. + +At the same time, most INT operations are defined in terms of Digit sizes, so the caller has to +be aware of digits. This becomes important in the following example. Suppose the radix R = 2^256, +and a caller wants to multiply two 384-bit numbers. It takes 2 digits to store a 384-bit number. +The caller knows that the product is 768 bits, which can fit in 3 digits. So the caller might try +to multiply two 2-digit numbers into a 3-digit result, which will not work as the result is 4 digits.
+ +For an INT object of n digits we call the value R^n the capacity of the object. It is the upper bound of +the values that can be stored in the object. + +Additionally, there is a maximum number of bits for any integer value that the library supports (2^20 bits +in the current version). This bound is used to ensure that no object sizes and scratch space computations +have a value of magnitude more than 32 bits. Note that the computed upper bounds are very loose and the +actual values are much smaller. + +Attempts to create objects larger than this bound will result in NULL being returned. Callers either have +to ensure they do not exceed the bounds, or check that created objects are not NULL before using them. The +rationale behind this approach is to avoid any potential route for malicious inputs to trigger DoS by +taking excessive CPU time which would be indistinguishable from an application hang. + + +Digit size and radix can vary widely; on some CPU steppings the library might use a digit that contains +128 bits and requires 16 bytes of memory, on another CPU stepping it might use a digit that contains +416 bits and uses 64 bytes of memory. + +SAL annotations: +Because the different run-time selected implementations underneath this API might use +different size memory buffers for any one operation, fully accurate SAL annotations are not possible as SAL only +performs static analysis. +Furthermore, adding size parameters to every function would add too much overhead, and sizes are often passed +implicitly. Together with the fact that the same API can be implemented by different implementations, this means +that it isn't possible to write the actual size used in a form that SAL can understand. +Instead we use the following conventions: +- Pointers to SYMCRYPT_* objects can only be created with functions that provide the right memory buffer size. +- We annotate each object-pointer with _In_ or _Out_.
The SAL engine treats this as just a read/write to + a single object at the pointer location +- The CHKed version of SymCrypt adds run-time checking that the various size parameters are correct. +This allows us to have both high performance and good checking of our memory management. + +API rationale: +One important choice in this API is whether to pass a (ptr,len) for each INT or just a pointer. +We investigated this issue. The ptr-based API means that there are fewer parameters to pass around, +and generally makes the API simpler. The downside of a ptr-based API is that each INT object has some overhead +and this makes arrays of large integers less efficient, especially since the overhead can be a whole alignment +block. +The problem with the (ptr,len) format is that it isn't clear what length measure to use. +Using the bitsize is inefficient; the internal format might store 29 bits of the number in each 32-bit word, +and that means that the code would have to divide the bitsize by 29 just to find the size of the number. +Division is slow, and therefore this is not a good choice. +Another idea is to have the len parameter be the length of the INT object, in bytes. +But some APIs get really messy. For example, we need an API function to do a multiplication of two same-sized +numbers into a double-sized number. This is such a common operation that we want a separate function for it. +But the storage size of the result might not be twice the storage size of the inputs; if each number has some +fixed overhead then the output object might be smaller than two times the size of the input objects. +This makes it impossible to write suitable SAL annotations. +For this reason, we use a ptr-based API for integers. + +Most crypto algorithms that wish to store arrays of values actually want to store arrays of elements in a +ring modulo a modulus.
And for modular operations the caller is already passing the modulus separately, so +there isn't any need to store per-object size information. The API is designed to allow the ModElement for bitsize +B to be smaller than an INT for bitsize B so that implementations can choose to not store any length information in +a ModElement object. +*/ + +//======================================================================== +//======================================================================== +// Main schema for object creation, deletion, and management (low - level calls). +// +// The following are descriptions of some of the generic functions specifically +// modified for the INT, DIVISOR, MODULUS, and MODELEMENT objects. + +// +// PSYMCRYPT_XXX +// SYMCRYPT_CALL +// SymCryptXxxCreate( +// _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, +// SIZE_T cbBuffer, +// UINT32 nDigits ); +// Create an XXX object from the provided (pbBuffer, cbBuffer) space. +// The object will be able to store values up to R^nDigits where R is the digit radix. +// Requirement: +// - 1 <= nDigits <= SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) +// If the value is outside these bounds it will return NULL +// - cbBuffer >= SymCryptSizeofXxxFromDigits( nDigits ) +// - (pbBuffer,cbBuffer) memory must be exclusively used by this object. +// The last requirement ensures that all objects are non-overlapping (except for API functions +// that explicitly create overlapping objects). +// All parameters are published. +// It is always safe to choose +// cbBuffer = SYMCRYPT_SIZEOF_XXX_FROM_BITS( nBits ) +// nDigits = SymCryptDigitsFromBits( nBits ) +// if the caller wants to be able to store numbers up to 2^nBits. However, it is frequently more +// efficient to use cbBuffer = SymCryptSizeofXxxFromDigits( nDigits ) as that gives the exact size for the +// current CPU stepping rather than the compile-time largest size that might be needed on any stepping. 
+// +// PSYMCRYPT_XXX +// SYMCRYPT_CALL +// SymCryptXxxRetrieveHandle( _In_ PBYTE pbBuffer ); +// Retrieve the object's handle from the pointer to the memory space in which the object was created via +// a call to SymCryptXxxCreate. This function allows callers to tightly store arrays of objects without having +// to keep track of each object handle. +// Requirement: +// - A call to SymCryptXxxRetrieveHandle( pbBuffer1 ) must be preceded by at least one call to +// SymCryptXxxCreate( pbBuffer2, cbBuffer2, nDigits ) with ( pbBuffer1 == pbBuffer2 ) +// If the requirement is not satisfied the result is undefined. +// +// #define SYMCRYPT_SIZEOF_XXX_FROM_BITS( nBits ) ... +// Returns a memory size that is always sufficient to create an XXX object that can handle +// values of size nBits bits, irrespective of the run-time decision of digit size. +// This is a non-decreasing compile-time function of its inputs, suitable for computing static memory allocations. +// It is always true that +// SYMCRYPT_SIZEOF_XXX_FROM_BITS( n ) >= SymCryptSizeofXxxFromDigits( SymCryptDigitsFromBits( n ) ) +// which guarantees that the n-bit XXX can be stored in a memory area of SYMCRYPT_SIZEOF_XXX_FROM_BITS(n) bytes. +// Warning: It is possible that +// SYMCRYPT_SIZEOF_XXX_FROM_BITS( n+m ) < SymCryptSizeofXxxFromDigits( SymCryptDigitsFromBits( n ) + SymCryptDigitsFromBits( m ) ) +// for some inputs n and m. This is easy to see if you choose n = m = 1; each represents a 1-digit value, but an n+m bit (i.e. a 2-bit ) value is +// also 1 digit. +// In particular, you cannot use SYMCRYPT_SIZEOF_XXX_FROM_BITS( n + m ) to compute the size +// necessary to store the product of two numbers with bitsize n and m respectively. 
+// It is guaranteed that +// SymCryptSizeofXxxFromDigits( SymCryptDigitsFromBits( n ) + SymCryptDigitsFromBits( m ) ) <= +// SYMCRYPT_SIZEOF_XXX_FROM_BITS( n ) + SYMCRYPT_SIZEOF_XXX_FROM_BITS( m ) +// This is the proper way to statically compute the size needed to store the product of an n- and m-bit value. +// +// UINT32 +// SYMCRYPT_CALL +// SymCryptSizeofXxxFromDigits( UINT32 nDigits ); +// Memory size that is sufficient to store an XXX object with nDigits digits. +// This is a runtime function as the # digits and size of a digit are run-time decisions that depend on the CPU stepping. +// Requirement: +// - 1 <= nDigits <= SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) +// If the value is outside these bounds the returned value will be 0 indicating failure. +// This function has the following property: +// SymCryptSizeofXxxFromDigits( a + b ) <= SymCryptSizeofXxxFromDigits( a ) + SymCryptSizeofXxxFromDigits( b ) +// for all a and b. +// +// UINT32 +// SYMCRYPT_CALL +// SymCryptXxxBitsizeOfObject( PCSYMCRYPT_XXX pObj ) +// Return the number of bits of the object. +// +// UINT32 +// SYMCRYPT_CALL +// SymCryptXxxDigitsizeOfObject( PCSYMCRYPT_XXX pObj ) +// Return the number of digits of the object. +// + +//============================================================================================== +// Object types for low-level API +// +// SYMCRYPT_INT integer in range 0..N for some N +// SYMCRYPT_DIVISOR an integer > 0 that can be used to divide with. +// SYMCRYPT_MODULUS a value M > 1 to use in modulo-M computations +// SYMCRYPT_MODELEMENT An element in a modulo-M ring. +// SYMCRYPT_ECPOINT A point on an elliptic curve. +// +// See symcrypt_internal.h for definitions.
+// + +//======================================================================== +//======================================================================== +// General functions for integers +// + +UINT32 +SymCryptDigitsFromBits( UINT32 nBits ); +// +// Returns the # digits needed to store values (INT, DIVISOR, MODULUS, MODELEMENT) +// in the range 0..(2^nBits - 1). +// +// Remarks: +// If nBits==0 the returned number is 1. +// +// If nBits exceeds SYMCRYPT_INT_MAX_BITS the function will return 0 to indicate an object with +// this many bits is not supported. +// +// This is a run-time decision; the return value can depend on the exact CPU stepping +// the program is running on, or run-time configurations. +// For a and b in the range 0..SYMCRYPT_INT_MAX_BITS, it is always true that +// SymCryptDigitsFromBits( a + b ) <= SymCryptDigitsFromBits( a ) + SymCryptDigitsFromBits( b ) +// + +//======================================================================== +// INT objects +// + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntAllocate( UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptIntFree( _Out_ PSYMCRYPT_INT piObj ); + +#define SYMCRYPT_SIZEOF_INT_FROM_BITS( _bitsize ) SYMCRYPT_INTERNAL_SIZEOF_INT_FROM_BITS( _bitsize ) + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofIntFromDigits( UINT32 nDigits ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptIntWipe( _Out_ PSYMCRYPT_INT piObj ); + +VOID +SYMCRYPT_CALL +SymCryptIntCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); // **** Documentation lacking: requires same size + +VOID +SYMCRYPT_CALL +SymCryptIntMaskedCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 mask ); + +VOID +SYMCRYPT_CALL +SymCryptIntConditionalCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 cond ); + +VOID +SYMCRYPT_CALL +SymCryptIntConditionalSwap( + _Inout_ 
PSYMCRYPT_INT piSrc1, + _Inout_ PSYMCRYPT_INT piSrc2, + UINT32 cond ); + +UINT32 +SYMCRYPT_CALL +SymCryptIntBitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ); + +UINT32 +SYMCRYPT_CALL +SymCryptIntDigitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ); + +//======================================================================== +// DIVISOR objects +// + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorAllocate( UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptDivisorFree( _Out_ PSYMCRYPT_DIVISOR pdObj ); + +#define SYMCRYPT_SIZEOF_DIVISOR_FROM_BITS( _bitsize ) SYMCRYPT_INTERNAL_SIZEOF_DIVISOR_FROM_BITS( _bitsize ) + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDivisorFromDigits( UINT32 nDigits ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptDivisorWipe( _Out_ PSYMCRYPT_DIVISOR pdObj ); + +VOID +SymCryptDivisorCopy( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptDivisorDigitsizeOfObject( _In_ PCSYMCRYPT_DIVISOR pdSrc ); + +//======================================================================== +// MODULUS objects +// + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptModulusAllocate( UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptModulusFree( _Out_ PSYMCRYPT_MODULUS pmObj ); + +#define SYMCRYPT_SIZEOF_MODULUS_FROM_BITS( _bitsize ) SYMCRYPT_INTERNAL_SIZEOF_MODULUS_FROM_BITS( _bitsize ) + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofModulusFromDigits( UINT32 nDigits ); + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptModulusCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptModulusWipe( _Out_ PSYMCRYPT_MODULUS pmObj ); + +VOID +SymCryptModulusCopy( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptModulusDigitsizeOfObject( _In_ PCSYMCRYPT_MODULUS pmSrc ); + 
+//======================================================================== +// MODELEMENT objects are treated slightly differently because it does not store its own size. +// This allows a MODELEMENT to be more compact which makes large arrays of ModElements more efficient +// and avoids checking that ModElements have the same size. +// All operations require a modulus to be passed. +// + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptModElementAllocate( _In_ PCSYMCRYPT_MODULUS pmMod ); + +VOID +SYMCRYPT_CALL +SymCryptModElementFree( + _In_ PCSYMCRYPT_MODULUS pmMod, // only used to determine the digit size of peObj. + _Out_ PSYMCRYPT_MODELEMENT peObj ); + +#define SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( _bitsize ) SYMCRYPT_INTERNAL_SIZEOF_MODELEMENT_FROM_BITS( _bitsize ) + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofModElementFromModulus( PCSYMCRYPT_MODULUS pmMod ); + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptModElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_MODULUS pmMod ); + +VOID +SYMCRYPT_CALL +SymCryptModElementWipe( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SymCryptModElementCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SymCryptModElementMaskedCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 mask ); + +VOID +SymCryptModElementConditionalSwap( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peData1, + _Inout_ PSYMCRYPT_MODELEMENT peData2, + _In_ UINT32 cond ); + +//======================================================================== +// ECURVE objects + +BOOLEAN +SYMCRYPT_CALL +SymCryptEcurveBufferSizesFromParams( + _In_ PCSYMCRYPT_ECURVE_PARAMS pParams, + _Out_ SIZE_T * pcbCurve, + _Out_ SIZE_T * pcbScratch ); +// +// This call computes the memory size necessary to create the ECURVE object described by 
pParams, +// including the amount of scratch space needed for the operation. +// +// Returns FALSE if the given parameters are deemed invalid. +// + +PSYMCRYPT_ECURVE +SYMCRYPT_CALL +SymCryptEcurveCreate( + _In_ PSYMCRYPT_ECURVE_PARAMS pParams, + _In_ UINT32 flags, + _Out_writes_bytes_( cbCurve ) PBYTE pbCurve, + SIZE_T cbCurve, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Use caller-allocated memory to create an ECURVE object which +// is defined by the parameters in pParams. +// +// - pParams: parameters that define the curve +// - flags: Not used, must be zero. +// - pbCurve: caller-allocated memory region to hold the curve object +// - cbCurve: size of memory region to hold the curve object +// - pbScratch: caller-allocated memory region used as scratch space to create the curve +// - cbScratch: size of scratch space memory region +// +// Caller should use SymCryptEcurveBufferSizesFromParams to determine the necessary sizes for +// pbCurve and pbScratch. These buffers must be SYMCRYPT_ALIGNed. +// +// Future versions might use the flags to enable different features/tradeoffs. +// There are a number of interesting memory/speed/pre-computation cost trades that can be made. +// For example, pre-computing multiples of the distinguished point, or (parallel?) pre-computation +// of (r, rG) pairs for random r values. +// +// This function applies limited validation of the pParams. The validation is intended to eliminate +// the threat of denial-of-service when hostile parameters are presented. It does not ensure that +// the parameters make sense, define a proper curve, or that any elliptic-curve operations made on +// the curve built from these parameters will fail, succeed or provide any security. +// The only guarantee provided for invalid parameters is that all operations on this curve will +// not crash and will return in some reasonable amount of time.
+// +// Returns NULL if the given memory regions are not large enough or the +// parameters are deemed invalid. If the return value is not NULL, then +// pbCurve buffer must later be wiped with SymCryptWipe(). And as with all +// pbScratch buffers, it is the caller's responsibility to wipe after +// completing all operations that require scratch space. +// + +//======================================================================== +// ECPOINT objects' API is slightly different than the above API schema in the sense that they +// take as input an ECURVE object pointer instead of the number of digits. +// + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointAllocate( _In_ PCSYMCRYPT_ECURVE pCurve ); + +VOID +SYMCRYPT_CALL +SymCryptEcpointFree( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEcpointFromCurve( PCSYMCRYPT_ECURVE pCurve ); + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_ECURVE pCurve ); +// The above can take as input a pointer to a curve that has only the FMod, cbModElement, and the +// eformat fields set + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointRetrieveHandle( _In_ PBYTE pbBuffer ); + +VOID +SYMCRYPT_CALL +SymCryptEcpointWipe( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst ); + +VOID +SymCryptEcpointCopy( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst ); + +VOID +SymCryptEcpointMaskedCopy( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 mask ); + + +//======================================== +// Integer operations +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntCopyMixedSize( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src, but allows Dst and Src to have different # digits. +// +// Copy the value from piSrc to piDst. 
+// Returns success if Src < R^Dst.nDigits +// If Src >= R^Dst.nDigits then the value in Src is published and an error is returned. +// Warning: it is not side-channel safe to use this function with a Src value that can't fit in Dst. +// Src and Dst may be the same object. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc ); +// +// Returns the number of bits necessary to store the value of Src. +// +// Let V be the value of Src. +// Then this function returns +// 0 if V == 0 +// 1 + floor( log(V)/log(2) ) if V > 0 +// Note that there is no defined relationship between the result of this function and the bitsize used to allocate Src. +// Digits can be large, so the object Src might be able to store values much larger than 2^b where b is the bitsize +// used when creating Src. +// This function is side-channel safe, and as a result might be slower than expected. +// + + +VOID +SYMCRYPT_CALL +SymCryptIntSetValueUint32( + UINT32 u32Src, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src +// This always succeeds as R >= 2^32 on all implementations. +// + +VOID +SYMCRYPT_CALL +SymCryptIntSetValueUint64( + UINT64 u64Src, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src +// This always succeeds as R >= 2^64 on all implementations. +// + + +//======================================================================================== +// Read/write INTegers in defined formats +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_ PSYMCRYPT_INT piDst ); +// +// Set the value of an INT object from an array of bytes +// +// (pbSrc,cbSrc): buffer that contains the bytes that encode the value in the specified format. +// format: specifies the format of the (pbSrc,cbSrc) buffer. +// Dst : INT object that receives the value; must previously have been created/allocated.
+// +// Return value: +// If the value encoded in the (pbSrc,cbSrc) buffer fits in Dst, then the +// function succeeds. If the value does not fit, then the function +// returns an error. Note that the error condition is only dependent on the value in the input, +// and not on how many bytes are in the input. Importing a very large (pbSrc,cbSrc) buffer +// into a small piDst is fine as long as the value fits in the number (i.e. enough of the most significant +// bytes in the buffer are zero). +// +// Warning: +// Error return values are always published, so if this function fails it is visible to the attacker. +// +// Rationale: +// Because the size of a digit can be any size (even odd) there are always scenarios in which the +// caller can provide an input that is too large for the INT to store. (Restricting only the size of +// the input buffer is not sufficient.) And if we have to handle this +// in one case, we might as well handle it in all cases. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntGetValue( + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbDst) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ); +// +// Convert a value from the internal number representation to a byte array. +// +// Src is the number whose value is to be stored in a byte array +// (pbDst, cbDst) the destination buffer +// format: the destination format. +// Return value: if the value of Src when encoded in the format fits in the output buffer then the function succeeds. +// If the encoded value does not fit, the function returns an error. (Note: All errors are published.) +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetValueLsbits32( _In_ PCSYMCRYPT_INT piSrc ); +// +// Returns Src mod 2^32 +// +// Usecase: there are many number-theoretic algorithms where the algorithm +// depends on (n mod 8) or similar values. 
+// + +UINT64 +SYMCRYPT_CALL +SymCryptIntGetValueLsbits64( _In_ PCSYMCRYPT_INT piSrc ); +// +// Returns Src mod 2^64 +// +// Usecase: RSA public exponents can be 64 bits, and validating that +// a candidate prime is suitable uses this function +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsEqualUint32( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ UINT32 u32Src2 ); +// +// Returns a mask value which is 0xffffffff if Src1 = Src2 and 0 otherwise. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsEqual( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ); +// +// Returns a mask value which is 0xffffffff if Src1 = Src2 and 0 otherwise. +// +// Note that Src1 and Src2 can be of different sizes. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsLessThan( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ); +// +// Returns a mask value which is 0xffffffff if Src1 < Src2 and 0 otherwise. +// +// Note that a <= b is equivalent to NOT( b < a ) so all possible comparisons +// can be made using the < and = comparison primitive. +// + + +//============================================================= +// Addition & subtraction +// For all addition and subtraction operations, the destination may be +// the same object as one of the inputs if the other requirements of the function +// allow that. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src1 + Src2. +// Requirement: Dst.nDigits == Src1.nDigits +// If the result is larger than the capacity of Dst, then +// Dst is set to the result minus the capacity and the value 1 is returned. +// Otherwise the Dst is set to the sum and the value 0 is returned. +// The return value is thus a carry output of the addition. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src1 + Src2. 
+// Requirement: Src1.nDigits == Src2.nDigits == Dst.nDigits +// In more detail: +// if Src1 + Src2 < Dst.capacity: +// Dst = Src1 + Src2 +// return 0 +// else +// Dst = Src1 + Src2 - Dst.capacity +// return 1 +// The return value is a carry output of the addition. +// +// Dst may be the same object as Src1, Src2, or both. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src1 + Src2. +// Requirement: Dst.nDigits >= max( Src1.nDigits, Src2.nDigits ) +// In more detail: +// if Src1 + Src2 < Dst.capacity: +// Dst = Src1 + Src2 +// return 0 +// else +// Dst = Src1 + Src2 - Dst.capacity +// return 1 +// The return value is a carry output of the addition. +// +// Dst may be the same object as Src1, Src2, or both. +// + +// +// Subtraction +// Subtraction functions are the equivalent of addition functions. +// The return value is 1 if an underflow occurred (borrow), and 0 if no underflow/borrow occurred. +// On underflow, the value of the result is the result of the subtraction plus Dst.capacity. +// +// Rationale: For an underflow we could also return (UINT32)-1 or return -1 on a INT32. +// -1 in an unsigned type is actually 2^32 -1 which makes no sense. +// Returning a signed type is somewhat neater, but all other values are unsigned, and mixing +// signed and unsigned types is always error-prone. Furthermore, converting from a signed integer +// to a mask is also error-prone (at least within the behaviour guaranteed by the C standard.) +// Returning an unsigned 1 is therefore preferred. 
+// + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptIntNeg( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + +// +// Dst = (- Src) mod Dst.Capacity; +// Requirement: +// - Dst.nDigits == Src.nDigits; +// This is a negate modulo the capacity. +// Useful when you want the absolute value of a difference. +// Compute the difference, and if the subtraction yields a carry, negate the result. +// + +//=================================================================== +// Shifts +// Note that the shift amount is always published. +// If the need arises, we can define variants that are side-channel safe +// w.r.t. the shift size, but that incurs a significant performance cost. +// + +VOID +SYMCRYPT_CALL +SymCryptIntMulPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = (Src * 2^Exp ) mod R^n where n = Dst.nDigits. +// Requirement: Dst.nDigits == Src.nDigits, Dst == Src is allowed +// Exp is published. +// +// A variant that keeps Exp private is currently not available, but can be added to the API if needed. +// (A side-channel safe variant might require scratch space.) +// +// Dst may be the same object as Src1. +// + +VOID +SYMCRYPT_CALL +SymCryptIntDivPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = (Src div 2^Exp ) +// Requirement: Dst.nDigits == Src.nDigits, Dst == Src is allowed +// Exp is published +// +// A variant that keeps Exp private is currently not available, but can be added to the API if needed. +// (A side-channel safe variant might require scratch space.) 
+// +// Dst may be the same object as Src1. +// + +VOID +SYMCRYPT_CALL +SymCryptIntShr1( + UINT32 highestBit, + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = (Src + highestBit * Src.Capacity) div 2 +// +// Requirements: +// Src.nDigits == Dst.nDigits +// highestBit <= 1 +// +// This is the Int equivalent of the 'shift right 1' instruction. +// Shifting by one can be implemented faster than variable sized shifts. +// + +VOID +SYMCRYPT_CALL +SymCryptIntModPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src mod 2^Exp +// Requirement: Dst.nDigits == Src.nDigits, Dst == Src is allowed +// Exp is published +// +// Dst may be the same object as Src1. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetBit( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit ); +// +// Returns the i-th bit (starting from 0 for the LSB) of piSrc. +// Therefore the only possible return values are 0 and 1. +// +// Requirements: +// - iBit < SymCryptIntBitsizeOfObject( piSrc ) +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetBits( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit, + UINT32 nBits ); +// +// Returns the bits from position iBit up to (iBit + nBits - 1) +// (starting from 0 for the LSB). Total of nBits. The 0-th bit of +// the return value corresponds to the iBit-th bit of the source. +// +// Requirements: +// - 1 <= nBits <= 32 +// - iBit + nBits <= SymCryptIntBitsizeOfObject( piSrc ) +// +// Remarks: +// - The values iBit and nBits are not protected by side-channel attacks, +// therefore they should be treated as published. +// - The bits of the return value after the (nBits)-th bit are zero. +// + +VOID +SYMCRYPT_CALL +SymCryptIntSetBits( + _In_ PSYMCRYPT_INT piDst, + UINT32 value, + UINT32 iBit, + UINT32 nBits ); +// +// Sets the bits from position iBit up to (iBit + nBits - 1) +// (starting from 0 for the LSB). Total of nBits. The 0-th bit of +// the input value corresponds to the iBit-th bit of the destination. 
+// +// Requirements: +// - 1 <= nBits <= 32 +// - iBit + nBits <= SymCryptIntBitsizeOfObject( piDst ) +// +// Remarks: +// - The values iBit and nBits are not protected against side-channel attacks, +// therefore they should be treated as published. +// - The bits of the value after the (nBits)-th bit are ignored. +// + +//=========================================================== +// Mul & div +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntMulUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src1 * Src2 mod Dst.capacity; return value = Src1 * Src2 div Dst.capacity +// Requirement: piDst.nDigits == piSrc1.nDigits, Dst == Src1 is allowed +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( _nResultDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_MUL( _nResultDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntMulSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 * Src2. +// Requirement: +// - Src1.nDigits == Src2.nDigits; Dst.nDigits == Src1.nDigits + Src2.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( Dst.nDigits ) +// +// Note that Dst cannot be the same object as Src1 or Src2 because of the size restrictions. +// + +VOID +SYMCRYPT_CALL +SymCryptIntSquare( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src^2 +// Requirement: +// - Dst.nDigits == 2 * Src.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( Dst.nDigits ) +// +// Note that Dst cannot be the same object as Src because of the size restrictions. +// + +VOID +SYMCRYPT_CALL +SymCryptIntMulMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 * Src2.
+// Requirement: +// - Dst.nDigits >= Src1.nDigits + Src2.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( Dst.nDigits ) +// +// Note that Dst cannot be the same object as Src1 or Src2 because of the size restrictions. +// + + +// +// Division +// For all division and modulo operations, there are pre-computations that have to be done +// on the divisor. The pre-computed divisor information is stored in a DIVISOR object. +// Note that the bitsize of the value of the divisor is published. +// Therefore, a generic division is not side-channel safe. +// Rationale: Hiding the bitsize of the value of the divisor is quite expensive, +// and we have no cryptographic algorithms that require it. +// + + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntFromDivisor( _In_ PSYMCRYPT_DIVISOR pdSrc ); +// +// Returns the INT object inside the DIVISOR object. +// Digit size of the INT object is equal to the digit size of the DIVISOR object. +// This object has two uses: +// - On an uninitialized DIVISOR object it is a suitable place to put a value before calling +// SymCryptIntToDivisor. +// - On an initialized DIVISOR object the function returns a pointer to the INT that contains +// the divisor value. Modifying the INT value from an initialized DIVISOR value corrupts +// the divisor value. +// +// This is typically a very fast function, with a run-time cost that is zero or only one instruction. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntToDivisor( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst, + UINT32 totalOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Create a DIVISOR object from an INT. 
+// Requirement: +// - Dst.nDigits == Src.nDigits +// - Src != 0 +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( Src.nDigits ) +// SymCryptIntBitsizeOfValue( Src ) is published. +// Src may be equal to SymCryptIntFromDivisor( Dst ). +// totalOperations is an estimate of how many divide/modulo operations will be performed with this divisor. +// An implementation may use this to decide how much pre-computations to do. +// flags: any combination of the following flag values: +// - SYMCRYPT_FLAG_DATA_PUBLIC +// Signals that the Src value is public. +// Implementations can use this to use more efficient divisor algorithms depending on the actual value of Src. +// For example, if Src is very close to a power of 2, division can be implemented more efficiently. +// +// Once a divisor object has been created, it is immutable. +// Multiple threads can use the same divisor object for different division operations in parallel. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntDivMod( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_opt_ PSYMCRYPT_INT piQuotient, + _Out_opt_ PSYMCRYPT_INT piRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Quotient = Src div Divisor +// Remainder = Src mod Divisor +// Quotient & Remainder may be NULL in which case that result is not returned. +// Requirements: +// - Quotient.nDigits >= Src.nDigits +// - Remainder.nDigits >= Divisor.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( Src.nDigits, Divisor.nDigits ) +// Quotient and Remainder must be different objects. +// Src may be the same object as either Quotient or Remainder. 
+// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntExtendedGcd( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + UINT32 flags, + _Out_opt_ PSYMCRYPT_INT piGcd, + _Out_opt_ PSYMCRYPT_INT piLcm, + _Out_opt_ PSYMCRYPT_INT piInvSrc1ModSrc2, + _Out_opt_ PSYMCRYPT_INT piInvSrc2ModSrc1, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Compute up to four results from Src1 and Src2. +// GCD is the greatest common divisor of Src1 and Src2. +// LCM is the Least Common Multiple of Src1 and Src2. +// InvSrc1ModSrc2 is the smallest value such that (InvSrc1ModSrc2 * Src1) mod Src2= GCD( Src1, Src2 ) +// UNLESS Src1 is a multiple of Src2, i.e. when Src1 = 0 mod Src2. In this case the result is +// undefined. +// InvSrc2ModSrc1 is the smallest value such that (InvSrc2ModSrc1 * Src2) mod Src1= GCD( Src1, Src2 ) +// UNLESS Src2 is a multiple of Src1, i.e. when Src2 = 0 mod Src1. In this case the result is +// undefined. +// +// The last two modular inverse values are not true modular inverses unless GCD( Src1, Src2 ) = 1. +// +// Any of the output pointers can be NULL and then that result is not returned. +// Requirements: +// - Src1 > 0 +// - Src2 > 0 and Src2 odd +// - Gcd.nDigits >= min( Src1.nDigits, Src2.nDigits ) +// - Lcm.nDigits >= Src1.nDigits + Src2.nDigits +// - InvSrc1ModSrc2.nDigits >= max(Src1.nDigits, Src2.nDigits) // Future work: Make these bounds Src2 and Src1 respectively. +// - InvSrc2ModSrc1.nDigits >= max(Src1.nDigits, Src2.nDigits) +// - if piInvSrc2ModSrc1 is not NULL, max( Src1.nDigits, Src2.nDigits ) * 2 <= SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD( max( Src1.nDigits, Src2.nDigits ) ) +// +// If only one inverse value is needed, it is most efficient to use only InvSrc1ModSrc2. 
+// +// The restriction that Src2 must be odd can be removed in a future version. +// The SYMCRYPT_FLAG_DATA_PUBLIC flag signals that the inputs are public information and do not have +// to be side-channel protected. +// The SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN signals that at least one input is odd. This speeds up the +// side-channel safe implementation; this flag is not needed if the inputs are signaled as public as the code can then +// afford to check that condition and switch to an optimized algorithm. +// The SYMCRYPT_FLAG_GCD_PUBLIC signals that the GCD value is public. This can make some computations +// (of the inverses) more efficient when GCD = 1. +// + +#define SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN (0x02) +#define SYMCRYPT_FLAG_GCD_PUBLIC (0x04) + + +UINT64 +SYMCRYPT_CALL +SymCryptUint64Gcd( UINT64 a, UINT64 b, UINT32 flags ); +// +// Return GCD of two 64-bit integers. +// a, b : inputs to the GCD +// flags: +// - SYMCRYPT_FLAG_DATA_PUBLIC signals that a and b are public values (w.r.t. side-channel safety) +// This may improve performance. +// - SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN: signals that at least one of (a,b) is odd. This +// simplifies & speeds up the GCD computation. +// +// Note: +// The current implementation requires that the INPUTS_NOT_BOTH_EVEN flag is set (and at least one input be odd). +// Also note that GCD(x, 0) == GCD(0, x) == x +// + + +#define SYMCRYPT_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCrtGenerateInverses( + UINT32 nCoprimes, + _In_reads_( nCoprimes ) PCSYMCRYPT_MODULUS * ppmCoprimes, + UINT32 flags, + _Out_writes_( nCoprimes ) PSYMCRYPT_MODELEMENT * ppeCrtInverses, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Compute the Chinese Remainder Theorem (CRT) constants for a set of nCoprimes +// pairwise coprime moduli.
Pointers to the input numbers are stored in the array of +// pointers ppmCoprimes, while the outputs are stored to the locations pointed by +// ppeCrtInverses. +// +// For input numbers Src1, Src2, ..., SrcK where K = nCoprimes, let N = Src1*Src2*...*SrcK. +// Then this function outputs the constants: +// (( Src1 / N ) mod Src1), (( Src2 / N ) mod Src2), ..., (( SrcK / N ) mod SrcK) +// +// The most common case is for the RSA algorithm where the inputs are 2 prime numbers P and Q +// and only Q^{-1} mod P is needed (i.e. only the first term of the output array). +// +// Any of the output pointers in the ppeCrtInverses can be NULL and then that result +// is not returned (resulting in a faster total running time). +// +// The number of inputs nCoprimes and which outputs are returned is public. +// +// Requirements: +// - nCoprimes >= 2 +// - Both ppmCoprimes and ppeCrtInverses must be arrays of pointers of exactly nCoprimes pointers. +// - ppmCoprimes[i] != NULL for all i in [0, nCoprimes-1]. +// - The input moduli must be pairwise coprime. +// - The number of digits of all input moduli must match the number of digits of the corresponding +// output modelements. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_CRT_GENERATION( nDigits ) where nDigits is the maximum number +// of digits of the inputs and outputs. 
+// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCrtSolve( + UINT32 nCoprimes, + _In_reads_( nCoprimes ) PCSYMCRYPT_MODULUS * ppmCoprimes, + _In_reads_( nCoprimes ) PCSYMCRYPT_MODELEMENT * ppeCrtInverses, + _In_reads_( nCoprimes ) PCSYMCRYPT_MODELEMENT * ppeCrtRemainders, + UINT32 flags, + _Out_ PSYMCRYPT_INT piSolution, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Solve for x the system of nCoprimes congruences of the form +// x = ppeCrtRemainders[0] (mod ppmCoprimes[0]) +// x = ppeCrtRemainders[1] (mod ppmCoprimes[1]) +// ... +// x = ppeCrtRemainders[nCoprimes-1] (mod ppmCoprimes[nCoprimes-1]) +// +// The input array ppeCrtInverses must have been pre-computed by a call to SymCryptCrtGenerateInverses. +// +// The number of inputs nCoprimes is public. +// +// Requirements: +// - nCoprimes == 2 +// - ppmCoprimes, ppeCrtInverses, and ppeCrtRemainders must be arrays of pointers of exactly nCoprimes elements. All +// of them non-NULL. +// - piSolution must be large enough to hold the result modulo the product of all the coprimes. +// - max( ppmCoprimes[0].nDigits, ppmCoprimes[1].nDigits ) * 2 <= SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_CRT_SOLUTION( nDigits ) where nDigits is the maximum number +// of digits of the input moduli. +// + + +typedef const struct _SYMCRYPT_TRIALDIVISION_CONTEXT *PCSYMCRYPT_TRIALDIVISION_CONTEXT; + +PCSYMCRYPT_TRIALDIVISION_CONTEXT +SYMCRYPT_CALL +SymCryptCreateTrialDivisionContext( UINT32 nDigits ); +// +// Create a trial division context that can be used for integers up to and including nDigits digits. +// The Trial division context can be used in multiple threads in parallel. +// The context should be freed with SymCryptFreeTrialDivisionContext after use. +// A context can be fairly large (100 kB) so freeing it is important. 
+// Returns NULL if out of memory or an invalid digit count is provided. +// + +VOID +SYMCRYPT_CALL +SymCryptFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ); +// +// Free the trial division context after use. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntFindSmallDivisor( + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext, + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Returns a divisor of piSrc, or zero. +// Requirements: +// Requirement: +// - pContext is a valid trial division context, and Context.nDigits >= Src.nDigits +// - Src >= 2 +// Note: +// - Src is published if this function returns a divisor. +// +// There is no guarantee that this function finds small divisors; +// it is valid for the implementation to always return 0. +// Any nonzero return value is always >= 2 and an actual divisor of Src. +// Note: this function might not find 2 as a small divisor. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) + +UINT32 +SYMCRYPT_CALL +SymCryptIntMillerRabinPrimalityTest( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 nBitsSrc, + UINT32 nIterations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Applies the Miller-Rabin prime testing algorithm using nIterations on the integer +// piSrc. +// +// The maximum bitsize of the value of piSrc is equal to nBitsSrc and it is public. +// The number of iterations nIterations is also public. +// +// If the return value is 0, then Src is guaranteed to be a composite value. +// In this case, the value of Src is treated as public. +// +// If the return value is 0xffffffff, then Src might be prime. +// In this case, the value of Src is treated as private except when the +// SYMCRYPT_FLAG_DATA_PUBLIC flag is specified. 
+// +// If the flag SYMCRYPT_FLAG_DATA_PUBLIC is specified the +// algorithm leaks the number of trailing zeros of Src-1. The reason for +// not having a fully side-channel safe implementation for arbitrary +// numbers is that such a function would be prohibitively slow. +// +// Requirements: +// - SymCryptIntBitsizeOfValue( piSrc ) <= nBitsSrc <= SymCryptIntBitsizeOfObject( piSrc ) +// - Src is odd and greater than 3. +// - If flags == 0 then Src must be 3 modulo 4. (See the comment above for +// the SYMCRYPT_FLAG_DATA_PUBLIC flag) +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( Src.nDigits ) +// + +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntGenerateRandomPrime( + _In_ PCSYMCRYPT_INT piLow, + _In_ PCSYMCRYPT_INT piHigh, + _In_reads_opt_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + UINT32 nTries, + UINT32 flags, + _Inout_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// This function generates a random prime Dst such that +// Dst == 3 mod 4 and +// Low <= Dst < High +// for e in PubExp[]: GCD( Dst - 1, e ) == 1 +// +// (pu64PubExp, nPubExp) can be (NULL, 0) if no pubexp restriction is needed. +// The nTries parameter specifies the maximum number of candidate numbers +// until a prime number is found satisfying the above restrictions. +// If the function cannot find one after nTries, it returns SYMCRYPT_INVALID_ARGUMENT +// (For example, if the caller passes in a Low bound bigger than the High bound, +// or if there are no primes between Low and High). +// +// The values of the pubexps, piLow and piHigh are public. 
+// +// flags: None +// +// Requirements: +// - SymCryptIntBitsizeOfValue( piHigh ) <= SymCryptIntBitsizeOfObject(piDst) +// - piLow > 3 +// - Each public exponent must be greater than 0 +// - 0 <= nPubExp <= SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_PRIME_GEN( Dst.nDigits ) +// + +//===================================================== +// Modular arithmetic +// +// To perform modular arithmetic the modulus has to be prepared into a Modulus object. +// Arithmetic in the ring modulo the modulus can then be done using ModElement objects. +// + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ); +// +// Returns the DIVISOR object inside the MODULUS object. +// +// Digit size of the DIVISOR object is equal to the digit size of the MODULUS object. +// This object has one use: +// - On an initialized MODULUS object the function returns a pointer to the DIVISOR that contains +// the modulus value. Modifying the DIVISOR value from an initialized MODULUS value corrupts +// the modulus. +// +// This is typically a very fast function, with a run-time cost that is zero or one instruction. +// + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ); +// +// Returns the INT object inside the MODULUS object. +// +// Digit size of the INT object is equal to the digit size of the MODULUS object. +// This object has two uses: +// - On an uninitialized MODULUS object it is a suitable place to put a value before calling +// SymCryptIntToModulus. +// - On an initialized MODULUS object the function returns a pointer to the INT that contains +// the modulus value. Modifying the INT value from an initialized MODULUS value corrupts +// the modulus. +// +// This is typically a very fast function, with a run-time cost that is zero or one instruction. 
+// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntToModulus( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_MODULUS pmDst, + UINT32 averageOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Create a modulus from an INT. +// Requirements: +// - Src != 0 +// - Dst.nDigits == Src.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( Src.nDigits ) +// SymCryptIntBitsizeOfValue( Src ) is published. +// averageOperations is the average number of multiplications that are performed on a ModElement created with this modulus between the time that the value is +// created as a ModElement and the time it is exported out of modElement form. +// There are multiple ways of doing modular computations; some of them are faster but have an overhead for converting into and out of modular form. +// For example, for RSA verification the # operations is small and conversion overhead should be avoided. +// For RSA signatures, the # operations is large and the fastest per-operation form should be used. +// This parameter allows the library to select the right kind of modular arithmetic for this modulus. +// The following flags are supported: +// SYMCRYPT_FLAG_DATA_PUBLIC +// Signals the code that the Src value is public. This may improve performance because it allows further optimizations that +// depend on the value. (For example, if Src is close to a power of 2, the modulo reduction can be made significantly faster.) +// SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC +// Signals that the parity of Src (whether it is even or odd) may be treated as a public value. +// There are some algorithms that can speed up operations on odd moduli, but their use publishes the fact that the modulus is odd. 
+// SYMCRYPT_FLAG_MODULUS_ADDITIVE_ONLY +// The modulus will only be used for addition and subtraction, not for multiplication or division. +// This can significantly reduce the cost of this function as there is no need to pre-compute the divisor information. +// SYMCRYPT_FLAG_MODULUS_PRIME +// Signals that the modulus is a prime. Some algorithms can be more efficient for prime moduli. Note that setting this flag +// for a non-prime modulus can result in incorrect answers. +// The flags and averageOperations parameters are published. +// + +#define SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC (0x02) +#define SYMCRYPT_FLAG_MODULUS_ADDITIVE_ONLY (0x04) +#define SYMCRYPT_FLAG_MODULUS_PRIME (0x08) + +#define SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nDigits ) + + +VOID +SYMCRYPT_CALL +SymCryptIntToModElement( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src mod Mod +// Requirements: +// - Dst.nDigits == Mod.nDigits +// - piSrc.nDigits <= 2 * Mod.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Note: the input is limited in size to be no more than twice the modulus size (in digits). +// This should be a rare case, and it simplifies the scratch space handling significantly. +// + +VOID +SYMCRYPT_CALL +SymCryptModElementToInt( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src +// +// Requirement: +// - Dst.nDigits >= Mod.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Convert a ModElement to an Int. 
+// The internal format in which a ModElement is stored might be different +// from the format of an Int; this function converts the value to the INT format. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModElementSetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = decode( pbSrc, cbSrc, format ) mod Mod +// Requirement: +// - SymCryptDigitsFromBits( 8 * cbSrc ) <= Mod.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// This is a separate function as it is frequently used, and does not require the allocation of an INT object. +// + +VOID +SYMCRYPT_CALL +SymCryptModElementSetValueUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = value mod Mod +// value is published. +// Requirement: +// - value < Mod +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// Note: this function does NOT hide the value. +// Rationale: typically the value parameter is known, either 0 or 1. +// + +VOID +SYMCRYPT_CALL +SymCryptModElementSetValueNegUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = -value mod Mod +// value is published. +// Requirement: +// - 0 < value < Mod +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// Note: this function does NOT hide the value. +// Rationale: typically the value parameter is known, either 0 or 1. 
+// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModElementGetValue( + PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// (pbDst, cbDst) = encode( format, cbDst, Src ) +// Requirement: +// - SymCryptDigitsFromBits( 8 * cbDst ) <= Mod.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Retrieve the value of a ModElement as an array of bytes +// + +UINT32 +SYMCRYPT_CALL +SymCryptModElementIsEqual( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2 ); +// +// Returns a mask value which is 0xffffffff if Src1 = Src2 and 0 otherwise. +// +// Both SYMCRYPT_MODELEMENTs should have been created using the modulus pmMod. Otherwise +// the result is undefined. +// + +UINT32 +SYMCRYPT_CALL +SymCryptModElementIsZero( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc ); +// +// Returns a mask value which is 0xffffffff if Src = 0 and 0 otherwise. +// +// Useful for quickly checking if a ModElement is 0. +// + + +//=============================== +// Modular arithmetic. +// + +VOID +SYMCRYPT_CALL +SymCryptModSetRandom( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = random value modulus Mod. +// Requirement: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Random value is chosen uniformly from the set of allowed values. +// By default this function does not return the values 0, 1, or -1 (see below NOTE for small moduli exception) +// Flags parameter can signal that these special values are allowed. +// flags parameter is published. 
+// +// Rationale: these values cause problems in many situations, and for all commonly used cryptographic modulo sizes +// the absence of these values is statistically undetectable even if they are allowed. +// For completeness of the API, the flags parameter can be used to allow these three values. +// flags is a bitmask containing a combination of the following bit values: +// SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO +// SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE +// SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE +// Specifying ALLOW_ZERO implies ALLOW_ONE, there is no way to allow 0 and disallow 1. +// +// NOTE: +// For very small moduli (1, 2, and 3), not allowing 0, 1, or -1 by default does not make sense because this would +// exclude all possible values! Instead the default behavior is to allow -1 for these moduli. +// Modulo 1 => return 0 by default +// Modulo 2 => return 1 by default +// may also return 0 if SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO is specified +// Modulo 3 => return 2 by default +// may also return 1 if SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE is specified, and +// may also return 0 or 1 if SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO is specified, +// +// Callers relying on not having 0, 1, or -1 are required to pass a larger modulus. 
+ +#define SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO (0x01) +#define SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE (0x02) +#define SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE (0x04) + + +VOID +SYMCRYPT_CALL +SymCryptModNeg( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = -Src mod Mod +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// + +VOID +SYMCRYPT_CALL +SymCryptModAdd( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 + Src2 mod Mod +// Requirement: +// - Src1.modulus == Src2.modulus == Mod. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// Dst == Src1, Dst == Src2, and Src1 == Src2 are all allowed. +// Rationale: +// scratch space can make the mod-add faster for side-channel safe implementations. +// It allows: +// Dst = Src1 + Src2; +// Tmp = Dst - Mod; +// Dst = choose( Dst, Tmp, carry_bits ) +// And the choose() operation is fast because it does not require carry propagation. +// + + +VOID +SYMCRYPT_CALL +SymCryptModSub( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 - Src2 mod Mod +// Requirement: +// Same as SymCryptModAdd +// + + +VOID +SYMCRYPT_CALL +SymCryptModMul( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 * Src2 mod Mod +// Requirement: +// - Src1.modulus == Src2.modulus == Mod. 
+// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// + +VOID +SYMCRYPT_CALL +SymCryptModSquare( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src^2 mod Mod +// Requirement: +// - Src.modulus == Mod. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// + +VOID +SYMCRYPT_CALL +SymCryptModDivPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src / 2^exp mod Mod +// Requirements: +// - Mod is odd. +// - Src.modulus == Dst.modulus == Mod. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Remarks: +// - The value exp is *** public ***; hence it should be treated as known to the attacker. +// - This function may write intermediate values to peDst and read them back, violating the +// read-once/write-once rule, so the caller must ensure that the peDst buffer is trusted. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODINV( _nDigits ) + +// SYMCRYPT_FLAG_DATA_PUBLIC signals that the Src element is public and does not have to be protected +// against side-channel attacks. The public-ness of the Modulus is part of the Modulus object, specified when the +// modulus value was set. +// Marking the source value as public has very little effect on performance, but it removes the random blinding used. +// The main goal of this flag is to allow ECDSA verification without a source of random numbers.
+ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModInv( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = 1/Src mod Mod. +// +// - pmMod: Modulus, must have the SYMCRYPT_FLAG_MODULUS_PRIME and SYMCRYPT_FLAG_DATA_PUBLIC flag set. +// Non-prime or non-public moduli are currently not supported. +// - peSrc: Source value, modulo pmMod +// - peDst: Destination value, mod element modulo pmMod +// - flags: SYMCRYPT_FLAG_DATA_PUBLIC signals that peSrc is a public value. +// - pbScratch/cbScratch: scratch space >= SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigits( pmMod ) ) +// +// Returns an error if +// - GCD( Src, Mod ) != 1 +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODEXP( _nDigits ) + +VOID +SYMCRYPT_CALL +SymCryptModExp( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peBase, + _In_ PCSYMCRYPT_INT piExp, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Base ^ Exp mod Mod +// where only the least significant (nBitsExp) bits of the exponent are used. +// +// Requirements: +// - nBitsExp != 0 +// - Mod > 1 +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( Mod.nDigits ) +// +// Allowed flags: +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (For use in RSA encryption - exponentiation +// with a public exponent). The default behaviour is side channel safety. +// +// Remarks: +// - The undefined operation 0^0 will return 1. +// - The value nBitsExp is *** public ***; hence it should be treated as known to the attacker. +// Examples: +// - nBitsExp = SymCryptIntBitsizeOfObject( piExp ) => This processes all the +// bits of the exponent object. 
+// - nBitsExp = number of bits of modulus ==> This processes the same +// number of bits (even leading zeros) as the modulus. In this case +// the exponent should have a value with bitsize less or equal to the +// bitsize of the modulus. +// - nBitsExp = max(1, SymCryptIntBitsizeOfValue( piExp )) => This processes +// the bits of the exponent ignoring the leading zeros. Therefore, this +// option leaks the bitsize of the value of the exponent. +// + +// SYMCRYPT_MODMULTIEXP_MAX_NBASES, _NBITSEXP: The maximum number of bases +// and exponent bits allowed for the multi-exponentiation operation. +#define SYMCRYPT_MODMULTIEXP_MAX_NBASES (8) +#define SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP (SYMCRYPT_INT_MAX_BITS) + +#define SYMCRYPT_SCRATCH_BYTES_FOR_MODMULTIEXP( _nDigits, _nBases, _nBitsExp ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODMULTIEXP( _nDigits, _nBases, _nBitsExp ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModMultiExp( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_( nBases ) PCSYMCRYPT_MODELEMENT * peBaseArray, + _In_reads_( nBases ) PCSYMCRYPT_INT * piExpArray, + UINT32 nBases, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = ( peBaseArray[0]^piExpArray[0] * peBaseArray[1]^piExpArray[1] * ... * +// peBaseArray[nBases-1]^piExpArray[nBases-1] ) mod Mod +// where only the least significant (nBitsExp) bits of the exponents are used. +// +// Requirements: +// - 1<= nBitsExp <= SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP +// - Mod > 1 +// - 1<= nBases <= SYMCRYPT_MODMULTIEXP_MAX_NBASES +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_MODMULTIEXP( Mod.nDigits, nBases, nBitsExp ) +// +// Allowed flags: +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (For use in DSA verification). +// The default behaviour is side channel safety. 
+// + +// ========================================= +// Tools for side-channel safety +// ======================================== + +// +// Side-channel safe lookup table +// + +typedef struct _SYMCRYPT_SCSTABLE { + UINT32 groupSize; // nElements must be a multiple of this (currently 4 in all implementations) + UINT32 interleaveSize; // elementSize must be a multiple of this (currently 8 in all implementations) + UINT32 nElements; // must be multiple of groupSize + UINT32 elementSize; // # bytes in each element, note: limited to UINT32 for efficiency + PBYTE pbTableData; // presumably the caller-provided buffer set by SymCryptScsTableSetBuffer - TODO confirm + UINT32 cbTableData; // presumably the size in bytes of pbTableData - TODO confirm +} SYMCRYPT_SCSTABLE, *PSYMCRYPT_SCSTABLE; + +UINT32 +SYMCRYPT_CALL +SymCryptScsTableInit( + _Out_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 nElements, + UINT32 elementSize ); +// Initializes an ScsTable for nElements elements each of elementSize bytes. +// nElements and elementSize are limited to less than 2^16. +// Return value is the size of the buffer that the caller needs to provide. +// +// Requirements: +// - nElements must be a multiple of groupSize and elementSize must be a +// multiple of interleaveSize. Currently all implementations have as +// defaults +// groupSize = 4 +// interleaveSize = 8 +// + +VOID +SYMCRYPT_CALL +SymCryptScsTableSetBuffer( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + _Inout_updates_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ); +// Sets the caller-provided buffer on the ScsTable. +// cbBuffer should be >= the size returned by the SymCryptScsTableInit function + +VOID +SYMCRYPT_CALL +SymCryptScsTableStore( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _In_reads_bytes_( cbData ) PCBYTE pbData, + UINT32 cbData ); +// Not side-channel safe; publishes iIndex. +// cbData must match the elementSize i.e. the size of a single element. + +VOID +SYMCRYPT_CALL +SymCryptScsTableLoad( + _In_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _Out_writes_bytes_(cbData) PBYTE pbData, + UINT32 cbData ); +// Side-channel safe fetching of data; iIndex is kept secret. +// cbData must match the elementSize i.e. the size of a single element.
+ +VOID +SYMCRYPT_CALL +SymCryptScsTableWipe( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable ); +// Wipes the part of the buffer that the table used + +// Other Side-channel safety tools + +VOID +SYMCRYPT_CALL +SymCryptScsRotateBuffer( + _Inout_updates_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + SIZE_T lshift ); +// Rotates buffer left by lshift without revealing lshift +// through side channels. +// - pbBuffer/cbBuffer: buffer to rotate +// pbBuffer must be aligned to the native integer of the platform (4 or 8 bytes) +// cbBuffer must be a power of two >= 32 +// - lshift: # bytes to left rotate the buffer +// pbBuffer[0] will get the value pbBuffer[ lshift % cbBuffer ] + +VOID +SYMCRYPT_CALL +SymCryptScsCopy( + _In_reads_( cbDst ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// Copy cbSrc bytes of pbSrc into pbDst without revealing cbSrc +// through side channels. +// +// WARNING: pbSrc buffer must be at least cbDst bytes long; not cbSrc! +// +// - pbSrc pointer to buffer to copy data from +// This buffer must be at least cbDst bytes long +// - cbSrc number of bytes to be copied, must be <= 2^31 +// - pbDst destination buffer +// - cbDst size of the destination buffer, must be <= 2^31 +// Equivalent to: +// n = min( cbSrc, cbDst ) +// pbDst[ 0.. n-1 ] = pbSrc[ 0 .. n - 1 ] +// cbSrc is protected from side-channels; cbDst is public. + + +// +// Mask generation functions. +// All these functions are side-channel safe in all parameters. +// Naming convention: +// SymCrypt <MaskType> <Op> <ParameterType> +// <MaskType> is the type of the function result: +// Mask32 UINT32 mask that is 0 or -1 +// Mask64 UINT64 mask that is 0 or -1 +// <Op> is the boolean operation performed on the parameters +// IsZero v == 0 +// IsNonzero v != 0 +// Eq a == b +// Neq a != b +// <ParameterType> is an indication of the parameter type supported. +// U31 UINT32 which is limited to values < 2^31 +// This allows more efficient masking functions.
+// U32 UINT32 +// Other mask types, operations, and parameter types may be defined in future. +// + +UINT32 +SYMCRYPT_CALL +SymCryptMask32IsZeroU31( UINT32 v ); + +UINT32 +SYMCRYPT_CALL +SymCryptMask32IsNonzeroU31( UINT32 v ); + + +UINT32 +SYMCRYPT_CALL +SymCryptMask32EqU32( UINT32 a, UINT32 b ); + +UINT32 +SYMCRYPT_CALL +SymCryptMask32NeqU31( UINT32 a, UINT32 b ); + +UINT32 +SYMCRYPT_CALL +SymCryptMask32LtU31( UINT32 a, UINT32 b ); + + +// +// Other helper functions +// +SIZE_T +SYMCRYPT_CALL +SymCryptRoundUpPow2Sizet( SIZE_T v ); +// Round up to the next power of 2 +// +// Requirements: +// v <= (SIZE_T_MAX / 2) + 1 +// i.e. rounding v up to the next power of 2 fits within SIZE_T, so v is +// less than or equal to the maximum power of 2 representable in SIZE_T + + +//===================================================== +//===================================================== +// RSA padding operations +//===================================================== +//===================================================== + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1ApplyEncryptionPadding( + _In_reads_bytes_( cbPlaintext ) PCBYTE pbPlaintext, + SIZE_T cbPlaintext, + _Out_writes_bytes_( cbPkcs1Format ) PBYTE pbPkcs1Format, + SIZE_T cbPkcs1Format ); +// +// Applies the RSA PKCS1 v1.5 encryption padding to the plaintext buffer. +// - Plaintext buffer containing plaintext to be encoded +// - Pkcs1Format Output buffer, typically the size of the RSA modulus +// Requirement: cbPkcs1Format >= cbPlaintext + 11 due to the PKCS1 overhead. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1RemoveEncryptionPadding( + _Inout_updates_bytes_( cbPkcs1Buffer ) PBYTE pbPkcs1Format, + SIZE_T cbPkcs1Format, + SIZE_T cbPkcs1Buffer, + _Out_writes_bytes_opt_( cbPlaintext ) PBYTE pbPlaintext, + SIZE_T cbPlaintext, + _Out_ SIZE_T *pcbPlaintext ); +// +// Remove the PKCS1 encryption padding and extract the message plaintext. +// This function is side-channel safe w.r.t. 
the data in the Pkcs1Format buffer. +// - pbPkcs1Format points to a buffer containing the raw RSA decrypted data. +// This buffer will be modified by this function. +// - cbPkcs1Format is the # bytes of the buffer that were decrypted with raw RSA +// - cbPkcs1Buffer is the size of the buffer that pbPkcs1Format points to +// cbPkcs1Buffer must be a power of 2 and >= cbPkcs1Format and >= 32 +// cbPkcs1Buffer must be <= 2^30 +// - pbPlaintext/cbPlaintext is the output buffer that will receive the data. +// if pbPlaintext == NULL no message is output, but *pcbPlaintext is still set. +// - pcbPlaintext receives the # bytes in the actual decrypted message. +// set to 0 if an error occurred. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( _hashAlgorithm, _nBytesOAEP ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_OAEP( _hashAlgorithm, _nBytesOAEP ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepApplyEncryptionPadding( + _In_reads_bytes_( cbPlaintext ) PCBYTE pbPlaintext, + SIZE_T cbPlaintext, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + _In_reads_bytes_opt_( cbSeed ) PCBYTE pbSeed, + SIZE_T cbSeed, + _Out_writes_bytes_( cbOaepFormat ) PBYTE pbOaepFormat, + SIZE_T cbOaepFormat, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Apply the RSA OAEP encryption padding to the plaintext buffer. +// - Plaintext Plaintext to be encoded +// - hashAlgorithm Hash algorithm to use during padding +// - Label Label input for OAEP +// - Seed Specified seed value. 0 <= cbSeed < hash size +// - OaepFormat Output buffer, typically the size of the RSA modulus +// +// Remarks: +// - If pbSeed == NULL and cbSeed != 0, then the function picks +// a uniformly random seed of size cbSeed bytes. 
+// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbOaepFormat ) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepRemoveEncryptionPadding( + _In_reads_bytes_( cbOAEPFormat ) + PCBYTE pbOAEPFormat, + SIZE_T cbOAEPFormat, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + _Out_writes_bytes_( cbPlaintext ) + PBYTE pbPlaintext, + SIZE_T cbPlaintext, + _Out_ SIZE_T *pcbPlaintext, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Removes the RSA OAEP encryption padding from the OAEP formatted buffer +// after it checks the validity of the format. +// +// *pcbPlaintext is the number of bytes output. If pbPlaintext == NULL then this +// is the only output value. +// +// Allowed flags: +// None +// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbOAEPFormat ) +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PKCS1( _nBytesPKCS1 ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_PKCS1( _nBytesPKCS1 ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1ApplySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_( cbHashOid ) + PCBYTE pbHashOid, + SIZE_T cbHashOid, + UINT32 flags, + _Out_writes_bytes_( cbPKCS1Format ) + PBYTE pbPKCS1Format, + SIZE_T cbPKCS1Format ); +// +// Applies the RSA PKCS1 v1.5 signature padding to the source buffer, which typically contains the +// hash of the message.
+// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1 +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1VerifySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_( nOIDCount ) PCSYMCRYPT_OID pHashOIDs, + _In_ SIZE_T nOIDCount, + _In_reads_bytes_( cbPKCS1Format ) + PCBYTE pbPKCS1Format, + SIZE_T cbPKCS1Format, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Verifies that the RSA PKCS1 v1.5 signature padding is valid. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_VERIFICATION_FAIL +// if it failed. +// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID +// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PKCS1( cbPKCS1Format ) +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( _hashAlgorithm, _nBytesMessage, _nBytesPSS ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_PSS( _hashAlgorithm, _nBytesMessage, _nBytesPSS ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssApplySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_opt_( cbSalt ) + PCBYTE pbSalt, + _In_range_(0, cbPSSFormat) SIZE_T cbSalt, + UINT32 nBitsOfModulus, + UINT32 flags, + _Out_writes_bytes_( cbPSSFormat ) + PBYTE pbPSSFormat, + SIZE_T cbPSSFormat, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Applies the RSA PSS signature padding to the source buffer, which typically contains the +// hash of the message. +// +// Remarks: +// - If pbSalt == NULL and cbSalt != 0, then the function picks +// a uniformly random salt of size cbSalt bytes. 
+// +// Allowed flags: +// None +// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHash, cbPSSFormat ) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssVerifySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_range_(0, cbPSSFormat) SIZE_T cbSalt, + _In_reads_bytes_( cbPSSFormat ) + PCBYTE pbPSSFormat, + SIZE_T cbPSSFormat, + UINT32 nBitsOfModulus, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Verifies that the RSA PSS signature padding is valid. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_VERIFICATION_FAIL +// if it failed. +// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT +// +// When the flag is set, this function will do signature verification using the cbSalt parameter as +// a minimum value for the salt length, rather than using it as an exact value. Specifying this and +// setting cbSalt = 0 allows callers to verify a signature which has a valid encoding with any salt +// length using a single call. +// +// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHash, cbPSSFormat ) +// + +//===================================================== +//===================================================== +// EC point operations +//===================================================== +//===================================================== + +PCSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptEcurveGroupOrder( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns a pointer to the group order of the curve's subgroup. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveDigitsofScalarMultiplier( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of digits of a scalar that is big enough to +// store a multiplier of an elliptic curve point. +// See also, SymCryptEcurveSizeofScalarMultiplier. 
+// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveDigitsofFieldElement( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of digits for one coordinate of the public key. +// + +//===================================================== +// GETSET_VALUE_ECURVE_OPERATIONS +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( _pCurve ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( _pCurve ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointSetValue( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nformat, + SYMCRYPT_ECPOINT_FORMAT eformat, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Set the value of an ECPOINT object from a source buffer pbSrc of size cbSrc. The buffer +// will contain the necessary coordinates of the ECPOINT in the format specified by nformat +// and eformat. The nformat determines the format of the integers in the buffer while the +// eformat determines the layout (and the number) of the coordinates. +// +// Requirements: +// - cbSrc = X * SymCryptEcurveSizeofFieldElement( pCurve ) where X depends on the +// eformat specified and denotes the number of coordinates. For example, for +// SYMCRYPT_ECPOINT_FORMAT_XY it is equal to 2. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) +// +// Flag values: +// SYMCRYPT_FLAG_DATA_PUBLIC data is public (no side-channel protection needed) +// +// Rationale: +// Scratch space provides room for conversion of point representations. +// +// Example: +// Set an ECPOINT to (X,Y) point in affine coordinates where the size of each coordinate +// is t = SymCryptEcurveSizeofFieldElement( pCurve ) bytes. The coordinates are +// X=(X_(t-1), ... , X_1, X_0) and Y=(Y_(t-1), ... , Y_1, Y_0) with t-1 the +// most significant byte. 
Then the function can be called with +// pbSrc = { X_(t-1), ... , X_1, X_0, Y_(t-1), ... , Y_1, Y_0 } +// cbSrc = 2 * t +// nformat = SYMCRYPT_NUMBER_FORMAT_MSB_FIRST +// eformat = SYMCRYPT_ECPOINT_FORMAT_XY +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointGetValue( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + SYMCRYPT_NUMBER_FORMAT nformat, + SYMCRYPT_ECPOINT_FORMAT eformat, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Retrieve the value of an ECPOINT object into a destination buffer pbDst of size cbDst. The buffer +// will contain the necessary coordinates of the ECPOINT in the format specified by nformat +// and eformat. The nformat determines the format of the integers in the buffer while the +// eformat determines the layout (and the number) of the coordinates. +// +// Flag values: +// SYMCRYPT_FLAG_DATA_PUBLIC data is public (no side-channel protection needed) +// +// Remarks: +// - If the source point is the "zero" point and it cannot be exported into the +// required ECPOINT_FORMAT (XY or X), the function fails with SYMCRYPT_INCOMPATIBLE_FORMAT. +// +// Requirements: +// - cbDst = X * SymCryptEcurveSizeofFieldElement( pCurve ) where X depends on the +// eformat specified and denotes the number of coordinates. For example for SYMCRYPT_ECPOINT_FORMAT_XY it is equal to 2. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) +// +// Rationale: +// Scratch space provides room for conversion of point representations. +// + +// +// Low-level flags for ECC operations +// +// SYMCRYPT_FLAG_DATA_PUBLIC: When set, the operation will not be side-channel safe. +// It is used to speed up operation on public data. (default: side-channel safe) +// +// SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL: When set, the underlying operation will multiply +// by the cofactor of the curve. 
(default: no multiplication by the cofactor) +// Remark: **Notice that the default behaviour is the opposite of the higher-level +// functions in symcrypt.h.** +#define SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL (0x20) + +//===================================================== +// COMMON_ECURVE_OPERATIONS +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( _pCurve ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( _pCurve ) + +VOID +SYMCRYPT_CALL +SymCryptEcpointSetZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Set the destination point poDst to the zero +// element of the additive group defined by the +// elliptic curve addition rule. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +VOID +SYMCRYPT_CALL +SymCryptEcpointSetDistinguishedPoint( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Set the destination point poDst to the +// distinguished point of the curve. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +#define SYMCRYPT_FLAG_ECPOINT_EQUAL (0x01) +#define SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL (0x02) + +UINT32 +SYMCRYPT_CALL +SymCryptEcpointIsEqual( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + UINT32 flags, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// If the flags argument is equal to 0 (default) or SYMCRYPT_FLAG_ECPOINT_EQUAL, it returns a mask value which is +// 0xffffffff if poSrc1 = poSrc2 and 0 otherwise. +// If the flags argument is equal to SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL, it returns a mask value which is +// 0xffffffff if poSrc1 = -poSrc2 and 0 otherwise. 
+// If the flags argument is equal to SYMCRYPT_FLAG_ECPOINT_EQUAL | SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL, +// it returns a mask value which is 0xffffffff if (poSrc1 = poSrc2) or (poSrc1 = -poSrc2) and 0 otherwise. +// +// The points should have been created with the same curve pCurve. Otherwise the result is undefined. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcpointIsZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Returns a mask value which is 0xffffffff if the point is the zero point of the group. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcpointOnCurve( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Returns a mask value which is 0xffffffff if the point is on curve. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +VOID +SYMCRYPT_CALL +SymCryptEcpointAdd( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Point addition over the curve pCurve. +// poDst = poSrc1 + poSrc2 +// +// Allowed flags: +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (and faster). The default behaviour +// is side-channel safety. +// +// Remarks: +// - Complete (i.e. works for all points) +// - Writes intermediate results to poDst breaking the read-once/write-once rule +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). 
+// + +VOID +SYMCRYPT_CALL +SymCryptEcpointAddDiffNonZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Point addition when *poSrc1 != +- *poSrc2 +// and none of them is equal to the zero point. +// +// Remarks: +// - Side-channel safe +// - Complete (i.e. works for all points) +// - Writes intermediate results to poDst breaking the read-once/write-once rule +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +VOID +SYMCRYPT_CALL +SymCryptEcpointDouble( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Point doubling. +// +// Allowed flags: +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (and faster). The default behaviour +// is side-channel safety. +// +// Remarks: +// - Side-channel safe +// - Writes intermediate results to poDst breaking the read-once/write-once rule +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +VOID +SYMCRYPT_CALL +SymCryptEcpointNegate( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Inout_ PSYMCRYPT_ECPOINT poSrc, + UINT32 mask, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Negates (in place) the source point poSrc if mask == 0xffffffff. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). 
+// + +//===================================================== +// SCALAR_ECURVE_OPERATIONS +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( _pCurve ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( (_pCurve), 1 ) +#define SYMCRYPT_SCRATCH_BYTES_FOR_MULTI_SCALAR_ECURVE_OPERATIONS( _pCurve, _nPoints ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( (_pCurve), (_nPoints) ) + +VOID +SYMCRYPT_CALL +SymCryptEcpointSetRandom( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_INT piScalar, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Set the destination point poDst to a random non-zero point +// of the subgroup generated by the distinguished point. +// The function outputs the integer k and the point kG +// where k is picked uniformly at random from the set +// [1, SubgroupOrder-1] ( 0 is excluded). +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ). +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointScalarMul( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_INT piScalar, + _In_opt_ + PCSYMCRYPT_ECPOINT poSrc, + UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Multiplication of point by scalar. +// poDst = piScalar x poSrc +// +// If poSrc == NULL the algorithm uses the distinguished point of the curve as source +// point and it might be faster (depending on the curve optimizations). +// +// Allowed flags: +// SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL: If set then +// the scalar is multiplied by the cofactor of +// the curve. The default behaviour is to not multiply +// by the cofactor. +// +// Remarks: +// - Complete +// - Side-channel safe +// +// Requirements: +// - The piScalar must have SymCryptEcurveDigitsofScalarMultiplier( pCurve ) digits. 
+// - For Non-Montgomery curves, the piScalar must be in the range [0, SubgroupOrder]. +// - This is the caller's responsibility, it is not checked. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ). +// + +// SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS: The maximum number of points allowed for the +// multi-scalar multiplication operation. +#define SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS (2) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointMultiScalarMul( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_INT * piSrcScalarArray, + _In_ PCSYMCRYPT_ECPOINT * poSrcEcpointArray, + UINT32 nPoints, + UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// It executes the multi scalar - add operation for nPoints +// pairs of (exponents, points) in (piSrcScalarArray, poSrcEcpointArray). +// +// If poSrcEcpointArray[0] == NULL the algorithm uses the distinguished point of the curve as +// the first source point and it might be faster (depending on the curve optimizations). +// Only the first source point can be NULL. +// +// Allowed flags: +// SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL: If set then +// the scalar is multiplied by the cofactor of +// the curve. The default behaviour is to not multiply +// by the cofactor. +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (For use in the ECDSA +// verification with public information). The default behaviour +// is side channel safe. +// +// Requirements: +// - 1<= nPoints <= SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS +// - Each piScalar must have SymCryptEcurveDigitsofScalarMultiplier( pCurve ) digits. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_MULTI_SCALAR_ECURVE_OPERATIONS( pCurve, nPoints ). 
+// + + +//////////////////////////////////////////////////////////////////////////// +// AES-CTR-DRBG +// + +#define SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE (32 + 16) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesGenerateSmall( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState, + _Out_writes_( cbRandom ) PBYTE pbRandom, + SIZE_T cbRandom, + _In_reads_opt_( cbAdditionalInput ) PCBYTE pbAdditionalInput, + SIZE_T cbAdditionalInput ); +// +// Generate random output from the state per SP 800-90. +// Callers should almost always use SymCryptRngAesGenerate from symcrypt.h instead. +// +// This is the core generation function that produces up to 64 kB at a time +// This function returns an error code so that we can test the +// error handling of having done more than 2^48 requests between reseeds, +// as required by SP 800-90. +// This is also the Generate function of our SP800-90 compliant implementation. +// If pRngState->fips140-2Check is true, this function runs the continuous self test +// required by FIPS 140-2 (but not by FIPS 140-3 as far as we know). +// pbAdditionalInput is optional. +// + +//===================================================== +// ECDSA-EX +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaSignEx( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_opt_ PCSYMCRYPT_INT piK, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// This algorithm is the same as SymCryptEcDsaSign except that the caller can specify +// a value of k in piK. It is used in verifying test vectors of ECDSA. +// +// Requirements: +// - If piK is not NULL it must have SymCryptEcurveDigitsofScalarMultiplier( pCurve ) digits, and +// must be in range [1, SubgroupOrder-1]. +// - If piK is not NULL and the generated signature would be 0, SYMCRYPT_INVALID_ARGUMENT is +// returned. 
+// +// Allowed flags: +// SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION: If set then the hash value will +// not be truncated. +// +// SYMCRYPT_FLAG_DATA_PUBLIC: If specified, all inputs, including the private key, are +// considered as public information and are not protected against side channel attacks. +// This should only be used when signing with a publicly known private key (i.e. in the ECDSA self-test) +// + +//===================================================== +// ML-KEM-EX +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulateEx( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _In_reads_bytes_( cbRandom ) PCBYTE pbRandom, + SIZE_T cbRandom, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ); +// +// Performs the Encapsulate operation of ML-KEM using caller-provided random input. +// It is used in verifying test vectors of ML-KEM. +// +// This uses the public information of an ML-KEM keypair to generate an agreed secret +// and a ciphertext. Only a peer with the private information of an ML-KEM keypair can +// decapsulate the ciphertext to compute the agreed secret. +// +// The arguments are the following: +// - pkMlKemkey: a key which contains public information required for encapsulation. +// - (pbRandom, cbRandom): a buffer containing the input random. +// Currently cbRandom must be 32 for all parameterizations of ML-KEM. +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of ML-KEM. +// - (pbCiphertext, cbCiphertext): a buffer into which the encapsulated secret is written. +// cbCiphertext must equal cbCiphertext given by SymCryptMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_*). 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemEncapsulateEx( + _In_ PCSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + _In_reads_bytes_opt_( cbMlKemRandom ) PCBYTE pbMlKemRandom, + SIZE_T cbMlKemRandom, + _In_reads_bytes_opt_( cbTradRandom ) PCBYTE pbTradRandom, + SIZE_T cbTradRandom, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ); + +// +// Performs the Encapsulate operation of Composite ML-KEM using caller-provided random input. +// It is used in verifying test vectors of Composite ML-KEM. +// +// This uses the public information of a Composite ML-KEM keypair to generate an agreed secret +// and a ciphertext. Only a peer with the private information of a Composite ML-KEM keypair can +// decapsulate the ciphertext to compute the agreed secret. +// +// The arguments are the following: +// - pkCompositeMlKemkey: a key which contains public information required for encapsulation. +// - (pbMlKemRandom, cbMlKemRandom): a buffer containing the input random for the ML-KEM component. +// When pbMlKemRandom is NULL, cbMlKemRandom should be 0, and the function will generate the necessary random input internally. +// Currently when pbMlKemRandom is not NULL, cbMlKemRandom must be 32 for all parameterizations of Composite ML-KEM. +// - (pbTradRandom, cbTradRandom): a buffer containing the input random for the traditional component. +// When the traditional portion is an EC key, cbTradRandom must be equal to the private key size of the EC key. +// If pbTradRandom is NULL, cbTradRandom should be 0, and the function will generate the necessary random input internally. +// Currently, only EC keys are supported for the traditional component. +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of Composite ML-KEM. 
+// - (pbCiphertext, cbCiphertext): a buffer into which the encapsulated secret is written. +// cbCiphertext must equal cbCiphertext given by SymCryptCompositeMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_*). +// + +//=================================================================== +// 802.11 SAE protocol +//=================================================================== +// +// WARNING: These functions are NOT part of the stable SymCrypt API. They are a private +// interface for the Windows WiFi driver. These functions can change or disappear +// at any time as we update our WiFi solutions. +// +// These functions implement the non-standard or 'custom' parts of the SAE protocol for +// 802.11 SAE as specified in IEEE 802.11-2016 12.4 +// +// Parts of the protocol that are easy to implement with conventional crypto functions are +// not included in this custom part. +// +// Limitation: The Hunting-and-Pecking method supports only NIST P256 curve. The Hash-to-Element +// method supports curves NIST P256 and NIST P384. +// + +// +// IANA Group numbers identify the elliptic curve and associated parameters to be used in the SAE method. +// +typedef enum _SYMCRYPT_802_11_SAE_GROUP { + SYMCRYPT_SAE_GROUP_19 = 19, // NIST P256 + SYMCRYPT_SAE_GROUP_20, // NIST P384 +} SYMCRYPT_802_11_SAE_GROUP; + +// The sizes of scalars, elliptic curve points, and HMAC outputs will vary depending on which group is selected. +// The following macros define the largest possible sizes supported. 
+#define SYMCRYPT_SAE_MAX_MOD_SIZE_BITS 384 +#define SYMCRYPT_SAE_MAX_MOD_SIZE_BYTES SYMCRYPT_BYTES_FROM_BITS( SYMCRYPT_SAE_MAX_MOD_SIZE_BITS ) +#define SYMCRYPT_SAE_MAX_EC_POINT_SIZE_BYTES ( 2 * SYMCRYPT_SAE_MAX_MOD_SIZE_BYTES ) +#define SYMCRYPT_SAE_MAX_HMAC_OUTPUT_SIZE_BYTES SYMCRYPT_BYTES_FROM_BITS( 384 ) + + +typedef struct _SYMCRYPT_802_11_SAE_CUSTOM_STATE SYMCRYPT_802_11_SAE_CUSTOM_STATE, *PSYMCRYPT_802_11_SAE_CUSTOM_STATE; +typedef const SYMCRYPT_802_11_SAE_CUSTOM_STATE *PCSYMCRYPT_802_11_SAE_CUSTOM_STATE; +// +// The struct itself is opaque and is defined elsewhere. +// Caller may not rely on the internal fields of the structure as they can +// change at any time. +// + +VOID SymCrypt802_11SaeGetGroupSizes( + SYMCRYPT_802_11_SAE_GROUP group, + _Out_opt_ SIZE_T* pcbScalar, + _Out_opt_ SIZE_T* pcbPoint ); +// +// Helper function that returns the sizes of the field elements and elliptic curve points in bytes +// for a given IANA group number. Both output parameters are optional. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInit( + _Out_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 6 ) PCBYTE pbMacA, + _In_reads_( 6 ) PCBYTE pbMacB, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _Out_opt_ PBYTE pbCounter, + _Inout_updates_opt_( 32 ) PBYTE pbRand, + _Inout_updates_opt_( 32 ) PBYTE pbMask ); +// +// Initialize the state object with the MAC addresses and password. +// All choices for the protocol (i.e. rand and mask) are made at this time. +// +// - State Protocol state to initialize +// - pbMacA, pbMacB Two 6-byte MAC addresses with MacA >= MacB. +// - pbPassword, cbPassword The password buffer +// - pbCounter If not NULL, receives the counter value of the +// successful PWE generation per section 12.4.4.2.2 +// - pbRand Optional pointer to Rand buffer (see below) +// - pbMask Optional pointer to Mask buffer (see below) +// +// The Rand and Mask buffers are optional. 
If a pointer is not provided then the caller +// has no access to the corresponding value. +// For either of these pointers there are three cases: +// - If a NULL pointer is provided, the function generates an appropriate value internally, +// but does not return it to the caller. +// - If a buffer is provided and the buffer is all-zero, the function generates an appropriate +// value internally and returns it in the buffer. +// - If a buffer is provided and the buffer is nonzero, the value in the buffer is used for +// the corresponding protocol parameter without further validation. +// This last option is useful for testing as it lets the caller specify all the random choices. +// Rand and Mask buffers are MSByte first. +// +// Note: currently this method only supports the NIST P256 curve. If we ever want to support other curves +// we'll update this function to accept a curve parameter and update the SAL annotations +// of the other functions. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCreatePT( + _In_reads_( cbSsid ) PCBYTE pbSsid, + SIZE_T cbSsid, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _In_reads_opt_( cbPasswordIdentifier ) PCBYTE pbPasswordIdentifier, + SIZE_T cbPasswordIdentifier, + _Out_writes_( 64 ) PBYTE pbPT ); +// +// Generate the PT secret element for use with the SAE Hash-to-Element algorithm, as described in +// section 12.4.4.2.3 of the 802.11 spec ("Hash-to-curve generation of the password element with +// ECC groups"). The PT value can be "stored until needed to generate a session specific PWE." +// +// - pbSsid, cbSsid SSID for the connection as a string of bytes +// - pbPassword, cbPassword Password buffer +// - pbPasswordIdentifier, cbPasswordIdentifier Optional password identifier, as a string of bytes +// - pbPT Out pointer to PT (as a byte buffer) +// +// This function uses the NIST P256 curve. 
+// + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCreatePTGeneric( + SYMCRYPT_802_11_SAE_GROUP group, + _In_reads_( cbSsid ) PCBYTE pbSsid, + SIZE_T cbSsid, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _In_reads_opt_( cbPasswordIdentifier ) PCBYTE pbPasswordIdentifier, + SIZE_T cbPasswordIdentifier, + _Out_writes_( cbPT ) PBYTE pbPT, + SIZE_T cbPT ); +// +// Generic version of the SymCrypt802_11SaeCustomCreatePT() function that allows elliptic curve +// group selection. +// Generate the PT secret element for use with the SAE Hash-to-Element algorithm, as described in +// section 12.4.4.2.3 of the 802.11 spec ("Hash-to-curve generation of the password element with +// ECC groups"). The PT value can be "stored until needed to generate a session specific PWE." +// +// - group Group number for the elliptic curve selection +// - pbSsid, cbSsid SSID for the connection as a string of bytes +// - pbPassword, cbPassword Password buffer +// - pbPasswordIdentifier, cbPasswordIdentifier Optional password identifier, as a string of bytes +// - pbPT, cbPT PT (as a byte buffer) +// + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInitH2E( + _Out_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 64 ) PCBYTE pbPT, + _In_reads_( 6 ) PCBYTE pbMacA, + _In_reads_( 6 ) PCBYTE pbMacB, + _Inout_updates_opt_( 32 ) PBYTE pbRand, + _Inout_updates_opt_( 32 ) PBYTE pbMask ); +// +// Initialize the state object using the Hash-to-Element algorithm, using the PT value calculated +// by SymCrypt802_11SaeCustomCreatePT. +// +// - pState Protocol state +// - pbPT PT value calculated using SymCrypt802_11SaeCustomCreatePT() +// - pbMacA, pbMacB Two 6-byte MAC addresses +// - pbRand Optional pointer to Rand buffer. See SymCrypt802_11SaeCustomInit() documentation for the use of this parameter. +// - pbMask Optional pointer to Mask buffer. See SymCrypt802_11SaeCustomInit() documentation for the use of this parameter. 
+// +// See the comment on SymCrypt802_11SaeCustomInit() for more details about the pbRand and pbMask +// parameters. +// + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInitH2EGeneric( + _Out_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + SYMCRYPT_802_11_SAE_GROUP group, + _In_reads_( cbPT ) PCBYTE pbPT, + SIZE_T cbPT, + _In_reads_( 6 ) PCBYTE pbMacA, + _In_reads_( 6 ) PCBYTE pbMacB, + _Inout_updates_opt_( cbRand ) PBYTE pbRand, + SIZE_T cbRand, + _Inout_updates_opt_( cbMask ) PBYTE pbMask, + SIZE_T cbMask ); +// +// Generic version of the SymCrypt802_11SaeCustomInitH2E() function that allows elliptic curve +// group selection. +// Initialize the state object using the Hash-to-Element algorithm, using the PT value calculated +// by SymCrypt802_11SaeCustomCreatePTGeneric. +// +// - pState Protocol state +// - group Group number for the elliptic curve selection +// - pbPT, cbPT PT value (as a byte array) calculated using SymCrypt802_11SaeCustomCreatePTGeneric(). +// PT must be generated on the same elliptic curve as the one supplied in the group parameter. +// - pbMacA, pbMacB Two 6-byte MAC addresses +// - pbRand, cbRand Optional Rand buffer. See SymCrypt802_11SaeCustomInit() documentation for the use of this parameter. +// - pbMask, cbMask Optional Mask buffer. See SymCrypt802_11SaeCustomInit() documentation for the use of this parameter. +// +// See the comment on SymCrypt802_11SaeCustomInit() for more details about the pbRand and pbMask +// parameters. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitCreate( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Out_writes_( 32 ) PBYTE pbCommitScalar, + _Out_writes_( 64 ) PBYTE pbCommitElement ); +// +// Compute the commit-scalar and commit-element values for the Commit message. +// This function does not update the pState and is multi-thread safe w.r.t. the pState object. +// +// - pState Protocol state that was initialized with SymCrypt802_11SaeCustomInit(). 
+// - pbCommitScalar Buffer that receives the commit-scalar value, MSByte first. +// - pbCommitElement Buffer that receives the commit-element value encoded as two values +// (x,y) in order, each value in MSByte first. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitCreateGeneric( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Out_writes_( cbCommitScalar ) PBYTE pbCommitScalar, + SIZE_T cbCommitScalar, + _Out_writes_( cbCommitElement ) PBYTE pbCommitElement, + SIZE_T cbCommitElement); +// +// Generic version of the SymCrypt802_11SaeCustomCommitCreate() function that uses the +// state object to determine which elliptic curve group is selected. +// Compute the commit-scalar and commit-element values for the Commit message. +// This function does not update the pState and is multi-thread safe w.r.t. the pState object. +// +// - pState Protocol state that was initialized with SymCrypt802_11SaeCustomInit(). +// - pbCommitScalar, cbCommitScalar Buffer that receives the commit-scalar value, MSByte first. +// - pbCommitElement, cbCommitElement Buffer that receives the commit-element value encoded as two values +// (x,y) in order, each value in MSByte first. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitProcess( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 32 ) PCBYTE pbPeerCommitScalar, + _In_reads_( 64 ) PCBYTE pbPeerCommitElement, + _Out_writes_( 32 ) PBYTE pbSharedSecret, + _Out_writes_( 32 ) PBYTE pbScalarSum ); +// +// Process the commit message received from the peer. +// This function does not update pState and is multi-thread safe w.r.t. the pState object. +// +// - pState pointer to the protocol state. +// - pbPeerCommitScalar pointer to the peer's commit scalar value, MSByte first. +// - pbPeerCommitElement pointer to the peer's commit element, see CommitCreate for format. 
+// - pbSharedSecret buffer that receives the 'k' value that is the shared secret, MSByte first +// - pbScalarSum buffer that receives the sum of the two commit scalars, MSByte first +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitProcessGeneric( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( cbPeerCommitScalar ) PCBYTE pbPeerCommitScalar, + SIZE_T cbPeerCommitScalar, + _In_reads_( cbPeerCommitElement ) PCBYTE pbPeerCommitElement, + SIZE_T cbPeerCommitElement, + _Out_writes_( cbSharedSecret ) PBYTE pbSharedSecret, + SIZE_T cbSharedSecret, + _Out_writes_( cbScalarSum ) PBYTE pbScalarSum, + SIZE_T cbScalarSum ); +// +// Generic version of the SymCrypt802_11SaeCustomCommitProcess() function that uses the +// state object to determine which elliptic curve group is selected. +// Process the commit message received from the peer. +// This function does not update pState and is multi-thread safe w.r.t. the pState object. +// +// - pState pointer to the protocol state. +// - pbPeerCommitScalar, cbPeerCommitScalar pointer to the peer's commit scalar value, MSByte first. +// - pbPeerCommitElement, cbPeerCommitElement pointer to the peer's commit element, see CommitCreate for format. +// - pbSharedSecret, cbSharedSecret buffer that receives the 'k' value that is the shared secret, MSByte first +// - pbScalarSum, cbScalarSum buffer that receives the sum of the two commit scalars, MSByte first +// + + +VOID +SymCrypt802_11SaeCustomDestroy( + _Inout_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState ); +// +// Wipe a state object. +// After this call the memory used for pState is uninitialized and can be used for other purposes. +// Note that it is not safe to just wipe the memory of the state object as the state +// object contains pointers to other allocations, which can contain secret information. +// The only way to safely destroy a state is to use this function. 
+// + +//=================================================================== + + + +#ifdef __cplusplus +} +#endif diff --git a/libs/symcrypt/lib/3des.c b/libs/symcrypt/lib/3des.c new file mode 100644 index 00000000000..6a4091a90ca --- /dev/null +++ b/libs/symcrypt/lib/3des.c @@ -0,0 +1,831 @@ +// +// 3des.c Routines for DES and 3DES +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This is an updated implementation that is carefully reviewed to be fully copyrighted by +// Microsoft. Our previous implementation was partially based on a very old public domain +// implementation. +// According to Andrew Tucker (atucker) there was a claim many years ago about the copyright of +// our DES code. Working with LCA (legal) it was determined that the DES implementation in RSA32.lib +// was a Microsoft derivative of a public-domain implementation, and therefore is clear of +// any IP issues. To avoid any further claims we have now scrubbed this implementation from all +// copyrightable elements derived from outside sources. +// +// We take non-copyrightable items from the old implementations such as +// - lookup tables +// - algorithm for various bit permutations +// - Any variable & function names that are already MS-generated. +// - Other MS-generated code elements (e.g. SymCrypt integration) +// Some of the considerations we made are: +// Most of the functionality of DES is required by the FIPS standard and there is not much +// choice on how to code it; elements required by the standard are not copyright protected. +// The lookup tables themselves are not copyrightable as they have no artistic expression. +// The format of the lookup tables is almost completely determined by the standard and the algorithm +// used to access them. Any further layout and structure are all standard C conventions. +// Algorithm tricks such as Hoey's IP implementation are not copyrightable but are patentable. 
+// Fortunately all the techniques we use have been around long enough that any patents have expired. +// + +// +// Feb 2018, Niels Ferguson +// + +#include "precomp.h" + +// +// Tables to describe the DES and 3DES block ciphers so that the generic +// chaining mode functions can use them. +// We have no optimized mode-specific code as the DES block is so slow that there is +// very little to be gained. +// + +const SYMCRYPT_BLOCKCIPHER SymCrypt3DesBlockCipher_default = { + SymCrypt3DesExpandKey, // PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY expandKeyFunc; + SymCrypt3DesEncrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT encryptFunc; + SymCrypt3DesDecrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT decryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_MAC_MODE cbcMacFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE ctrMsbFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc; + 8, // SIZE_T blockSize; + sizeof( SYMCRYPT_3DES_EXPANDED_KEY ), // SIZE_T expandedKeySize; // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY ) +}; + +const SYMCRYPT_BLOCKCIPHER SymCryptDesBlockCipher_default = { + SymCryptDesExpandKey, // PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY expandKeyFunc; + SymCryptDesEncrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT encryptFunc; + SymCryptDesDecrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT decryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_MAC_MODE cbcMacFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE ctrMsbFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc; + 
NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc; + 8, // SIZE_T blockSize; + sizeof( SYMCRYPT_DES_EXPANDED_KEY ), // SIZE_T expandedKeySize; // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY ) +}; + +const PCSYMCRYPT_BLOCKCIPHER SymCrypt3DesBlockCipher = &SymCrypt3DesBlockCipher_default; +const PCSYMCRYPT_BLOCKCIPHER SymCryptDesBlockCipher = &SymCryptDesBlockCipher_default; + +extern SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptDesSpbox[8][64]; // Combined S and P tables +extern SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptDesKeySelect[8][64]; + + +// +// The SWAP_BITS_WITHIN_UINT32 macro swaps bits within a UINT32 value +// SWAP_BITS_WITHIN_UINT32( _value, _shift, _mask ) +// swaps each bit in _value selected by _mask with the bit _shift positions to the left +// Thus it swaps (_value & _mask) with (_value & (_mask << _shift)) +// + +#define SWAP_BITS_WITHIN_UINT32( _value, _shift, _mask ) \ +{ \ + UINT32 _tmp; \ + _tmp = ((_value) ^ ((_value) >> (_shift))) & (_mask ); \ + _value = (_value) ^ _tmp ^ (_tmp << (_shift)); \ +} + +// +// The SWAP_BITS_BETWEEN_UINT32 macro swaps bits between two UINT32 values +// SWAP_BITS_BETWEEN_UINT32( _v1, _v2, _shift, _mask ) +// swaps bits in _v1 selected by _mask with bits in _v2 selected by _mask << _shift +// + +#define SWAP_BITS_BETWEEN_UINT32( _v1, _v2, _shift, _mask ) \ +{ \ + UINT32 _tmp; \ + _tmp = ((_v1) ^ ((_v2) >> (_shift))) & (_mask); \ + _v1 ^= _tmp; \ + _v2 ^= (_tmp << (_shift )); \ +} + +// +// For each round, a bit that states whether the key schedule shift registers are clocked twice +// The data is straight from the standard. +// +static const BYTE SymCryptDesDoubleShift[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0}; + +////////////////////////// +// DES +// We just implement DES as 3DES. +// People using DES have bigger problems than bad performance. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDesExpandKey( + _Out_ PSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ) +{ + if( cbKey != 8 ) + { + // + // cbKey should be a compile-time constant in most cases, + // so this should be optimized away + // + return SYMCRYPT_WRONG_KEY_SIZE; + } + return SymCrypt3DesExpandKey( &pExpandedKey->threeDes, pbKey, cbKey ); +} + +VOID +SYMCRYPT_CALL +SymCryptDesEncrypt( + _In_ PCSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DES_BLOCK_SIZE ) PBYTE pbDst ) +{ + SymCrypt3DesEncrypt( &pExpandedKey->threeDes, pbSrc, pbDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptDesDecrypt( + _In_ PCSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DES_BLOCK_SIZE ) PBYTE pbDst ) +{ + SymCrypt3DesDecrypt( &pExpandedKey->threeDes, pbSrc, pbDst ); +} + +// +// The 3DesCbcEncrypt/Decrypt functions are used to make converting code from +// older libraries to SymCrypt easier. 
+// + +VOID +SYMCRYPT_CALL +SymCrypt3DesCbcEncrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ASSERT( SymCrypt3DesBlockCipher->blockSize == SYMCRYPT_3DES_BLOCK_SIZE ); + SymCryptCbcEncrypt( SymCrypt3DesBlockCipher, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCrypt3DesCbcDecrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ASSERT( SymCrypt3DesBlockCipher->blockSize == SYMCRYPT_3DES_BLOCK_SIZE ); + SymCryptCbcDecrypt( SymCrypt3DesBlockCipher, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + + + +VOID +SYMCRYPT_CALL +SymCryptDesExpandSingleKey( + _Out_writes_bytes_(128) UINT32 expandedKeyTable[16][2], + _In_reads_(8) PCBYTE pKey ) +{ + UINT32 Cr, Dr; // The C_r D_r values of FIPS 46 for round value r + UINT32 r; // round + UINT32 K1, K2; // round keys after the permuted choice 2 + UINT32 tmp; + + // + // We follow the FIPS 46 flow quite closely and have not optimized the key expansion much. + // Key expansion is not performance-critical. + // + + // Load the key + Cr = SYMCRYPT_LOAD_LSBFIRST32( pKey ); + Dr = SYMCRYPT_LOAD_LSBFIRST32( pKey + 4 ); + + // + // The Permuted Choice 1 can be done mostly with a sequence of bit swaps. + // The algorithm we use is derived from our earlier implementation and might potentially + // derive from an external source. + // But the algorithm cannot be copyrighted, only patented, and if there were any patents + // they have expired by now. + // The expression of the algorithm in code is purely MS generated, and so not encumbered + // by external copyrights. 
+ // This algorithm is really just a transposition of the bits when viewed as an 8x8 matrix + // with an additional permutation on the output side. + // + SWAP_BITS_BETWEEN_UINT32( Cr, Dr, 4, 0x0f0f0f0f ); + SWAP_BITS_WITHIN_UINT32( Dr, 18, 0x00003333 ); + SWAP_BITS_WITHIN_UINT32( Cr, 18, 0x00003333 ); + SWAP_BITS_BETWEEN_UINT32( Cr, Dr, 1, 0x55555555 ); + SWAP_BITS_BETWEEN_UINT32( Dr, Cr, 8, 0x00ff00ff ); + SWAP_BITS_BETWEEN_UINT32( Cr, Dr, 1, 0x55555555 ); + SWAP_BITS_WITHIN_UINT32( Dr, 16, 0xff ); + + // Have to re-arrange C and D a tiny bit so that each contains 28 bits and we throw away 8 bits + Dr = (Dr & 0x00ffffff) | ((Cr & 0xf0000000 ) >> 4 ); + Cr = (Cr & 0x0fffffff); + + for( r = 0; r < 16; r++) + { + // + // Cr and Dr are the two key shift registers, they are rotated once or twice for each round. + // + + if( SymCryptDesDoubleShift[ r ] ) { + Cr = ((Cr >> 2) | (Cr << 26)); + Dr = ((Dr >> 2) | (Dr << 26)); + } else { + Cr = ((Cr >> 1) | (Cr << 27)); + Dr = ((Dr >> 1) | (Dr << 27)); + } + + Cr &= 0x0fffffff; + Dr &= 0x0fffffff; + + // + // The Permuted Choice 2 is done using table lookups + // Not all bits of C and D are used, so we cut those out using shifts and masks, + // and then index 6 bits at a time into lookup tables that implement the bit relocation. 
+ // + + K1 = SymCryptDesKeySelect[0][ (Cr )&0x3f ] | + SymCryptDesKeySelect[1][((Cr >> 6)&0x03) | ((Cr >> 7)&0x3c)] | + SymCryptDesKeySelect[2][((Cr >> 13)&0x0f) | ((Cr >> 14)&0x30)] | + SymCryptDesKeySelect[3][((Cr >> 20)&0x01) | ((Cr >> 21)&0x06) | ((Cr >> 22)&0x38)]; + + K2 = SymCryptDesKeySelect[4][ (Dr )&0x3f ] | + SymCryptDesKeySelect[5][((Dr >> 7)&0x03) | ((Dr >> 8)&0x3c)] | + SymCryptDesKeySelect[6][ (Dr >> 15)&0x3f ] | + SymCryptDesKeySelect[7][((Dr >> 21)&0x0f) | ((Dr >> 22)&0x30)]; + + // + // After this we still have to swap the halves of K1 and K2, that is done below + // as part of the formatting of the round key + // + + // + // So far we have recreated the round keys per the standard. + // The round keys are stored rotated by 2 as the encrypt/decrypt code finds that easier. + // We could update the tables to do this, but key expansion is not used that frequently, + // and it is not worth the effort to update the tables. + // + // We don't worry about extraneous bits in unused positions as the F function masks out unused bits. + // + + tmp = ((K2 << 16) | (K1 & 0x0000ffff)) ; + expandedKeyTable[r][0] = ROL32(tmp, 2); + + tmp = ((K1 >> 16) | (K2 & 0xffff0000)); + expandedKeyTable[r][1] = ROL32(tmp, 6); + } +} + + +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCrypt3DesExpandKey( + _Out_ PSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SIZE_T keyIndex = 0; + int i; + + if( cbKey != 8 && cbKey != 16 && cbKey != 24 ) + { + return SYMCRYPT_WRONG_KEY_SIZE; + } + + // + // A loop that goes over the provided key as a circular buffer provides + // the right result with the least complexity. + // This is inefficient for the cases cbKey=8 and cbKey=16, but those should + // not be used anyway. 
+ // + for( i=0; i<3; i++ ) + { + SYMCRYPT_ASSERT( keyIndex <= cbKey - 8 ); // help PreFast + SymCryptDesExpandSingleKey( pExpandedKey->roundKey[i], pbKey + keyIndex ); + keyIndex = (keyIndex + 8) % cbKey; + } + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + return SYMCRYPT_NO_ERROR; +} + + +// +// The DES round function +// This is straight from the standard. +// Ta and Tb each contain 4 sets of 6 bits that are S-box inputs +// We interleave the use of Ta and Tb to provide better CPU scheduling on weak compilers. +// We ensure that the input bits appear in bits 2-7 of the index to avoid a scaled index +// which can be slower on some CPUs. +// +#define F(L, R, keyptr) { \ + Ta = keyptr[0] ^ R; \ + Tb = keyptr[1] ^ R; \ + Tb = ROR32(Tb, 4); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[0] + ( Ta & 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[1] + ( Tb & 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[2] + ((Ta>> 8)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[3] + ((Tb>> 8)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[4] + ((Ta>>16)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[5] + ((Tb>>16)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[6] + ((Ta>>24)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[7] + ((Tb>>24)& 0xfc)); } + + + +// +// Block encryption. +// The noinline stops the compiler from inlining the code and creating additional +// implementations which would require separate FIPS selftests. 
+// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCrypt3DesEncrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_3DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbDst ) +{ + UINT32 L, R, Ta, Tb; + int r; + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + R = SYMCRYPT_LOAD_LSBFIRST32( pbSrc ); + L = SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 4 ); + + // + // Hoey's wonderful initial permutation algorithm, from Outerbridge + // (see Schneier p 478) + // + // The algorithm we use is derived (through several intermediate forms) from the mentioned source. + // But the algorithm cannot be copyrighted, only patented, and if there were any patents + // they have expired by now. + // The expression of the algorithm in code is purely MS generated, + // within the confines of implementing the algorithm in the best way such that even a simple + // compiler will create good code. + // + + R = ROL32(R, 4); + Ta = (L ^ R) & 0xf0f0f0f0; + L ^= Ta; + R ^= Ta; + + L = ROL32(L, 20); + Ta = (L ^ R) & 0xfff0000f; + R ^= Ta; + L ^= Ta; + + L = ROL32(L,14); + Ta = (L ^ R) & 0x33333333; + R ^= Ta; + L ^= Ta; + + R = ROL32(R, 22); + Ta = (L ^ R) & 0x03fc03fc; + R ^= Ta; + L ^= Ta; + + R = ROL32(R, 9); + Ta = (L ^ R) & 0xaaaaaaaa; + R ^= Ta; + L ^= Ta; + + L = ROL32(L, 1); + + // + // First: encryption + // + for( r=0; r<16; r += 2 ) + { + F( L, R, pExpandedKey->roundKey[0][r ] ); + F( R, L, pExpandedKey->roundKey[0][r+1] ); + } + + // + // Second: decryption + // Note that L and R are swapped here, and the round counter counts down. 
+ // + for( r=14; r>=0; r -= 2 ) + { + F( R, L, pExpandedKey->roundKey[1][r+1] ); + F( L, R, pExpandedKey->roundKey[1][r ] ); + } + + // + // Third: encryption + // + for( r=0; r<16; r += 2 ) + { + F( L, R, pExpandedKey->roundKey[2][r ] ); + F( R, L, pExpandedKey->roundKey[2][r+1] ); + } + + R = ROR32(R, 1); + Ta = (L ^ R) & 0xaaaaaaaa; + R ^= Ta; + L ^= Ta; + + L = ROR32(L, 9); + Ta = (L ^ R) & 0x03fc03fc; + R ^= Ta; + L ^= Ta; + + L = ROR32(L, 22); + Ta = (L ^ R) & 0x33333333; + R ^= Ta; + L ^= Ta; + + R = ROR32(R, 14); + Ta = (L ^ R) & 0xfff0000f; + R ^= Ta; + L ^= Ta; + + R = ROR32(R, 20); + Ta = (L ^ R) & 0xf0f0f0f0; + R ^= Ta; + L ^= Ta; + + L = ROR32(L, 4); + + SYMCRYPT_STORE_LSBFIRST32( pbDst, L ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 4, R ); +} + + +// +// Block decrypt +// The noinline stops the compiler from inlining the code and creating additional +// implementations which would require separate FIPS selftests. +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCrypt3DesDecrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_3DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbDst ) +{ + UINT32 L, R, Ta, Tb; + int r; + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + R = SYMCRYPT_LOAD_LSBFIRST32( pbSrc ); + L = SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 4 ); + + R = ROL32(R, 4); + Ta = (L ^ R) & 0xf0f0f0f0; + L ^= Ta; + R ^= Ta; + + L = ROL32(L, 20); + Ta = (L ^ R) & 0xfff0000f; + L ^= Ta; + R ^= Ta; + + L = ROL32(L, 14); + Ta = (L ^ R) & 0x33333333; + L ^= Ta; + R ^= Ta; + + R = ROL32(R, 22); + Ta = (L ^ R) & 0x03fc03fc; + L ^= Ta; + R ^= Ta; + + R = ROL32(R, 9); + Ta = (L ^ R) & 0xaaaaaaaa; + L ^= Ta; + R ^= Ta; + + L = ROL32(L, 1); + + + // Decrypt with key 2 + for( r=14; r>=0; r -= 2 ) + { + F( L, R, pExpandedKey->roundKey[2][r+1] ); + F( R, L, pExpandedKey->roundKey[2][r ] ); + } + + // Encrypt with key 1 + for( r=0; r<16; r += 2 ) + { + F( R, L, pExpandedKey->roundKey[1][r ] ); + F( L, R, 
pExpandedKey->roundKey[1][r+1] ); + } + + // Decrypt with key 0 + for( r=14; r>=0; r -= 2 ) + { + F( L, R, pExpandedKey->roundKey[0][r+1] ); + F( R, L, pExpandedKey->roundKey[0][r ] ); + } + + /* Inverse permutation, also from Hoey via Outerbridge and Schneier */ + + R = ROR32(R, 1); + Ta = (L ^ R) & 0xaaaaaaaa; + L ^= Ta; + R ^= Ta; + + L = ROR32(L, 9); + Ta = (L ^ R) & 0x03fc03fc; + L ^= Ta; + R ^= Ta; + + L = ROR32(L, 22); + Ta = (L ^ R) & 0x33333333; + L ^= Ta; + R ^= Ta; + + R = ROR32(R, 14); + Ta = (L ^ R) & 0xfff0000f; + L ^= Ta; + R ^= Ta; + + R = ROR32(R, 20); + Ta = (L ^ R) & 0xf0f0f0f0; + L ^= Ta; + R ^= Ta; + + L = ROR32(L, 4); + + SYMCRYPT_STORE_LSBFIRST32( pbDst, L ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 4, R ); +} + + + +VOID +SYMCRYPT_CALL +SymCryptDesSetOddParity( + _Inout_updates_( cbData ) PBYTE pbData, + _In_ SIZE_T cbData ) +// +// For each byte, set bit 0 such that the byte parity is odd. +// This function is side-channel safe +// +{ + SIZE_T i; + BYTE b, t; + for( i=0; i<cbData; i++ ) + { + // We obey the read-once write-once rule + b = *pbData; + + t = b ^ (b>>4); // parity(b) = parity( t & 0xf ) + t ^= t>>2; // = parity( t & 0x3 ) + t ^= t>>1; // = parity( t & 0x1 ) + *pbData++ = b ^ (t&1) ^ 1; + } +} + + +// +// Test vectors for self test +// +static const BYTE SP800_67Key[24] = { + 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, + 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, + 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, +}; + +static const BYTE des3KnownPlaintext[8] = { + 0x4E, 0x6F, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, +}; + +static const BYTE des3KnownCiphertext[8] = { + 0x31, 0x4F, 0x83, 0x27, 0xFA, 0x7A, 0x09, 0xA8, +}; + +static const BYTE desKnownCiphertext[8] = { + 0x3F, 0xA4, 0x0E, 0x8A, 0x98, 0x4D, 0x48, 0x15, +}; + + +VOID +SYMCRYPT_CALL +SymCryptDesSelftest(void) +{ + BYTE buf[SYMCRYPT_DES_BLOCK_SIZE]; + SYMCRYPT_DES_EXPANDED_KEY key; + + if( SymCryptDesExpandKey( &key, SP800_67Key, 8 ) != SYMCRYPT_NO_ERROR ) + { + 
SymCryptFatal( 'desa' ); + } + + SymCryptDesEncrypt( &key, des3KnownPlaintext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_DES_BLOCK_SIZE ); + + if( memcmp( buf, desKnownCiphertext, SYMCRYPT_DES_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'desb' ); + } + + SymCryptDesDecrypt( &key, desKnownCiphertext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_DES_BLOCK_SIZE ); + + if( memcmp( buf, des3KnownPlaintext, SYMCRYPT_DES_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'desc' ); + } +} + + +VOID +SYMCRYPT_CALL +SymCrypt3DesSelftest(void) +{ + BYTE buf[SYMCRYPT_3DES_BLOCK_SIZE]; + SYMCRYPT_3DES_EXPANDED_KEY key; + + if( SymCrypt3DesExpandKey( &key, SP800_67Key, 24 ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'des3' ); + } + + SymCrypt3DesEncrypt( &key, des3KnownPlaintext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_3DES_BLOCK_SIZE ); + + if( memcmp( buf, des3KnownCiphertext, SYMCRYPT_3DES_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'des4' ); + } + + SymCrypt3DesDecrypt( &key, des3KnownCiphertext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_3DES_BLOCK_SIZE ); + + if( memcmp( buf, des3KnownPlaintext, SYMCRYPT_3DES_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'des5' ); + } +} + + + + + +#if 0 +//////////////////////////////////////////// +// Useful tables, kept for future reference. +// + +// Tables defined in the Data Encryption Standard documents +// Three of these tables, the initial permutation, the final +// permutation and the expansion operator, are regular enough that +// for speed, we hard-code them. They're here for reference only. +// Also, the S and P boxes are used by a separate program, gensp.c, +// to build the combined SP box, Spbox[]. They're also here just +// for reference. 
+// +// initial permutation IP +static unsigned BYTE ip[] = { + 58, 50, 42, 34, 26, 18, 10, 2, + 60, 52, 44, 36, 28, 20, 12, 4, + 62, 54, 46, 38, 30, 22, 14, 6, + 64, 56, 48, 40, 32, 24, 16, 8, + 57, 49, 41, 33, 25, 17, 9, 1, + 59, 51, 43, 35, 27, 19, 11, 3, + 61, 53, 45, 37, 29, 21, 13, 5, + 63, 55, 47, 39, 31, 23, 15, 7 +}; + +// final permutation IP^-1 +static unsigned BYTE fp[] = { + 40, 8, 48, 16, 56, 24, 64, 32, + 39, 7, 47, 15, 55, 23, 63, 31, + 38, 6, 46, 14, 54, 22, 62, 30, + 37, 5, 45, 13, 53, 21, 61, 29, + 36, 4, 44, 12, 52, 20, 60, 28, + 35, 3, 43, 11, 51, 19, 59, 27, + 34, 2, 42, 10, 50, 18, 58, 26, + 33, 1, 41, 9, 49, 17, 57, 25 +}; + +// expansion operation matrix +static unsigned BYTE ei[] = { + 32, 1, 2, 3, 4, 5, + 4, 5, 6, 7, 8, 9, + 8, 9, 10, 11, 12, 13, + 12, 13, 14, 15, 16, 17, + 16, 17, 18, 19, 20, 21, + 20, 21, 22, 23, 24, 25, + 24, 25, 26, 27, 28, 29, + 28, 29, 30, 31, 32, 1 +}; + +// The (in)famous S-boxes +static unsigned BYTE sbox[8][64] = { + // S1 + 14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7, + 0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8, + 4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0, + 15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13, + + // S2 + 15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10, + 3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5, + 0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15, + 13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9, + + // S3 + 10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8, + 13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1, + 13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7, + 1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12, + + // S4 + 7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15, + 13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9, + 10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4, + 3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14, + + // S5 + 2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9, + 14, 11, 2, 12, 
4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6, + 4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14, + 11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3, + + // S6 + 12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11, + 10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8, + 9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6, + 4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13, + + // S7 + 4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1, + 13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6, + 1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2, + 6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12, + + // S8 + 13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7, + 1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2, + 7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8, + 2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11 +}; + +// 32-bit permutation function P used on the output of the S-boxes +static unsigned BYTE p32i[] = { + 16, 7, 20, 21, + 29, 12, 28, 17, + 1, 15, 23, 26, + 5, 18, 31, 10, + 2, 8, 24, 14, + 32, 27, 3, 9, + 19, 13, 30, 6, + 22, 11, 4, 25 +}; + +// permuted choice table (key) +static unsigned BYTE pc1[] = { + 57, 49, 41, 33, 25, 17, 9, + 1, 58, 50, 42, 34, 26, 18, + 10, 2, 59, 51, 43, 35, 27, + 19, 11, 3, 60, 52, 44, 36, + + 63, 55, 47, 39, 31, 23, 15, + 7, 62, 54, 46, 38, 30, 22, + 14, 6, 61, 53, 45, 37, 29, + 21, 13, 5, 28, 20, 12, 4 +}; + +// number left rotations of pc1 +static unsigned BYTE totrot[] = { + 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 +}; + +// permuted choice key (table) +static unsigned BYTE pc2[] = { + 14, 17, 11, 24, 1, 5, + 3, 28, 15, 6, 21, 10, + 23, 19, 12, 4, 26, 8, + 16, 7, 27, 20, 13, 2, + 41, 52, 31, 37, 47, 55, + 30, 40, 51, 45, 33, 48, + 44, 49, 39, 56, 34, 53, + 46, 42, 50, 36, 29, 32 +}; + +#endif diff --git a/libs/symcrypt/lib/AesTables.c b/libs/symcrypt/lib/AesTables.c new file mode 100644 index 00000000000..ceda0c8a342 --- /dev/null +++ b/libs/symcrypt/lib/AesTables.c @@ -0,0 +1,899 @@ +// 
+// AesTables.c lookup tables for the AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// +// We put these in a separate source file to keep the code file uncluttered. +// + +#include "precomp.h" + +// +// Alignments are chosen to reduce side-channel attacks through the TLB cache. +// We align each table to a multiple of the size within which we do data-dependent +// lookups. For example, the table below is aligned to 1024. It is not a secret +// that the 4 sub-tables are accessed, but which value inside each sub-table is a secret. +// Aligning to 1024 still leaves the data cache line leakage, but avoids any TLB-related leakage. +// +SYMCRYPT_ALIGN_AT(1024) const BYTE SymCryptAesSboxMatrixMult[4][256][4] = {{ // Main encryption tables +0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84, 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d, +0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd, 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54, +0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03, 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d, +0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62, 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a, +0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d, 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87, +0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb, 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b, +0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67, 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea, +0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7, 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b, +0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c, 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a, +0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41, 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f, +0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4, 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08, +0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73, 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f, +0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52, 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e, +0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1, 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5, 
+0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36, 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d, +0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69, 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f, +0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e, 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e, +0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2, 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb, +0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d, 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce, +0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e, 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97, +0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68, 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c, +0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f, 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed, +0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46, 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b, +0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4, 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a, +0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a, 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16, +0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7, 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94, +0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10, 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81, +0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44, 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3, +0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe, 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a, +0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc, 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04, +0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1, 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63, +0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a, 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d, +0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14, 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f, +0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2, 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39, +0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2, 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47, +0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7, 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95, +0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98, 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f, +0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e, 
0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83, +0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29, 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c, +0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2, 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76, +0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56, 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e, +0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a, 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4, +0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e, 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6, +0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4, 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b, +0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43, 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7, +0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64, 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0, +0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa, 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25, +0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e, 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18, +0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88, 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72, +0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1, 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51, +0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c, 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21, +0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc, 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85, +0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42, 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa, +0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05, 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12, +0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f, 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0, +0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58, 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9, +0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13, 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33, +0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70, 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7, +0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22, 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20, +0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff, 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a, +0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8, 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17, 
+0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31, 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8, +0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0, 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11, +0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc, 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a +},{ +0xa5,0xc6,0x63,0x63, 0x84,0xf8,0x7c,0x7c, 0x99,0xee,0x77,0x77, 0x8d,0xf6,0x7b,0x7b, +0x0d,0xff,0xf2,0xf2, 0xbd,0xd6,0x6b,0x6b, 0xb1,0xde,0x6f,0x6f, 0x54,0x91,0xc5,0xc5, +0x50,0x60,0x30,0x30, 0x03,0x02,0x01,0x01, 0xa9,0xce,0x67,0x67, 0x7d,0x56,0x2b,0x2b, +0x19,0xe7,0xfe,0xfe, 0x62,0xb5,0xd7,0xd7, 0xe6,0x4d,0xab,0xab, 0x9a,0xec,0x76,0x76, +0x45,0x8f,0xca,0xca, 0x9d,0x1f,0x82,0x82, 0x40,0x89,0xc9,0xc9, 0x87,0xfa,0x7d,0x7d, +0x15,0xef,0xfa,0xfa, 0xeb,0xb2,0x59,0x59, 0xc9,0x8e,0x47,0x47, 0x0b,0xfb,0xf0,0xf0, +0xec,0x41,0xad,0xad, 0x67,0xb3,0xd4,0xd4, 0xfd,0x5f,0xa2,0xa2, 0xea,0x45,0xaf,0xaf, +0xbf,0x23,0x9c,0x9c, 0xf7,0x53,0xa4,0xa4, 0x96,0xe4,0x72,0x72, 0x5b,0x9b,0xc0,0xc0, +0xc2,0x75,0xb7,0xb7, 0x1c,0xe1,0xfd,0xfd, 0xae,0x3d,0x93,0x93, 0x6a,0x4c,0x26,0x26, +0x5a,0x6c,0x36,0x36, 0x41,0x7e,0x3f,0x3f, 0x02,0xf5,0xf7,0xf7, 0x4f,0x83,0xcc,0xcc, +0x5c,0x68,0x34,0x34, 0xf4,0x51,0xa5,0xa5, 0x34,0xd1,0xe5,0xe5, 0x08,0xf9,0xf1,0xf1, +0x93,0xe2,0x71,0x71, 0x73,0xab,0xd8,0xd8, 0x53,0x62,0x31,0x31, 0x3f,0x2a,0x15,0x15, +0x0c,0x08,0x04,0x04, 0x52,0x95,0xc7,0xc7, 0x65,0x46,0x23,0x23, 0x5e,0x9d,0xc3,0xc3, +0x28,0x30,0x18,0x18, 0xa1,0x37,0x96,0x96, 0x0f,0x0a,0x05,0x05, 0xb5,0x2f,0x9a,0x9a, +0x09,0x0e,0x07,0x07, 0x36,0x24,0x12,0x12, 0x9b,0x1b,0x80,0x80, 0x3d,0xdf,0xe2,0xe2, +0x26,0xcd,0xeb,0xeb, 0x69,0x4e,0x27,0x27, 0xcd,0x7f,0xb2,0xb2, 0x9f,0xea,0x75,0x75, +0x1b,0x12,0x09,0x09, 0x9e,0x1d,0x83,0x83, 0x74,0x58,0x2c,0x2c, 0x2e,0x34,0x1a,0x1a, +0x2d,0x36,0x1b,0x1b, 0xb2,0xdc,0x6e,0x6e, 0xee,0xb4,0x5a,0x5a, 0xfb,0x5b,0xa0,0xa0, +0xf6,0xa4,0x52,0x52, 0x4d,0x76,0x3b,0x3b, 0x61,0xb7,0xd6,0xd6, 0xce,0x7d,0xb3,0xb3, +0x7b,0x52,0x29,0x29, 0x3e,0xdd,0xe3,0xe3, 0x71,0x5e,0x2f,0x2f, 0x97,0x13,0x84,0x84, +0xf5,0xa6,0x53,0x53, 
0x68,0xb9,0xd1,0xd1, 0x00,0x00,0x00,0x00, 0x2c,0xc1,0xed,0xed, +0x60,0x40,0x20,0x20, 0x1f,0xe3,0xfc,0xfc, 0xc8,0x79,0xb1,0xb1, 0xed,0xb6,0x5b,0x5b, +0xbe,0xd4,0x6a,0x6a, 0x46,0x8d,0xcb,0xcb, 0xd9,0x67,0xbe,0xbe, 0x4b,0x72,0x39,0x39, +0xde,0x94,0x4a,0x4a, 0xd4,0x98,0x4c,0x4c, 0xe8,0xb0,0x58,0x58, 0x4a,0x85,0xcf,0xcf, +0x6b,0xbb,0xd0,0xd0, 0x2a,0xc5,0xef,0xef, 0xe5,0x4f,0xaa,0xaa, 0x16,0xed,0xfb,0xfb, +0xc5,0x86,0x43,0x43, 0xd7,0x9a,0x4d,0x4d, 0x55,0x66,0x33,0x33, 0x94,0x11,0x85,0x85, +0xcf,0x8a,0x45,0x45, 0x10,0xe9,0xf9,0xf9, 0x06,0x04,0x02,0x02, 0x81,0xfe,0x7f,0x7f, +0xf0,0xa0,0x50,0x50, 0x44,0x78,0x3c,0x3c, 0xba,0x25,0x9f,0x9f, 0xe3,0x4b,0xa8,0xa8, +0xf3,0xa2,0x51,0x51, 0xfe,0x5d,0xa3,0xa3, 0xc0,0x80,0x40,0x40, 0x8a,0x05,0x8f,0x8f, +0xad,0x3f,0x92,0x92, 0xbc,0x21,0x9d,0x9d, 0x48,0x70,0x38,0x38, 0x04,0xf1,0xf5,0xf5, +0xdf,0x63,0xbc,0xbc, 0xc1,0x77,0xb6,0xb6, 0x75,0xaf,0xda,0xda, 0x63,0x42,0x21,0x21, +0x30,0x20,0x10,0x10, 0x1a,0xe5,0xff,0xff, 0x0e,0xfd,0xf3,0xf3, 0x6d,0xbf,0xd2,0xd2, +0x4c,0x81,0xcd,0xcd, 0x14,0x18,0x0c,0x0c, 0x35,0x26,0x13,0x13, 0x2f,0xc3,0xec,0xec, +0xe1,0xbe,0x5f,0x5f, 0xa2,0x35,0x97,0x97, 0xcc,0x88,0x44,0x44, 0x39,0x2e,0x17,0x17, +0x57,0x93,0xc4,0xc4, 0xf2,0x55,0xa7,0xa7, 0x82,0xfc,0x7e,0x7e, 0x47,0x7a,0x3d,0x3d, +0xac,0xc8,0x64,0x64, 0xe7,0xba,0x5d,0x5d, 0x2b,0x32,0x19,0x19, 0x95,0xe6,0x73,0x73, +0xa0,0xc0,0x60,0x60, 0x98,0x19,0x81,0x81, 0xd1,0x9e,0x4f,0x4f, 0x7f,0xa3,0xdc,0xdc, +0x66,0x44,0x22,0x22, 0x7e,0x54,0x2a,0x2a, 0xab,0x3b,0x90,0x90, 0x83,0x0b,0x88,0x88, +0xca,0x8c,0x46,0x46, 0x29,0xc7,0xee,0xee, 0xd3,0x6b,0xb8,0xb8, 0x3c,0x28,0x14,0x14, +0x79,0xa7,0xde,0xde, 0xe2,0xbc,0x5e,0x5e, 0x1d,0x16,0x0b,0x0b, 0x76,0xad,0xdb,0xdb, +0x3b,0xdb,0xe0,0xe0, 0x56,0x64,0x32,0x32, 0x4e,0x74,0x3a,0x3a, 0x1e,0x14,0x0a,0x0a, +0xdb,0x92,0x49,0x49, 0x0a,0x0c,0x06,0x06, 0x6c,0x48,0x24,0x24, 0xe4,0xb8,0x5c,0x5c, +0x5d,0x9f,0xc2,0xc2, 0x6e,0xbd,0xd3,0xd3, 0xef,0x43,0xac,0xac, 0xa6,0xc4,0x62,0x62, +0xa8,0x39,0x91,0x91, 0xa4,0x31,0x95,0x95, 0x37,0xd3,0xe4,0xe4, 
0x8b,0xf2,0x79,0x79, +0x32,0xd5,0xe7,0xe7, 0x43,0x8b,0xc8,0xc8, 0x59,0x6e,0x37,0x37, 0xb7,0xda,0x6d,0x6d, +0x8c,0x01,0x8d,0x8d, 0x64,0xb1,0xd5,0xd5, 0xd2,0x9c,0x4e,0x4e, 0xe0,0x49,0xa9,0xa9, +0xb4,0xd8,0x6c,0x6c, 0xfa,0xac,0x56,0x56, 0x07,0xf3,0xf4,0xf4, 0x25,0xcf,0xea,0xea, +0xaf,0xca,0x65,0x65, 0x8e,0xf4,0x7a,0x7a, 0xe9,0x47,0xae,0xae, 0x18,0x10,0x08,0x08, +0xd5,0x6f,0xba,0xba, 0x88,0xf0,0x78,0x78, 0x6f,0x4a,0x25,0x25, 0x72,0x5c,0x2e,0x2e, +0x24,0x38,0x1c,0x1c, 0xf1,0x57,0xa6,0xa6, 0xc7,0x73,0xb4,0xb4, 0x51,0x97,0xc6,0xc6, +0x23,0xcb,0xe8,0xe8, 0x7c,0xa1,0xdd,0xdd, 0x9c,0xe8,0x74,0x74, 0x21,0x3e,0x1f,0x1f, +0xdd,0x96,0x4b,0x4b, 0xdc,0x61,0xbd,0xbd, 0x86,0x0d,0x8b,0x8b, 0x85,0x0f,0x8a,0x8a, +0x90,0xe0,0x70,0x70, 0x42,0x7c,0x3e,0x3e, 0xc4,0x71,0xb5,0xb5, 0xaa,0xcc,0x66,0x66, +0xd8,0x90,0x48,0x48, 0x05,0x06,0x03,0x03, 0x01,0xf7,0xf6,0xf6, 0x12,0x1c,0x0e,0x0e, +0xa3,0xc2,0x61,0x61, 0x5f,0x6a,0x35,0x35, 0xf9,0xae,0x57,0x57, 0xd0,0x69,0xb9,0xb9, +0x91,0x17,0x86,0x86, 0x58,0x99,0xc1,0xc1, 0x27,0x3a,0x1d,0x1d, 0xb9,0x27,0x9e,0x9e, +0x38,0xd9,0xe1,0xe1, 0x13,0xeb,0xf8,0xf8, 0xb3,0x2b,0x98,0x98, 0x33,0x22,0x11,0x11, +0xbb,0xd2,0x69,0x69, 0x70,0xa9,0xd9,0xd9, 0x89,0x07,0x8e,0x8e, 0xa7,0x33,0x94,0x94, +0xb6,0x2d,0x9b,0x9b, 0x22,0x3c,0x1e,0x1e, 0x92,0x15,0x87,0x87, 0x20,0xc9,0xe9,0xe9, +0x49,0x87,0xce,0xce, 0xff,0xaa,0x55,0x55, 0x78,0x50,0x28,0x28, 0x7a,0xa5,0xdf,0xdf, +0x8f,0x03,0x8c,0x8c, 0xf8,0x59,0xa1,0xa1, 0x80,0x09,0x89,0x89, 0x17,0x1a,0x0d,0x0d, +0xda,0x65,0xbf,0xbf, 0x31,0xd7,0xe6,0xe6, 0xc6,0x84,0x42,0x42, 0xb8,0xd0,0x68,0x68, +0xc3,0x82,0x41,0x41, 0xb0,0x29,0x99,0x99, 0x77,0x5a,0x2d,0x2d, 0x11,0x1e,0x0f,0x0f, +0xcb,0x7b,0xb0,0xb0, 0xfc,0xa8,0x54,0x54, 0xd6,0x6d,0xbb,0xbb, 0x3a,0x2c,0x16,0x16 +},{ +0x63,0xa5,0xc6,0x63, 0x7c,0x84,0xf8,0x7c, 0x77,0x99,0xee,0x77, 0x7b,0x8d,0xf6,0x7b, +0xf2,0x0d,0xff,0xf2, 0x6b,0xbd,0xd6,0x6b, 0x6f,0xb1,0xde,0x6f, 0xc5,0x54,0x91,0xc5, +0x30,0x50,0x60,0x30, 0x01,0x03,0x02,0x01, 0x67,0xa9,0xce,0x67, 0x2b,0x7d,0x56,0x2b, 
+0xfe,0x19,0xe7,0xfe, 0xd7,0x62,0xb5,0xd7, 0xab,0xe6,0x4d,0xab, 0x76,0x9a,0xec,0x76, +0xca,0x45,0x8f,0xca, 0x82,0x9d,0x1f,0x82, 0xc9,0x40,0x89,0xc9, 0x7d,0x87,0xfa,0x7d, +0xfa,0x15,0xef,0xfa, 0x59,0xeb,0xb2,0x59, 0x47,0xc9,0x8e,0x47, 0xf0,0x0b,0xfb,0xf0, +0xad,0xec,0x41,0xad, 0xd4,0x67,0xb3,0xd4, 0xa2,0xfd,0x5f,0xa2, 0xaf,0xea,0x45,0xaf, +0x9c,0xbf,0x23,0x9c, 0xa4,0xf7,0x53,0xa4, 0x72,0x96,0xe4,0x72, 0xc0,0x5b,0x9b,0xc0, +0xb7,0xc2,0x75,0xb7, 0xfd,0x1c,0xe1,0xfd, 0x93,0xae,0x3d,0x93, 0x26,0x6a,0x4c,0x26, +0x36,0x5a,0x6c,0x36, 0x3f,0x41,0x7e,0x3f, 0xf7,0x02,0xf5,0xf7, 0xcc,0x4f,0x83,0xcc, +0x34,0x5c,0x68,0x34, 0xa5,0xf4,0x51,0xa5, 0xe5,0x34,0xd1,0xe5, 0xf1,0x08,0xf9,0xf1, +0x71,0x93,0xe2,0x71, 0xd8,0x73,0xab,0xd8, 0x31,0x53,0x62,0x31, 0x15,0x3f,0x2a,0x15, +0x04,0x0c,0x08,0x04, 0xc7,0x52,0x95,0xc7, 0x23,0x65,0x46,0x23, 0xc3,0x5e,0x9d,0xc3, +0x18,0x28,0x30,0x18, 0x96,0xa1,0x37,0x96, 0x05,0x0f,0x0a,0x05, 0x9a,0xb5,0x2f,0x9a, +0x07,0x09,0x0e,0x07, 0x12,0x36,0x24,0x12, 0x80,0x9b,0x1b,0x80, 0xe2,0x3d,0xdf,0xe2, +0xeb,0x26,0xcd,0xeb, 0x27,0x69,0x4e,0x27, 0xb2,0xcd,0x7f,0xb2, 0x75,0x9f,0xea,0x75, +0x09,0x1b,0x12,0x09, 0x83,0x9e,0x1d,0x83, 0x2c,0x74,0x58,0x2c, 0x1a,0x2e,0x34,0x1a, +0x1b,0x2d,0x36,0x1b, 0x6e,0xb2,0xdc,0x6e, 0x5a,0xee,0xb4,0x5a, 0xa0,0xfb,0x5b,0xa0, +0x52,0xf6,0xa4,0x52, 0x3b,0x4d,0x76,0x3b, 0xd6,0x61,0xb7,0xd6, 0xb3,0xce,0x7d,0xb3, +0x29,0x7b,0x52,0x29, 0xe3,0x3e,0xdd,0xe3, 0x2f,0x71,0x5e,0x2f, 0x84,0x97,0x13,0x84, +0x53,0xf5,0xa6,0x53, 0xd1,0x68,0xb9,0xd1, 0x00,0x00,0x00,0x00, 0xed,0x2c,0xc1,0xed, +0x20,0x60,0x40,0x20, 0xfc,0x1f,0xe3,0xfc, 0xb1,0xc8,0x79,0xb1, 0x5b,0xed,0xb6,0x5b, +0x6a,0xbe,0xd4,0x6a, 0xcb,0x46,0x8d,0xcb, 0xbe,0xd9,0x67,0xbe, 0x39,0x4b,0x72,0x39, +0x4a,0xde,0x94,0x4a, 0x4c,0xd4,0x98,0x4c, 0x58,0xe8,0xb0,0x58, 0xcf,0x4a,0x85,0xcf, +0xd0,0x6b,0xbb,0xd0, 0xef,0x2a,0xc5,0xef, 0xaa,0xe5,0x4f,0xaa, 0xfb,0x16,0xed,0xfb, +0x43,0xc5,0x86,0x43, 0x4d,0xd7,0x9a,0x4d, 0x33,0x55,0x66,0x33, 0x85,0x94,0x11,0x85, +0x45,0xcf,0x8a,0x45, 0xf9,0x10,0xe9,0xf9, 
0x02,0x06,0x04,0x02, 0x7f,0x81,0xfe,0x7f, +0x50,0xf0,0xa0,0x50, 0x3c,0x44,0x78,0x3c, 0x9f,0xba,0x25,0x9f, 0xa8,0xe3,0x4b,0xa8, +0x51,0xf3,0xa2,0x51, 0xa3,0xfe,0x5d,0xa3, 0x40,0xc0,0x80,0x40, 0x8f,0x8a,0x05,0x8f, +0x92,0xad,0x3f,0x92, 0x9d,0xbc,0x21,0x9d, 0x38,0x48,0x70,0x38, 0xf5,0x04,0xf1,0xf5, +0xbc,0xdf,0x63,0xbc, 0xb6,0xc1,0x77,0xb6, 0xda,0x75,0xaf,0xda, 0x21,0x63,0x42,0x21, +0x10,0x30,0x20,0x10, 0xff,0x1a,0xe5,0xff, 0xf3,0x0e,0xfd,0xf3, 0xd2,0x6d,0xbf,0xd2, +0xcd,0x4c,0x81,0xcd, 0x0c,0x14,0x18,0x0c, 0x13,0x35,0x26,0x13, 0xec,0x2f,0xc3,0xec, +0x5f,0xe1,0xbe,0x5f, 0x97,0xa2,0x35,0x97, 0x44,0xcc,0x88,0x44, 0x17,0x39,0x2e,0x17, +0xc4,0x57,0x93,0xc4, 0xa7,0xf2,0x55,0xa7, 0x7e,0x82,0xfc,0x7e, 0x3d,0x47,0x7a,0x3d, +0x64,0xac,0xc8,0x64, 0x5d,0xe7,0xba,0x5d, 0x19,0x2b,0x32,0x19, 0x73,0x95,0xe6,0x73, +0x60,0xa0,0xc0,0x60, 0x81,0x98,0x19,0x81, 0x4f,0xd1,0x9e,0x4f, 0xdc,0x7f,0xa3,0xdc, +0x22,0x66,0x44,0x22, 0x2a,0x7e,0x54,0x2a, 0x90,0xab,0x3b,0x90, 0x88,0x83,0x0b,0x88, +0x46,0xca,0x8c,0x46, 0xee,0x29,0xc7,0xee, 0xb8,0xd3,0x6b,0xb8, 0x14,0x3c,0x28,0x14, +0xde,0x79,0xa7,0xde, 0x5e,0xe2,0xbc,0x5e, 0x0b,0x1d,0x16,0x0b, 0xdb,0x76,0xad,0xdb, +0xe0,0x3b,0xdb,0xe0, 0x32,0x56,0x64,0x32, 0x3a,0x4e,0x74,0x3a, 0x0a,0x1e,0x14,0x0a, +0x49,0xdb,0x92,0x49, 0x06,0x0a,0x0c,0x06, 0x24,0x6c,0x48,0x24, 0x5c,0xe4,0xb8,0x5c, +0xc2,0x5d,0x9f,0xc2, 0xd3,0x6e,0xbd,0xd3, 0xac,0xef,0x43,0xac, 0x62,0xa6,0xc4,0x62, +0x91,0xa8,0x39,0x91, 0x95,0xa4,0x31,0x95, 0xe4,0x37,0xd3,0xe4, 0x79,0x8b,0xf2,0x79, +0xe7,0x32,0xd5,0xe7, 0xc8,0x43,0x8b,0xc8, 0x37,0x59,0x6e,0x37, 0x6d,0xb7,0xda,0x6d, +0x8d,0x8c,0x01,0x8d, 0xd5,0x64,0xb1,0xd5, 0x4e,0xd2,0x9c,0x4e, 0xa9,0xe0,0x49,0xa9, +0x6c,0xb4,0xd8,0x6c, 0x56,0xfa,0xac,0x56, 0xf4,0x07,0xf3,0xf4, 0xea,0x25,0xcf,0xea, +0x65,0xaf,0xca,0x65, 0x7a,0x8e,0xf4,0x7a, 0xae,0xe9,0x47,0xae, 0x08,0x18,0x10,0x08, +0xba,0xd5,0x6f,0xba, 0x78,0x88,0xf0,0x78, 0x25,0x6f,0x4a,0x25, 0x2e,0x72,0x5c,0x2e, +0x1c,0x24,0x38,0x1c, 0xa6,0xf1,0x57,0xa6, 0xb4,0xc7,0x73,0xb4, 0xc6,0x51,0x97,0xc6, 
+0xe8,0x23,0xcb,0xe8, 0xdd,0x7c,0xa1,0xdd, 0x74,0x9c,0xe8,0x74, 0x1f,0x21,0x3e,0x1f, +0x4b,0xdd,0x96,0x4b, 0xbd,0xdc,0x61,0xbd, 0x8b,0x86,0x0d,0x8b, 0x8a,0x85,0x0f,0x8a, +0x70,0x90,0xe0,0x70, 0x3e,0x42,0x7c,0x3e, 0xb5,0xc4,0x71,0xb5, 0x66,0xaa,0xcc,0x66, +0x48,0xd8,0x90,0x48, 0x03,0x05,0x06,0x03, 0xf6,0x01,0xf7,0xf6, 0x0e,0x12,0x1c,0x0e, +0x61,0xa3,0xc2,0x61, 0x35,0x5f,0x6a,0x35, 0x57,0xf9,0xae,0x57, 0xb9,0xd0,0x69,0xb9, +0x86,0x91,0x17,0x86, 0xc1,0x58,0x99,0xc1, 0x1d,0x27,0x3a,0x1d, 0x9e,0xb9,0x27,0x9e, +0xe1,0x38,0xd9,0xe1, 0xf8,0x13,0xeb,0xf8, 0x98,0xb3,0x2b,0x98, 0x11,0x33,0x22,0x11, +0x69,0xbb,0xd2,0x69, 0xd9,0x70,0xa9,0xd9, 0x8e,0x89,0x07,0x8e, 0x94,0xa7,0x33,0x94, +0x9b,0xb6,0x2d,0x9b, 0x1e,0x22,0x3c,0x1e, 0x87,0x92,0x15,0x87, 0xe9,0x20,0xc9,0xe9, +0xce,0x49,0x87,0xce, 0x55,0xff,0xaa,0x55, 0x28,0x78,0x50,0x28, 0xdf,0x7a,0xa5,0xdf, +0x8c,0x8f,0x03,0x8c, 0xa1,0xf8,0x59,0xa1, 0x89,0x80,0x09,0x89, 0x0d,0x17,0x1a,0x0d, +0xbf,0xda,0x65,0xbf, 0xe6,0x31,0xd7,0xe6, 0x42,0xc6,0x84,0x42, 0x68,0xb8,0xd0,0x68, +0x41,0xc3,0x82,0x41, 0x99,0xb0,0x29,0x99, 0x2d,0x77,0x5a,0x2d, 0x0f,0x11,0x1e,0x0f, +0xb0,0xcb,0x7b,0xb0, 0x54,0xfc,0xa8,0x54, 0xbb,0xd6,0x6d,0xbb, 0x16,0x3a,0x2c,0x16 +},{ +0x63,0x63,0xa5,0xc6, 0x7c,0x7c,0x84,0xf8, 0x77,0x77,0x99,0xee, 0x7b,0x7b,0x8d,0xf6, +0xf2,0xf2,0x0d,0xff, 0x6b,0x6b,0xbd,0xd6, 0x6f,0x6f,0xb1,0xde, 0xc5,0xc5,0x54,0x91, +0x30,0x30,0x50,0x60, 0x01,0x01,0x03,0x02, 0x67,0x67,0xa9,0xce, 0x2b,0x2b,0x7d,0x56, +0xfe,0xfe,0x19,0xe7, 0xd7,0xd7,0x62,0xb5, 0xab,0xab,0xe6,0x4d, 0x76,0x76,0x9a,0xec, +0xca,0xca,0x45,0x8f, 0x82,0x82,0x9d,0x1f, 0xc9,0xc9,0x40,0x89, 0x7d,0x7d,0x87,0xfa, +0xfa,0xfa,0x15,0xef, 0x59,0x59,0xeb,0xb2, 0x47,0x47,0xc9,0x8e, 0xf0,0xf0,0x0b,0xfb, +0xad,0xad,0xec,0x41, 0xd4,0xd4,0x67,0xb3, 0xa2,0xa2,0xfd,0x5f, 0xaf,0xaf,0xea,0x45, +0x9c,0x9c,0xbf,0x23, 0xa4,0xa4,0xf7,0x53, 0x72,0x72,0x96,0xe4, 0xc0,0xc0,0x5b,0x9b, +0xb7,0xb7,0xc2,0x75, 0xfd,0xfd,0x1c,0xe1, 0x93,0x93,0xae,0x3d, 0x26,0x26,0x6a,0x4c, +0x36,0x36,0x5a,0x6c, 
0x3f,0x3f,0x41,0x7e, 0xf7,0xf7,0x02,0xf5, 0xcc,0xcc,0x4f,0x83, +0x34,0x34,0x5c,0x68, 0xa5,0xa5,0xf4,0x51, 0xe5,0xe5,0x34,0xd1, 0xf1,0xf1,0x08,0xf9, +0x71,0x71,0x93,0xe2, 0xd8,0xd8,0x73,0xab, 0x31,0x31,0x53,0x62, 0x15,0x15,0x3f,0x2a, +0x04,0x04,0x0c,0x08, 0xc7,0xc7,0x52,0x95, 0x23,0x23,0x65,0x46, 0xc3,0xc3,0x5e,0x9d, +0x18,0x18,0x28,0x30, 0x96,0x96,0xa1,0x37, 0x05,0x05,0x0f,0x0a, 0x9a,0x9a,0xb5,0x2f, +0x07,0x07,0x09,0x0e, 0x12,0x12,0x36,0x24, 0x80,0x80,0x9b,0x1b, 0xe2,0xe2,0x3d,0xdf, +0xeb,0xeb,0x26,0xcd, 0x27,0x27,0x69,0x4e, 0xb2,0xb2,0xcd,0x7f, 0x75,0x75,0x9f,0xea, +0x09,0x09,0x1b,0x12, 0x83,0x83,0x9e,0x1d, 0x2c,0x2c,0x74,0x58, 0x1a,0x1a,0x2e,0x34, +0x1b,0x1b,0x2d,0x36, 0x6e,0x6e,0xb2,0xdc, 0x5a,0x5a,0xee,0xb4, 0xa0,0xa0,0xfb,0x5b, +0x52,0x52,0xf6,0xa4, 0x3b,0x3b,0x4d,0x76, 0xd6,0xd6,0x61,0xb7, 0xb3,0xb3,0xce,0x7d, +0x29,0x29,0x7b,0x52, 0xe3,0xe3,0x3e,0xdd, 0x2f,0x2f,0x71,0x5e, 0x84,0x84,0x97,0x13, +0x53,0x53,0xf5,0xa6, 0xd1,0xd1,0x68,0xb9, 0x00,0x00,0x00,0x00, 0xed,0xed,0x2c,0xc1, +0x20,0x20,0x60,0x40, 0xfc,0xfc,0x1f,0xe3, 0xb1,0xb1,0xc8,0x79, 0x5b,0x5b,0xed,0xb6, +0x6a,0x6a,0xbe,0xd4, 0xcb,0xcb,0x46,0x8d, 0xbe,0xbe,0xd9,0x67, 0x39,0x39,0x4b,0x72, +0x4a,0x4a,0xde,0x94, 0x4c,0x4c,0xd4,0x98, 0x58,0x58,0xe8,0xb0, 0xcf,0xcf,0x4a,0x85, +0xd0,0xd0,0x6b,0xbb, 0xef,0xef,0x2a,0xc5, 0xaa,0xaa,0xe5,0x4f, 0xfb,0xfb,0x16,0xed, +0x43,0x43,0xc5,0x86, 0x4d,0x4d,0xd7,0x9a, 0x33,0x33,0x55,0x66, 0x85,0x85,0x94,0x11, +0x45,0x45,0xcf,0x8a, 0xf9,0xf9,0x10,0xe9, 0x02,0x02,0x06,0x04, 0x7f,0x7f,0x81,0xfe, +0x50,0x50,0xf0,0xa0, 0x3c,0x3c,0x44,0x78, 0x9f,0x9f,0xba,0x25, 0xa8,0xa8,0xe3,0x4b, +0x51,0x51,0xf3,0xa2, 0xa3,0xa3,0xfe,0x5d, 0x40,0x40,0xc0,0x80, 0x8f,0x8f,0x8a,0x05, +0x92,0x92,0xad,0x3f, 0x9d,0x9d,0xbc,0x21, 0x38,0x38,0x48,0x70, 0xf5,0xf5,0x04,0xf1, +0xbc,0xbc,0xdf,0x63, 0xb6,0xb6,0xc1,0x77, 0xda,0xda,0x75,0xaf, 0x21,0x21,0x63,0x42, +0x10,0x10,0x30,0x20, 0xff,0xff,0x1a,0xe5, 0xf3,0xf3,0x0e,0xfd, 0xd2,0xd2,0x6d,0xbf, +0xcd,0xcd,0x4c,0x81, 0x0c,0x0c,0x14,0x18, 0x13,0x13,0x35,0x26, 
0xec,0xec,0x2f,0xc3, +0x5f,0x5f,0xe1,0xbe, 0x97,0x97,0xa2,0x35, 0x44,0x44,0xcc,0x88, 0x17,0x17,0x39,0x2e, +0xc4,0xc4,0x57,0x93, 0xa7,0xa7,0xf2,0x55, 0x7e,0x7e,0x82,0xfc, 0x3d,0x3d,0x47,0x7a, +0x64,0x64,0xac,0xc8, 0x5d,0x5d,0xe7,0xba, 0x19,0x19,0x2b,0x32, 0x73,0x73,0x95,0xe6, +0x60,0x60,0xa0,0xc0, 0x81,0x81,0x98,0x19, 0x4f,0x4f,0xd1,0x9e, 0xdc,0xdc,0x7f,0xa3, +0x22,0x22,0x66,0x44, 0x2a,0x2a,0x7e,0x54, 0x90,0x90,0xab,0x3b, 0x88,0x88,0x83,0x0b, +0x46,0x46,0xca,0x8c, 0xee,0xee,0x29,0xc7, 0xb8,0xb8,0xd3,0x6b, 0x14,0x14,0x3c,0x28, +0xde,0xde,0x79,0xa7, 0x5e,0x5e,0xe2,0xbc, 0x0b,0x0b,0x1d,0x16, 0xdb,0xdb,0x76,0xad, +0xe0,0xe0,0x3b,0xdb, 0x32,0x32,0x56,0x64, 0x3a,0x3a,0x4e,0x74, 0x0a,0x0a,0x1e,0x14, +0x49,0x49,0xdb,0x92, 0x06,0x06,0x0a,0x0c, 0x24,0x24,0x6c,0x48, 0x5c,0x5c,0xe4,0xb8, +0xc2,0xc2,0x5d,0x9f, 0xd3,0xd3,0x6e,0xbd, 0xac,0xac,0xef,0x43, 0x62,0x62,0xa6,0xc4, +0x91,0x91,0xa8,0x39, 0x95,0x95,0xa4,0x31, 0xe4,0xe4,0x37,0xd3, 0x79,0x79,0x8b,0xf2, +0xe7,0xe7,0x32,0xd5, 0xc8,0xc8,0x43,0x8b, 0x37,0x37,0x59,0x6e, 0x6d,0x6d,0xb7,0xda, +0x8d,0x8d,0x8c,0x01, 0xd5,0xd5,0x64,0xb1, 0x4e,0x4e,0xd2,0x9c, 0xa9,0xa9,0xe0,0x49, +0x6c,0x6c,0xb4,0xd8, 0x56,0x56,0xfa,0xac, 0xf4,0xf4,0x07,0xf3, 0xea,0xea,0x25,0xcf, +0x65,0x65,0xaf,0xca, 0x7a,0x7a,0x8e,0xf4, 0xae,0xae,0xe9,0x47, 0x08,0x08,0x18,0x10, +0xba,0xba,0xd5,0x6f, 0x78,0x78,0x88,0xf0, 0x25,0x25,0x6f,0x4a, 0x2e,0x2e,0x72,0x5c, +0x1c,0x1c,0x24,0x38, 0xa6,0xa6,0xf1,0x57, 0xb4,0xb4,0xc7,0x73, 0xc6,0xc6,0x51,0x97, +0xe8,0xe8,0x23,0xcb, 0xdd,0xdd,0x7c,0xa1, 0x74,0x74,0x9c,0xe8, 0x1f,0x1f,0x21,0x3e, +0x4b,0x4b,0xdd,0x96, 0xbd,0xbd,0xdc,0x61, 0x8b,0x8b,0x86,0x0d, 0x8a,0x8a,0x85,0x0f, +0x70,0x70,0x90,0xe0, 0x3e,0x3e,0x42,0x7c, 0xb5,0xb5,0xc4,0x71, 0x66,0x66,0xaa,0xcc, +0x48,0x48,0xd8,0x90, 0x03,0x03,0x05,0x06, 0xf6,0xf6,0x01,0xf7, 0x0e,0x0e,0x12,0x1c, +0x61,0x61,0xa3,0xc2, 0x35,0x35,0x5f,0x6a, 0x57,0x57,0xf9,0xae, 0xb9,0xb9,0xd0,0x69, +0x86,0x86,0x91,0x17, 0xc1,0xc1,0x58,0x99, 0x1d,0x1d,0x27,0x3a, 0x9e,0x9e,0xb9,0x27, +0xe1,0xe1,0x38,0xd9, 
0xf8,0xf8,0x13,0xeb, 0x98,0x98,0xb3,0x2b, 0x11,0x11,0x33,0x22, +0x69,0x69,0xbb,0xd2, 0xd9,0xd9,0x70,0xa9, 0x8e,0x8e,0x89,0x07, 0x94,0x94,0xa7,0x33, +0x9b,0x9b,0xb6,0x2d, 0x1e,0x1e,0x22,0x3c, 0x87,0x87,0x92,0x15, 0xe9,0xe9,0x20,0xc9, +0xce,0xce,0x49,0x87, 0x55,0x55,0xff,0xaa, 0x28,0x28,0x78,0x50, 0xdf,0xdf,0x7a,0xa5, +0x8c,0x8c,0x8f,0x03, 0xa1,0xa1,0xf8,0x59, 0x89,0x89,0x80,0x09, 0x0d,0x0d,0x17,0x1a, +0xbf,0xbf,0xda,0x65, 0xe6,0xe6,0x31,0xd7, 0x42,0x42,0xc6,0x84, 0x68,0x68,0xb8,0xd0, +0x41,0x41,0xc3,0x82, 0x99,0x99,0xb0,0x29, 0x2d,0x2d,0x77,0x5a, 0x0f,0x0f,0x11,0x1e, +0xb0,0xb0,0xcb,0x7b, 0x54,0x54,0xfc,0xa8, 0xbb,0xbb,0xd6,0x6d, 0x16,0x16,0x3a,0x2c +}}; + +SYMCRYPT_ALIGN_AT(1024) const BYTE SymCryptAesInvSboxMatrixMult[4][256][4] = {{// Main decryption tables +0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53, 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96, +0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1, 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93, +0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6, 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25, +0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7, 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f, +0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67, 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1, +0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12, 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6, +0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95, 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda, +0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3, 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44, +0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78, 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd, +0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17, 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4, +0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82, 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45, +0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84, 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94, +0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19, 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7, +0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2, 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a, +0xb2,0xeb,0x28,0x07, 
0x2f,0xb5,0xc2,0x03, 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5, +0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2, 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c, +0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92, 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1, +0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5, 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a, +0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0, 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75, +0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa, 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51, +0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d, 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46, +0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05, 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff, +0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97, 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77, +0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88, 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb, +0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9, 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00, +0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48, 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e, +0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56, 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27, +0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21, 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a, +0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f, 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e, +0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2, 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16, +0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5, 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d, +0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad, 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8, +0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c, 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd, +0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc, 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34, +0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc, 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63, +0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10, 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20, +0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8, 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d, +0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3, 0x0d,0x86,0x52,0xec, 
0x77,0xc1,0xe3,0xd0, +0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99, 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22, +0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a, 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef, +0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1, 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36, +0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28, 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4, +0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d, 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62, +0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8, 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5, +0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c, 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3, +0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7, 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b, +0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4, 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8, +0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e, 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6, +0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce, 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6, +0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31, 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0, +0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6, 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15, +0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7, 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f, +0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d, 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf, +0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b, 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f, +0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d, 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e, +0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52, 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13, +0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a, 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89, +0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35, 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c, +0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f, 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf, +0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b, 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86, +0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e, 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f, +0x16,0x1d,0xc3,0x72, 
0xbc,0xe2,0x25,0x0c, 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41, +0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde, 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90, +0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70, 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 +},{ +0x50,0x51,0xf4,0xa7, 0x53,0x7e,0x41,0x65, 0xc3,0x1a,0x17,0xa4, 0x96,0x3a,0x27,0x5e, +0xcb,0x3b,0xab,0x6b, 0xf1,0x1f,0x9d,0x45, 0xab,0xac,0xfa,0x58, 0x93,0x4b,0xe3,0x03, +0x55,0x20,0x30,0xfa, 0xf6,0xad,0x76,0x6d, 0x91,0x88,0xcc,0x76, 0x25,0xf5,0x02,0x4c, +0xfc,0x4f,0xe5,0xd7, 0xd7,0xc5,0x2a,0xcb, 0x80,0x26,0x35,0x44, 0x8f,0xb5,0x62,0xa3, +0x49,0xde,0xb1,0x5a, 0x67,0x25,0xba,0x1b, 0x98,0x45,0xea,0x0e, 0xe1,0x5d,0xfe,0xc0, +0x02,0xc3,0x2f,0x75, 0x12,0x81,0x4c,0xf0, 0xa3,0x8d,0x46,0x97, 0xc6,0x6b,0xd3,0xf9, +0xe7,0x03,0x8f,0x5f, 0x95,0x15,0x92,0x9c, 0xeb,0xbf,0x6d,0x7a, 0xda,0x95,0x52,0x59, +0x2d,0xd4,0xbe,0x83, 0xd3,0x58,0x74,0x21, 0x29,0x49,0xe0,0x69, 0x44,0x8e,0xc9,0xc8, +0x6a,0x75,0xc2,0x89, 0x78,0xf4,0x8e,0x79, 0x6b,0x99,0x58,0x3e, 0xdd,0x27,0xb9,0x71, +0xb6,0xbe,0xe1,0x4f, 0x17,0xf0,0x88,0xad, 0x66,0xc9,0x20,0xac, 0xb4,0x7d,0xce,0x3a, +0x18,0x63,0xdf,0x4a, 0x82,0xe5,0x1a,0x31, 0x60,0x97,0x51,0x33, 0x45,0x62,0x53,0x7f, +0xe0,0xb1,0x64,0x77, 0x84,0xbb,0x6b,0xae, 0x1c,0xfe,0x81,0xa0, 0x94,0xf9,0x08,0x2b, +0x58,0x70,0x48,0x68, 0x19,0x8f,0x45,0xfd, 0x87,0x94,0xde,0x6c, 0xb7,0x52,0x7b,0xf8, +0x23,0xab,0x73,0xd3, 0xe2,0x72,0x4b,0x02, 0x57,0xe3,0x1f,0x8f, 0x2a,0x66,0x55,0xab, +0x07,0xb2,0xeb,0x28, 0x03,0x2f,0xb5,0xc2, 0x9a,0x86,0xc5,0x7b, 0xa5,0xd3,0x37,0x08, +0xf2,0x30,0x28,0x87, 0xb2,0x23,0xbf,0xa5, 0xba,0x02,0x03,0x6a, 0x5c,0xed,0x16,0x82, +0x2b,0x8a,0xcf,0x1c, 0x92,0xa7,0x79,0xb4, 0xf0,0xf3,0x07,0xf2, 0xa1,0x4e,0x69,0xe2, +0xcd,0x65,0xda,0xf4, 0xd5,0x06,0x05,0xbe, 0x1f,0xd1,0x34,0x62, 0x8a,0xc4,0xa6,0xfe, +0x9d,0x34,0x2e,0x53, 0xa0,0xa2,0xf3,0x55, 0x32,0x05,0x8a,0xe1, 0x75,0xa4,0xf6,0xeb, +0x39,0x0b,0x83,0xec, 0xaa,0x40,0x60,0xef, 0x06,0x5e,0x71,0x9f, 0x51,0xbd,0x6e,0x10, +0xf9,0x3e,0x21,0x8a, 0x3d,0x96,0xdd,0x06, 
0xae,0xdd,0x3e,0x05, 0x46,0x4d,0xe6,0xbd, +0xb5,0x91,0x54,0x8d, 0x05,0x71,0xc4,0x5d, 0x6f,0x04,0x06,0xd4, 0xff,0x60,0x50,0x15, +0x24,0x19,0x98,0xfb, 0x97,0xd6,0xbd,0xe9, 0xcc,0x89,0x40,0x43, 0x77,0x67,0xd9,0x9e, +0xbd,0xb0,0xe8,0x42, 0x88,0x07,0x89,0x8b, 0x38,0xe7,0x19,0x5b, 0xdb,0x79,0xc8,0xee, +0x47,0xa1,0x7c,0x0a, 0xe9,0x7c,0x42,0x0f, 0xc9,0xf8,0x84,0x1e, 0x00,0x00,0x00,0x00, +0x83,0x09,0x80,0x86, 0x48,0x32,0x2b,0xed, 0xac,0x1e,0x11,0x70, 0x4e,0x6c,0x5a,0x72, +0xfb,0xfd,0x0e,0xff, 0x56,0x0f,0x85,0x38, 0x1e,0x3d,0xae,0xd5, 0x27,0x36,0x2d,0x39, +0x64,0x0a,0x0f,0xd9, 0x21,0x68,0x5c,0xa6, 0xd1,0x9b,0x5b,0x54, 0x3a,0x24,0x36,0x2e, +0xb1,0x0c,0x0a,0x67, 0x0f,0x93,0x57,0xe7, 0xd2,0xb4,0xee,0x96, 0x9e,0x1b,0x9b,0x91, +0x4f,0x80,0xc0,0xc5, 0xa2,0x61,0xdc,0x20, 0x69,0x5a,0x77,0x4b, 0x16,0x1c,0x12,0x1a, +0x0a,0xe2,0x93,0xba, 0xe5,0xc0,0xa0,0x2a, 0x43,0x3c,0x22,0xe0, 0x1d,0x12,0x1b,0x17, +0x0b,0x0e,0x09,0x0d, 0xad,0xf2,0x8b,0xc7, 0xb9,0x2d,0xb6,0xa8, 0xc8,0x14,0x1e,0xa9, +0x85,0x57,0xf1,0x19, 0x4c,0xaf,0x75,0x07, 0xbb,0xee,0x99,0xdd, 0xfd,0xa3,0x7f,0x60, +0x9f,0xf7,0x01,0x26, 0xbc,0x5c,0x72,0xf5, 0xc5,0x44,0x66,0x3b, 0x34,0x5b,0xfb,0x7e, +0x76,0x8b,0x43,0x29, 0xdc,0xcb,0x23,0xc6, 0x68,0xb6,0xed,0xfc, 0x63,0xb8,0xe4,0xf1, +0xca,0xd7,0x31,0xdc, 0x10,0x42,0x63,0x85, 0x40,0x13,0x97,0x22, 0x20,0x84,0xc6,0x11, +0x7d,0x85,0x4a,0x24, 0xf8,0xd2,0xbb,0x3d, 0x11,0xae,0xf9,0x32, 0x6d,0xc7,0x29,0xa1, +0x4b,0x1d,0x9e,0x2f, 0xf3,0xdc,0xb2,0x30, 0xec,0x0d,0x86,0x52, 0xd0,0x77,0xc1,0xe3, +0x6c,0x2b,0xb3,0x16, 0x99,0xa9,0x70,0xb9, 0xfa,0x11,0x94,0x48, 0x22,0x47,0xe9,0x64, +0xc4,0xa8,0xfc,0x8c, 0x1a,0xa0,0xf0,0x3f, 0xd8,0x56,0x7d,0x2c, 0xef,0x22,0x33,0x90, +0xc7,0x87,0x49,0x4e, 0xc1,0xd9,0x38,0xd1, 0xfe,0x8c,0xca,0xa2, 0x36,0x98,0xd4,0x0b, +0xcf,0xa6,0xf5,0x81, 0x28,0xa5,0x7a,0xde, 0x26,0xda,0xb7,0x8e, 0xa4,0x3f,0xad,0xbf, +0xe4,0x2c,0x3a,0x9d, 0x0d,0x50,0x78,0x92, 0x9b,0x6a,0x5f,0xcc, 0x62,0x54,0x7e,0x46, +0xc2,0xf6,0x8d,0x13, 0xe8,0x90,0xd8,0xb8, 0x5e,0x2e,0x39,0xf7, 0xf5,0x82,0xc3,0xaf, 
+0xbe,0x9f,0x5d,0x80, 0x7c,0x69,0xd0,0x93, 0xa9,0x6f,0xd5,0x2d, 0xb3,0xcf,0x25,0x12, +0x3b,0xc8,0xac,0x99, 0xa7,0x10,0x18,0x7d, 0x6e,0xe8,0x9c,0x63, 0x7b,0xdb,0x3b,0xbb, +0x09,0xcd,0x26,0x78, 0xf4,0x6e,0x59,0x18, 0x01,0xec,0x9a,0xb7, 0xa8,0x83,0x4f,0x9a, +0x65,0xe6,0x95,0x6e, 0x7e,0xaa,0xff,0xe6, 0x08,0x21,0xbc,0xcf, 0xe6,0xef,0x15,0xe8, +0xd9,0xba,0xe7,0x9b, 0xce,0x4a,0x6f,0x36, 0xd4,0xea,0x9f,0x09, 0xd6,0x29,0xb0,0x7c, +0xaf,0x31,0xa4,0xb2, 0x31,0x2a,0x3f,0x23, 0x30,0xc6,0xa5,0x94, 0xc0,0x35,0xa2,0x66, +0x37,0x74,0x4e,0xbc, 0xa6,0xfc,0x82,0xca, 0xb0,0xe0,0x90,0xd0, 0x15,0x33,0xa7,0xd8, +0x4a,0xf1,0x04,0x98, 0xf7,0x41,0xec,0xda, 0x0e,0x7f,0xcd,0x50, 0x2f,0x17,0x91,0xf6, +0x8d,0x76,0x4d,0xd6, 0x4d,0x43,0xef,0xb0, 0x54,0xcc,0xaa,0x4d, 0xdf,0xe4,0x96,0x04, +0xe3,0x9e,0xd1,0xb5, 0x1b,0x4c,0x6a,0x88, 0xb8,0xc1,0x2c,0x1f, 0x7f,0x46,0x65,0x51, +0x04,0x9d,0x5e,0xea, 0x5d,0x01,0x8c,0x35, 0x73,0xfa,0x87,0x74, 0x2e,0xfb,0x0b,0x41, +0x5a,0xb3,0x67,0x1d, 0x52,0x92,0xdb,0xd2, 0x33,0xe9,0x10,0x56, 0x13,0x6d,0xd6,0x47, +0x8c,0x9a,0xd7,0x61, 0x7a,0x37,0xa1,0x0c, 0x8e,0x59,0xf8,0x14, 0x89,0xeb,0x13,0x3c, +0xee,0xce,0xa9,0x27, 0x35,0xb7,0x61,0xc9, 0xed,0xe1,0x1c,0xe5, 0x3c,0x7a,0x47,0xb1, +0x59,0x9c,0xd2,0xdf, 0x3f,0x55,0xf2,0x73, 0x79,0x18,0x14,0xce, 0xbf,0x73,0xc7,0x37, +0xea,0x53,0xf7,0xcd, 0x5b,0x5f,0xfd,0xaa, 0x14,0xdf,0x3d,0x6f, 0x86,0x78,0x44,0xdb, +0x81,0xca,0xaf,0xf3, 0x3e,0xb9,0x68,0xc4, 0x2c,0x38,0x24,0x34, 0x5f,0xc2,0xa3,0x40, +0x72,0x16,0x1d,0xc3, 0x0c,0xbc,0xe2,0x25, 0x8b,0x28,0x3c,0x49, 0x41,0xff,0x0d,0x95, +0x71,0x39,0xa8,0x01, 0xde,0x08,0x0c,0xb3, 0x9c,0xd8,0xb4,0xe4, 0x90,0x64,0x56,0xc1, +0x61,0x7b,0xcb,0x84, 0x70,0xd5,0x32,0xb6, 0x74,0x48,0x6c,0x5c, 0x42,0xd0,0xb8,0x57 +},{ +0xa7,0x50,0x51,0xf4, 0x65,0x53,0x7e,0x41, 0xa4,0xc3,0x1a,0x17, 0x5e,0x96,0x3a,0x27, +0x6b,0xcb,0x3b,0xab, 0x45,0xf1,0x1f,0x9d, 0x58,0xab,0xac,0xfa, 0x03,0x93,0x4b,0xe3, +0xfa,0x55,0x20,0x30, 0x6d,0xf6,0xad,0x76, 0x76,0x91,0x88,0xcc, 0x4c,0x25,0xf5,0x02, +0xd7,0xfc,0x4f,0xe5, 
0xcb,0xd7,0xc5,0x2a, 0x44,0x80,0x26,0x35, 0xa3,0x8f,0xb5,0x62, +0x5a,0x49,0xde,0xb1, 0x1b,0x67,0x25,0xba, 0x0e,0x98,0x45,0xea, 0xc0,0xe1,0x5d,0xfe, +0x75,0x02,0xc3,0x2f, 0xf0,0x12,0x81,0x4c, 0x97,0xa3,0x8d,0x46, 0xf9,0xc6,0x6b,0xd3, +0x5f,0xe7,0x03,0x8f, 0x9c,0x95,0x15,0x92, 0x7a,0xeb,0xbf,0x6d, 0x59,0xda,0x95,0x52, +0x83,0x2d,0xd4,0xbe, 0x21,0xd3,0x58,0x74, 0x69,0x29,0x49,0xe0, 0xc8,0x44,0x8e,0xc9, +0x89,0x6a,0x75,0xc2, 0x79,0x78,0xf4,0x8e, 0x3e,0x6b,0x99,0x58, 0x71,0xdd,0x27,0xb9, +0x4f,0xb6,0xbe,0xe1, 0xad,0x17,0xf0,0x88, 0xac,0x66,0xc9,0x20, 0x3a,0xb4,0x7d,0xce, +0x4a,0x18,0x63,0xdf, 0x31,0x82,0xe5,0x1a, 0x33,0x60,0x97,0x51, 0x7f,0x45,0x62,0x53, +0x77,0xe0,0xb1,0x64, 0xae,0x84,0xbb,0x6b, 0xa0,0x1c,0xfe,0x81, 0x2b,0x94,0xf9,0x08, +0x68,0x58,0x70,0x48, 0xfd,0x19,0x8f,0x45, 0x6c,0x87,0x94,0xde, 0xf8,0xb7,0x52,0x7b, +0xd3,0x23,0xab,0x73, 0x02,0xe2,0x72,0x4b, 0x8f,0x57,0xe3,0x1f, 0xab,0x2a,0x66,0x55, +0x28,0x07,0xb2,0xeb, 0xc2,0x03,0x2f,0xb5, 0x7b,0x9a,0x86,0xc5, 0x08,0xa5,0xd3,0x37, +0x87,0xf2,0x30,0x28, 0xa5,0xb2,0x23,0xbf, 0x6a,0xba,0x02,0x03, 0x82,0x5c,0xed,0x16, +0x1c,0x2b,0x8a,0xcf, 0xb4,0x92,0xa7,0x79, 0xf2,0xf0,0xf3,0x07, 0xe2,0xa1,0x4e,0x69, +0xf4,0xcd,0x65,0xda, 0xbe,0xd5,0x06,0x05, 0x62,0x1f,0xd1,0x34, 0xfe,0x8a,0xc4,0xa6, +0x53,0x9d,0x34,0x2e, 0x55,0xa0,0xa2,0xf3, 0xe1,0x32,0x05,0x8a, 0xeb,0x75,0xa4,0xf6, +0xec,0x39,0x0b,0x83, 0xef,0xaa,0x40,0x60, 0x9f,0x06,0x5e,0x71, 0x10,0x51,0xbd,0x6e, +0x8a,0xf9,0x3e,0x21, 0x06,0x3d,0x96,0xdd, 0x05,0xae,0xdd,0x3e, 0xbd,0x46,0x4d,0xe6, +0x8d,0xb5,0x91,0x54, 0x5d,0x05,0x71,0xc4, 0xd4,0x6f,0x04,0x06, 0x15,0xff,0x60,0x50, +0xfb,0x24,0x19,0x98, 0xe9,0x97,0xd6,0xbd, 0x43,0xcc,0x89,0x40, 0x9e,0x77,0x67,0xd9, +0x42,0xbd,0xb0,0xe8, 0x8b,0x88,0x07,0x89, 0x5b,0x38,0xe7,0x19, 0xee,0xdb,0x79,0xc8, +0x0a,0x47,0xa1,0x7c, 0x0f,0xe9,0x7c,0x42, 0x1e,0xc9,0xf8,0x84, 0x00,0x00,0x00,0x00, +0x86,0x83,0x09,0x80, 0xed,0x48,0x32,0x2b, 0x70,0xac,0x1e,0x11, 0x72,0x4e,0x6c,0x5a, +0xff,0xfb,0xfd,0x0e, 0x38,0x56,0x0f,0x85, 0xd5,0x1e,0x3d,0xae, 
0x39,0x27,0x36,0x2d, +0xd9,0x64,0x0a,0x0f, 0xa6,0x21,0x68,0x5c, 0x54,0xd1,0x9b,0x5b, 0x2e,0x3a,0x24,0x36, +0x67,0xb1,0x0c,0x0a, 0xe7,0x0f,0x93,0x57, 0x96,0xd2,0xb4,0xee, 0x91,0x9e,0x1b,0x9b, +0xc5,0x4f,0x80,0xc0, 0x20,0xa2,0x61,0xdc, 0x4b,0x69,0x5a,0x77, 0x1a,0x16,0x1c,0x12, +0xba,0x0a,0xe2,0x93, 0x2a,0xe5,0xc0,0xa0, 0xe0,0x43,0x3c,0x22, 0x17,0x1d,0x12,0x1b, +0x0d,0x0b,0x0e,0x09, 0xc7,0xad,0xf2,0x8b, 0xa8,0xb9,0x2d,0xb6, 0xa9,0xc8,0x14,0x1e, +0x19,0x85,0x57,0xf1, 0x07,0x4c,0xaf,0x75, 0xdd,0xbb,0xee,0x99, 0x60,0xfd,0xa3,0x7f, +0x26,0x9f,0xf7,0x01, 0xf5,0xbc,0x5c,0x72, 0x3b,0xc5,0x44,0x66, 0x7e,0x34,0x5b,0xfb, +0x29,0x76,0x8b,0x43, 0xc6,0xdc,0xcb,0x23, 0xfc,0x68,0xb6,0xed, 0xf1,0x63,0xb8,0xe4, +0xdc,0xca,0xd7,0x31, 0x85,0x10,0x42,0x63, 0x22,0x40,0x13,0x97, 0x11,0x20,0x84,0xc6, +0x24,0x7d,0x85,0x4a, 0x3d,0xf8,0xd2,0xbb, 0x32,0x11,0xae,0xf9, 0xa1,0x6d,0xc7,0x29, +0x2f,0x4b,0x1d,0x9e, 0x30,0xf3,0xdc,0xb2, 0x52,0xec,0x0d,0x86, 0xe3,0xd0,0x77,0xc1, +0x16,0x6c,0x2b,0xb3, 0xb9,0x99,0xa9,0x70, 0x48,0xfa,0x11,0x94, 0x64,0x22,0x47,0xe9, +0x8c,0xc4,0xa8,0xfc, 0x3f,0x1a,0xa0,0xf0, 0x2c,0xd8,0x56,0x7d, 0x90,0xef,0x22,0x33, +0x4e,0xc7,0x87,0x49, 0xd1,0xc1,0xd9,0x38, 0xa2,0xfe,0x8c,0xca, 0x0b,0x36,0x98,0xd4, +0x81,0xcf,0xa6,0xf5, 0xde,0x28,0xa5,0x7a, 0x8e,0x26,0xda,0xb7, 0xbf,0xa4,0x3f,0xad, +0x9d,0xe4,0x2c,0x3a, 0x92,0x0d,0x50,0x78, 0xcc,0x9b,0x6a,0x5f, 0x46,0x62,0x54,0x7e, +0x13,0xc2,0xf6,0x8d, 0xb8,0xe8,0x90,0xd8, 0xf7,0x5e,0x2e,0x39, 0xaf,0xf5,0x82,0xc3, +0x80,0xbe,0x9f,0x5d, 0x93,0x7c,0x69,0xd0, 0x2d,0xa9,0x6f,0xd5, 0x12,0xb3,0xcf,0x25, +0x99,0x3b,0xc8,0xac, 0x7d,0xa7,0x10,0x18, 0x63,0x6e,0xe8,0x9c, 0xbb,0x7b,0xdb,0x3b, +0x78,0x09,0xcd,0x26, 0x18,0xf4,0x6e,0x59, 0xb7,0x01,0xec,0x9a, 0x9a,0xa8,0x83,0x4f, +0x6e,0x65,0xe6,0x95, 0xe6,0x7e,0xaa,0xff, 0xcf,0x08,0x21,0xbc, 0xe8,0xe6,0xef,0x15, +0x9b,0xd9,0xba,0xe7, 0x36,0xce,0x4a,0x6f, 0x09,0xd4,0xea,0x9f, 0x7c,0xd6,0x29,0xb0, +0xb2,0xaf,0x31,0xa4, 0x23,0x31,0x2a,0x3f, 0x94,0x30,0xc6,0xa5, 0x66,0xc0,0x35,0xa2, +0xbc,0x37,0x74,0x4e, 
0xca,0xa6,0xfc,0x82, 0xd0,0xb0,0xe0,0x90, 0xd8,0x15,0x33,0xa7, +0x98,0x4a,0xf1,0x04, 0xda,0xf7,0x41,0xec, 0x50,0x0e,0x7f,0xcd, 0xf6,0x2f,0x17,0x91, +0xd6,0x8d,0x76,0x4d, 0xb0,0x4d,0x43,0xef, 0x4d,0x54,0xcc,0xaa, 0x04,0xdf,0xe4,0x96, +0xb5,0xe3,0x9e,0xd1, 0x88,0x1b,0x4c,0x6a, 0x1f,0xb8,0xc1,0x2c, 0x51,0x7f,0x46,0x65, +0xea,0x04,0x9d,0x5e, 0x35,0x5d,0x01,0x8c, 0x74,0x73,0xfa,0x87, 0x41,0x2e,0xfb,0x0b, +0x1d,0x5a,0xb3,0x67, 0xd2,0x52,0x92,0xdb, 0x56,0x33,0xe9,0x10, 0x47,0x13,0x6d,0xd6, +0x61,0x8c,0x9a,0xd7, 0x0c,0x7a,0x37,0xa1, 0x14,0x8e,0x59,0xf8, 0x3c,0x89,0xeb,0x13, +0x27,0xee,0xce,0xa9, 0xc9,0x35,0xb7,0x61, 0xe5,0xed,0xe1,0x1c, 0xb1,0x3c,0x7a,0x47, +0xdf,0x59,0x9c,0xd2, 0x73,0x3f,0x55,0xf2, 0xce,0x79,0x18,0x14, 0x37,0xbf,0x73,0xc7, +0xcd,0xea,0x53,0xf7, 0xaa,0x5b,0x5f,0xfd, 0x6f,0x14,0xdf,0x3d, 0xdb,0x86,0x78,0x44, +0xf3,0x81,0xca,0xaf, 0xc4,0x3e,0xb9,0x68, 0x34,0x2c,0x38,0x24, 0x40,0x5f,0xc2,0xa3, +0xc3,0x72,0x16,0x1d, 0x25,0x0c,0xbc,0xe2, 0x49,0x8b,0x28,0x3c, 0x95,0x41,0xff,0x0d, +0x01,0x71,0x39,0xa8, 0xb3,0xde,0x08,0x0c, 0xe4,0x9c,0xd8,0xb4, 0xc1,0x90,0x64,0x56, +0x84,0x61,0x7b,0xcb, 0xb6,0x70,0xd5,0x32, 0x5c,0x74,0x48,0x6c, 0x57,0x42,0xd0,0xb8 +},{ +0xf4,0xa7,0x50,0x51, 0x41,0x65,0x53,0x7e, 0x17,0xa4,0xc3,0x1a, 0x27,0x5e,0x96,0x3a, +0xab,0x6b,0xcb,0x3b, 0x9d,0x45,0xf1,0x1f, 0xfa,0x58,0xab,0xac, 0xe3,0x03,0x93,0x4b, +0x30,0xfa,0x55,0x20, 0x76,0x6d,0xf6,0xad, 0xcc,0x76,0x91,0x88, 0x02,0x4c,0x25,0xf5, +0xe5,0xd7,0xfc,0x4f, 0x2a,0xcb,0xd7,0xc5, 0x35,0x44,0x80,0x26, 0x62,0xa3,0x8f,0xb5, +0xb1,0x5a,0x49,0xde, 0xba,0x1b,0x67,0x25, 0xea,0x0e,0x98,0x45, 0xfe,0xc0,0xe1,0x5d, +0x2f,0x75,0x02,0xc3, 0x4c,0xf0,0x12,0x81, 0x46,0x97,0xa3,0x8d, 0xd3,0xf9,0xc6,0x6b, +0x8f,0x5f,0xe7,0x03, 0x92,0x9c,0x95,0x15, 0x6d,0x7a,0xeb,0xbf, 0x52,0x59,0xda,0x95, +0xbe,0x83,0x2d,0xd4, 0x74,0x21,0xd3,0x58, 0xe0,0x69,0x29,0x49, 0xc9,0xc8,0x44,0x8e, +0xc2,0x89,0x6a,0x75, 0x8e,0x79,0x78,0xf4, 0x58,0x3e,0x6b,0x99, 0xb9,0x71,0xdd,0x27, +0xe1,0x4f,0xb6,0xbe, 0x88,0xad,0x17,0xf0, 
0x20,0xac,0x66,0xc9, 0xce,0x3a,0xb4,0x7d, +0xdf,0x4a,0x18,0x63, 0x1a,0x31,0x82,0xe5, 0x51,0x33,0x60,0x97, 0x53,0x7f,0x45,0x62, +0x64,0x77,0xe0,0xb1, 0x6b,0xae,0x84,0xbb, 0x81,0xa0,0x1c,0xfe, 0x08,0x2b,0x94,0xf9, +0x48,0x68,0x58,0x70, 0x45,0xfd,0x19,0x8f, 0xde,0x6c,0x87,0x94, 0x7b,0xf8,0xb7,0x52, +0x73,0xd3,0x23,0xab, 0x4b,0x02,0xe2,0x72, 0x1f,0x8f,0x57,0xe3, 0x55,0xab,0x2a,0x66, +0xeb,0x28,0x07,0xb2, 0xb5,0xc2,0x03,0x2f, 0xc5,0x7b,0x9a,0x86, 0x37,0x08,0xa5,0xd3, +0x28,0x87,0xf2,0x30, 0xbf,0xa5,0xb2,0x23, 0x03,0x6a,0xba,0x02, 0x16,0x82,0x5c,0xed, +0xcf,0x1c,0x2b,0x8a, 0x79,0xb4,0x92,0xa7, 0x07,0xf2,0xf0,0xf3, 0x69,0xe2,0xa1,0x4e, +0xda,0xf4,0xcd,0x65, 0x05,0xbe,0xd5,0x06, 0x34,0x62,0x1f,0xd1, 0xa6,0xfe,0x8a,0xc4, +0x2e,0x53,0x9d,0x34, 0xf3,0x55,0xa0,0xa2, 0x8a,0xe1,0x32,0x05, 0xf6,0xeb,0x75,0xa4, +0x83,0xec,0x39,0x0b, 0x60,0xef,0xaa,0x40, 0x71,0x9f,0x06,0x5e, 0x6e,0x10,0x51,0xbd, +0x21,0x8a,0xf9,0x3e, 0xdd,0x06,0x3d,0x96, 0x3e,0x05,0xae,0xdd, 0xe6,0xbd,0x46,0x4d, +0x54,0x8d,0xb5,0x91, 0xc4,0x5d,0x05,0x71, 0x06,0xd4,0x6f,0x04, 0x50,0x15,0xff,0x60, +0x98,0xfb,0x24,0x19, 0xbd,0xe9,0x97,0xd6, 0x40,0x43,0xcc,0x89, 0xd9,0x9e,0x77,0x67, +0xe8,0x42,0xbd,0xb0, 0x89,0x8b,0x88,0x07, 0x19,0x5b,0x38,0xe7, 0xc8,0xee,0xdb,0x79, +0x7c,0x0a,0x47,0xa1, 0x42,0x0f,0xe9,0x7c, 0x84,0x1e,0xc9,0xf8, 0x00,0x00,0x00,0x00, +0x80,0x86,0x83,0x09, 0x2b,0xed,0x48,0x32, 0x11,0x70,0xac,0x1e, 0x5a,0x72,0x4e,0x6c, +0x0e,0xff,0xfb,0xfd, 0x85,0x38,0x56,0x0f, 0xae,0xd5,0x1e,0x3d, 0x2d,0x39,0x27,0x36, +0x0f,0xd9,0x64,0x0a, 0x5c,0xa6,0x21,0x68, 0x5b,0x54,0xd1,0x9b, 0x36,0x2e,0x3a,0x24, +0x0a,0x67,0xb1,0x0c, 0x57,0xe7,0x0f,0x93, 0xee,0x96,0xd2,0xb4, 0x9b,0x91,0x9e,0x1b, +0xc0,0xc5,0x4f,0x80, 0xdc,0x20,0xa2,0x61, 0x77,0x4b,0x69,0x5a, 0x12,0x1a,0x16,0x1c, +0x93,0xba,0x0a,0xe2, 0xa0,0x2a,0xe5,0xc0, 0x22,0xe0,0x43,0x3c, 0x1b,0x17,0x1d,0x12, +0x09,0x0d,0x0b,0x0e, 0x8b,0xc7,0xad,0xf2, 0xb6,0xa8,0xb9,0x2d, 0x1e,0xa9,0xc8,0x14, +0xf1,0x19,0x85,0x57, 0x75,0x07,0x4c,0xaf, 0x99,0xdd,0xbb,0xee, 0x7f,0x60,0xfd,0xa3, 
+0x01,0x26,0x9f,0xf7, 0x72,0xf5,0xbc,0x5c, 0x66,0x3b,0xc5,0x44, 0xfb,0x7e,0x34,0x5b, +0x43,0x29,0x76,0x8b, 0x23,0xc6,0xdc,0xcb, 0xed,0xfc,0x68,0xb6, 0xe4,0xf1,0x63,0xb8, +0x31,0xdc,0xca,0xd7, 0x63,0x85,0x10,0x42, 0x97,0x22,0x40,0x13, 0xc6,0x11,0x20,0x84, +0x4a,0x24,0x7d,0x85, 0xbb,0x3d,0xf8,0xd2, 0xf9,0x32,0x11,0xae, 0x29,0xa1,0x6d,0xc7, +0x9e,0x2f,0x4b,0x1d, 0xb2,0x30,0xf3,0xdc, 0x86,0x52,0xec,0x0d, 0xc1,0xe3,0xd0,0x77, +0xb3,0x16,0x6c,0x2b, 0x70,0xb9,0x99,0xa9, 0x94,0x48,0xfa,0x11, 0xe9,0x64,0x22,0x47, +0xfc,0x8c,0xc4,0xa8, 0xf0,0x3f,0x1a,0xa0, 0x7d,0x2c,0xd8,0x56, 0x33,0x90,0xef,0x22, +0x49,0x4e,0xc7,0x87, 0x38,0xd1,0xc1,0xd9, 0xca,0xa2,0xfe,0x8c, 0xd4,0x0b,0x36,0x98, +0xf5,0x81,0xcf,0xa6, 0x7a,0xde,0x28,0xa5, 0xb7,0x8e,0x26,0xda, 0xad,0xbf,0xa4,0x3f, +0x3a,0x9d,0xe4,0x2c, 0x78,0x92,0x0d,0x50, 0x5f,0xcc,0x9b,0x6a, 0x7e,0x46,0x62,0x54, +0x8d,0x13,0xc2,0xf6, 0xd8,0xb8,0xe8,0x90, 0x39,0xf7,0x5e,0x2e, 0xc3,0xaf,0xf5,0x82, +0x5d,0x80,0xbe,0x9f, 0xd0,0x93,0x7c,0x69, 0xd5,0x2d,0xa9,0x6f, 0x25,0x12,0xb3,0xcf, +0xac,0x99,0x3b,0xc8, 0x18,0x7d,0xa7,0x10, 0x9c,0x63,0x6e,0xe8, 0x3b,0xbb,0x7b,0xdb, +0x26,0x78,0x09,0xcd, 0x59,0x18,0xf4,0x6e, 0x9a,0xb7,0x01,0xec, 0x4f,0x9a,0xa8,0x83, +0x95,0x6e,0x65,0xe6, 0xff,0xe6,0x7e,0xaa, 0xbc,0xcf,0x08,0x21, 0x15,0xe8,0xe6,0xef, +0xe7,0x9b,0xd9,0xba, 0x6f,0x36,0xce,0x4a, 0x9f,0x09,0xd4,0xea, 0xb0,0x7c,0xd6,0x29, +0xa4,0xb2,0xaf,0x31, 0x3f,0x23,0x31,0x2a, 0xa5,0x94,0x30,0xc6, 0xa2,0x66,0xc0,0x35, +0x4e,0xbc,0x37,0x74, 0x82,0xca,0xa6,0xfc, 0x90,0xd0,0xb0,0xe0, 0xa7,0xd8,0x15,0x33, +0x04,0x98,0x4a,0xf1, 0xec,0xda,0xf7,0x41, 0xcd,0x50,0x0e,0x7f, 0x91,0xf6,0x2f,0x17, +0x4d,0xd6,0x8d,0x76, 0xef,0xb0,0x4d,0x43, 0xaa,0x4d,0x54,0xcc, 0x96,0x04,0xdf,0xe4, +0xd1,0xb5,0xe3,0x9e, 0x6a,0x88,0x1b,0x4c, 0x2c,0x1f,0xb8,0xc1, 0x65,0x51,0x7f,0x46, +0x5e,0xea,0x04,0x9d, 0x8c,0x35,0x5d,0x01, 0x87,0x74,0x73,0xfa, 0x0b,0x41,0x2e,0xfb, +0x67,0x1d,0x5a,0xb3, 0xdb,0xd2,0x52,0x92, 0x10,0x56,0x33,0xe9, 0xd6,0x47,0x13,0x6d, +0xd7,0x61,0x8c,0x9a, 0xa1,0x0c,0x7a,0x37, 
0xf8,0x14,0x8e,0x59, 0x13,0x3c,0x89,0xeb, +0xa9,0x27,0xee,0xce, 0x61,0xc9,0x35,0xb7, 0x1c,0xe5,0xed,0xe1, 0x47,0xb1,0x3c,0x7a, +0xd2,0xdf,0x59,0x9c, 0xf2,0x73,0x3f,0x55, 0x14,0xce,0x79,0x18, 0xc7,0x37,0xbf,0x73, +0xf7,0xcd,0xea,0x53, 0xfd,0xaa,0x5b,0x5f, 0x3d,0x6f,0x14,0xdf, 0x44,0xdb,0x86,0x78, +0xaf,0xf3,0x81,0xca, 0x68,0xc4,0x3e,0xb9, 0x24,0x34,0x2c,0x38, 0xa3,0x40,0x5f,0xc2, +0x1d,0xc3,0x72,0x16, 0xe2,0x25,0x0c,0xbc, 0x3c,0x49,0x8b,0x28, 0x0d,0x95,0x41,0xff, +0xa8,0x01,0x71,0x39, 0x0c,0xb3,0xde,0x08, 0xb4,0xe4,0x9c,0xd8, 0x56,0xc1,0x90,0x64, +0xcb,0x84,0x61,0x7b, 0x32,0xb6,0x70,0xd5, 0x6c,0x5c,0x74,0x48, 0xb8,0x57,0x42,0xd0 +}}; + +SYMCRYPT_ALIGN_AT(1024) const BYTE SymCryptAesInvMatrixMult[4][256][4] = {{ // For computing decryption round keys +0x00,0x00,0x00,0x00, 0x0e,0x09,0x0d,0x0b, 0x1c,0x12,0x1a,0x16, 0x12,0x1b,0x17,0x1d, +0x38,0x24,0x34,0x2c, 0x36,0x2d,0x39,0x27, 0x24,0x36,0x2e,0x3a, 0x2a,0x3f,0x23,0x31, +0x70,0x48,0x68,0x58, 0x7e,0x41,0x65,0x53, 0x6c,0x5a,0x72,0x4e, 0x62,0x53,0x7f,0x45, +0x48,0x6c,0x5c,0x74, 0x46,0x65,0x51,0x7f, 0x54,0x7e,0x46,0x62, 0x5a,0x77,0x4b,0x69, +0xe0,0x90,0xd0,0xb0, 0xee,0x99,0xdd,0xbb, 0xfc,0x82,0xca,0xa6, 0xf2,0x8b,0xc7,0xad, +0xd8,0xb4,0xe4,0x9c, 0xd6,0xbd,0xe9,0x97, 0xc4,0xa6,0xfe,0x8a, 0xca,0xaf,0xf3,0x81, +0x90,0xd8,0xb8,0xe8, 0x9e,0xd1,0xb5,0xe3, 0x8c,0xca,0xa2,0xfe, 0x82,0xc3,0xaf,0xf5, +0xa8,0xfc,0x8c,0xc4, 0xa6,0xf5,0x81,0xcf, 0xb4,0xee,0x96,0xd2, 0xba,0xe7,0x9b,0xd9, +0xdb,0x3b,0xbb,0x7b, 0xd5,0x32,0xb6,0x70, 0xc7,0x29,0xa1,0x6d, 0xc9,0x20,0xac,0x66, +0xe3,0x1f,0x8f,0x57, 0xed,0x16,0x82,0x5c, 0xff,0x0d,0x95,0x41, 0xf1,0x04,0x98,0x4a, +0xab,0x73,0xd3,0x23, 0xa5,0x7a,0xde,0x28, 0xb7,0x61,0xc9,0x35, 0xb9,0x68,0xc4,0x3e, +0x93,0x57,0xe7,0x0f, 0x9d,0x5e,0xea,0x04, 0x8f,0x45,0xfd,0x19, 0x81,0x4c,0xf0,0x12, +0x3b,0xab,0x6b,0xcb, 0x35,0xa2,0x66,0xc0, 0x27,0xb9,0x71,0xdd, 0x29,0xb0,0x7c,0xd6, +0x03,0x8f,0x5f,0xe7, 0x0d,0x86,0x52,0xec, 0x1f,0x9d,0x45,0xf1, 0x11,0x94,0x48,0xfa, +0x4b,0xe3,0x03,0x93, 0x45,0xea,0x0e,0x98, 
0x57,0xf1,0x19,0x85, 0x59,0xf8,0x14,0x8e, +0x73,0xc7,0x37,0xbf, 0x7d,0xce,0x3a,0xb4, 0x6f,0xd5,0x2d,0xa9, 0x61,0xdc,0x20,0xa2, +0xad,0x76,0x6d,0xf6, 0xa3,0x7f,0x60,0xfd, 0xb1,0x64,0x77,0xe0, 0xbf,0x6d,0x7a,0xeb, +0x95,0x52,0x59,0xda, 0x9b,0x5b,0x54,0xd1, 0x89,0x40,0x43,0xcc, 0x87,0x49,0x4e,0xc7, +0xdd,0x3e,0x05,0xae, 0xd3,0x37,0x08,0xa5, 0xc1,0x2c,0x1f,0xb8, 0xcf,0x25,0x12,0xb3, +0xe5,0x1a,0x31,0x82, 0xeb,0x13,0x3c,0x89, 0xf9,0x08,0x2b,0x94, 0xf7,0x01,0x26,0x9f, +0x4d,0xe6,0xbd,0x46, 0x43,0xef,0xb0,0x4d, 0x51,0xf4,0xa7,0x50, 0x5f,0xfd,0xaa,0x5b, +0x75,0xc2,0x89,0x6a, 0x7b,0xcb,0x84,0x61, 0x69,0xd0,0x93,0x7c, 0x67,0xd9,0x9e,0x77, +0x3d,0xae,0xd5,0x1e, 0x33,0xa7,0xd8,0x15, 0x21,0xbc,0xcf,0x08, 0x2f,0xb5,0xc2,0x03, +0x05,0x8a,0xe1,0x32, 0x0b,0x83,0xec,0x39, 0x19,0x98,0xfb,0x24, 0x17,0x91,0xf6,0x2f, +0x76,0x4d,0xd6,0x8d, 0x78,0x44,0xdb,0x86, 0x6a,0x5f,0xcc,0x9b, 0x64,0x56,0xc1,0x90, +0x4e,0x69,0xe2,0xa1, 0x40,0x60,0xef,0xaa, 0x52,0x7b,0xf8,0xb7, 0x5c,0x72,0xf5,0xbc, +0x06,0x05,0xbe,0xd5, 0x08,0x0c,0xb3,0xde, 0x1a,0x17,0xa4,0xc3, 0x14,0x1e,0xa9,0xc8, +0x3e,0x21,0x8a,0xf9, 0x30,0x28,0x87,0xf2, 0x22,0x33,0x90,0xef, 0x2c,0x3a,0x9d,0xe4, +0x96,0xdd,0x06,0x3d, 0x98,0xd4,0x0b,0x36, 0x8a,0xcf,0x1c,0x2b, 0x84,0xc6,0x11,0x20, +0xae,0xf9,0x32,0x11, 0xa0,0xf0,0x3f,0x1a, 0xb2,0xeb,0x28,0x07, 0xbc,0xe2,0x25,0x0c, +0xe6,0x95,0x6e,0x65, 0xe8,0x9c,0x63,0x6e, 0xfa,0x87,0x74,0x73, 0xf4,0x8e,0x79,0x78, +0xde,0xb1,0x5a,0x49, 0xd0,0xb8,0x57,0x42, 0xc2,0xa3,0x40,0x5f, 0xcc,0xaa,0x4d,0x54, +0x41,0xec,0xda,0xf7, 0x4f,0xe5,0xd7,0xfc, 0x5d,0xfe,0xc0,0xe1, 0x53,0xf7,0xcd,0xea, +0x79,0xc8,0xee,0xdb, 0x77,0xc1,0xe3,0xd0, 0x65,0xda,0xf4,0xcd, 0x6b,0xd3,0xf9,0xc6, +0x31,0xa4,0xb2,0xaf, 0x3f,0xad,0xbf,0xa4, 0x2d,0xb6,0xa8,0xb9, 0x23,0xbf,0xa5,0xb2, +0x09,0x80,0x86,0x83, 0x07,0x89,0x8b,0x88, 0x15,0x92,0x9c,0x95, 0x1b,0x9b,0x91,0x9e, +0xa1,0x7c,0x0a,0x47, 0xaf,0x75,0x07,0x4c, 0xbd,0x6e,0x10,0x51, 0xb3,0x67,0x1d,0x5a, +0x99,0x58,0x3e,0x6b, 0x97,0x51,0x33,0x60, 0x85,0x4a,0x24,0x7d, 0x8b,0x43,0x29,0x76, 
+0xd1,0x34,0x62,0x1f, 0xdf,0x3d,0x6f,0x14, 0xcd,0x26,0x78,0x09, 0xc3,0x2f,0x75,0x02, +0xe9,0x10,0x56,0x33, 0xe7,0x19,0x5b,0x38, 0xf5,0x02,0x4c,0x25, 0xfb,0x0b,0x41,0x2e, +0x9a,0xd7,0x61,0x8c, 0x94,0xde,0x6c,0x87, 0x86,0xc5,0x7b,0x9a, 0x88,0xcc,0x76,0x91, +0xa2,0xf3,0x55,0xa0, 0xac,0xfa,0x58,0xab, 0xbe,0xe1,0x4f,0xb6, 0xb0,0xe8,0x42,0xbd, +0xea,0x9f,0x09,0xd4, 0xe4,0x96,0x04,0xdf, 0xf6,0x8d,0x13,0xc2, 0xf8,0x84,0x1e,0xc9, +0xd2,0xbb,0x3d,0xf8, 0xdc,0xb2,0x30,0xf3, 0xce,0xa9,0x27,0xee, 0xc0,0xa0,0x2a,0xe5, +0x7a,0x47,0xb1,0x3c, 0x74,0x4e,0xbc,0x37, 0x66,0x55,0xab,0x2a, 0x68,0x5c,0xa6,0x21, +0x42,0x63,0x85,0x10, 0x4c,0x6a,0x88,0x1b, 0x5e,0x71,0x9f,0x06, 0x50,0x78,0x92,0x0d, +0x0a,0x0f,0xd9,0x64, 0x04,0x06,0xd4,0x6f, 0x16,0x1d,0xc3,0x72, 0x18,0x14,0xce,0x79, +0x32,0x2b,0xed,0x48, 0x3c,0x22,0xe0,0x43, 0x2e,0x39,0xf7,0x5e, 0x20,0x30,0xfa,0x55, +0xec,0x9a,0xb7,0x01, 0xe2,0x93,0xba,0x0a, 0xf0,0x88,0xad,0x17, 0xfe,0x81,0xa0,0x1c, +0xd4,0xbe,0x83,0x2d, 0xda,0xb7,0x8e,0x26, 0xc8,0xac,0x99,0x3b, 0xc6,0xa5,0x94,0x30, +0x9c,0xd2,0xdf,0x59, 0x92,0xdb,0xd2,0x52, 0x80,0xc0,0xc5,0x4f, 0x8e,0xc9,0xc8,0x44, +0xa4,0xf6,0xeb,0x75, 0xaa,0xff,0xe6,0x7e, 0xb8,0xe4,0xf1,0x63, 0xb6,0xed,0xfc,0x68, +0x0c,0x0a,0x67,0xb1, 0x02,0x03,0x6a,0xba, 0x10,0x18,0x7d,0xa7, 0x1e,0x11,0x70,0xac, +0x34,0x2e,0x53,0x9d, 0x3a,0x27,0x5e,0x96, 0x28,0x3c,0x49,0x8b, 0x26,0x35,0x44,0x80, +0x7c,0x42,0x0f,0xe9, 0x72,0x4b,0x02,0xe2, 0x60,0x50,0x15,0xff, 0x6e,0x59,0x18,0xf4, +0x44,0x66,0x3b,0xc5, 0x4a,0x6f,0x36,0xce, 0x58,0x74,0x21,0xd3, 0x56,0x7d,0x2c,0xd8, +0x37,0xa1,0x0c,0x7a, 0x39,0xa8,0x01,0x71, 0x2b,0xb3,0x16,0x6c, 0x25,0xba,0x1b,0x67, +0x0f,0x85,0x38,0x56, 0x01,0x8c,0x35,0x5d, 0x13,0x97,0x22,0x40, 0x1d,0x9e,0x2f,0x4b, +0x47,0xe9,0x64,0x22, 0x49,0xe0,0x69,0x29, 0x5b,0xfb,0x7e,0x34, 0x55,0xf2,0x73,0x3f, +0x7f,0xcd,0x50,0x0e, 0x71,0xc4,0x5d,0x05, 0x63,0xdf,0x4a,0x18, 0x6d,0xd6,0x47,0x13, +0xd7,0x31,0xdc,0xca, 0xd9,0x38,0xd1,0xc1, 0xcb,0x23,0xc6,0xdc, 0xc5,0x2a,0xcb,0xd7, +0xef,0x15,0xe8,0xe6, 0xe1,0x1c,0xe5,0xed, 
0xf3,0x07,0xf2,0xf0, 0xfd,0x0e,0xff,0xfb, +0xa7,0x79,0xb4,0x92, 0xa9,0x70,0xb9,0x99, 0xbb,0x6b,0xae,0x84, 0xb5,0x62,0xa3,0x8f, +0x9f,0x5d,0x80,0xbe, 0x91,0x54,0x8d,0xb5, 0x83,0x4f,0x9a,0xa8, 0x8d,0x46,0x97,0xa3 +},{ +0x00,0x00,0x00,0x00, 0x0b,0x0e,0x09,0x0d, 0x16,0x1c,0x12,0x1a, 0x1d,0x12,0x1b,0x17, +0x2c,0x38,0x24,0x34, 0x27,0x36,0x2d,0x39, 0x3a,0x24,0x36,0x2e, 0x31,0x2a,0x3f,0x23, +0x58,0x70,0x48,0x68, 0x53,0x7e,0x41,0x65, 0x4e,0x6c,0x5a,0x72, 0x45,0x62,0x53,0x7f, +0x74,0x48,0x6c,0x5c, 0x7f,0x46,0x65,0x51, 0x62,0x54,0x7e,0x46, 0x69,0x5a,0x77,0x4b, +0xb0,0xe0,0x90,0xd0, 0xbb,0xee,0x99,0xdd, 0xa6,0xfc,0x82,0xca, 0xad,0xf2,0x8b,0xc7, +0x9c,0xd8,0xb4,0xe4, 0x97,0xd6,0xbd,0xe9, 0x8a,0xc4,0xa6,0xfe, 0x81,0xca,0xaf,0xf3, +0xe8,0x90,0xd8,0xb8, 0xe3,0x9e,0xd1,0xb5, 0xfe,0x8c,0xca,0xa2, 0xf5,0x82,0xc3,0xaf, +0xc4,0xa8,0xfc,0x8c, 0xcf,0xa6,0xf5,0x81, 0xd2,0xb4,0xee,0x96, 0xd9,0xba,0xe7,0x9b, +0x7b,0xdb,0x3b,0xbb, 0x70,0xd5,0x32,0xb6, 0x6d,0xc7,0x29,0xa1, 0x66,0xc9,0x20,0xac, +0x57,0xe3,0x1f,0x8f, 0x5c,0xed,0x16,0x82, 0x41,0xff,0x0d,0x95, 0x4a,0xf1,0x04,0x98, +0x23,0xab,0x73,0xd3, 0x28,0xa5,0x7a,0xde, 0x35,0xb7,0x61,0xc9, 0x3e,0xb9,0x68,0xc4, +0x0f,0x93,0x57,0xe7, 0x04,0x9d,0x5e,0xea, 0x19,0x8f,0x45,0xfd, 0x12,0x81,0x4c,0xf0, +0xcb,0x3b,0xab,0x6b, 0xc0,0x35,0xa2,0x66, 0xdd,0x27,0xb9,0x71, 0xd6,0x29,0xb0,0x7c, +0xe7,0x03,0x8f,0x5f, 0xec,0x0d,0x86,0x52, 0xf1,0x1f,0x9d,0x45, 0xfa,0x11,0x94,0x48, +0x93,0x4b,0xe3,0x03, 0x98,0x45,0xea,0x0e, 0x85,0x57,0xf1,0x19, 0x8e,0x59,0xf8,0x14, +0xbf,0x73,0xc7,0x37, 0xb4,0x7d,0xce,0x3a, 0xa9,0x6f,0xd5,0x2d, 0xa2,0x61,0xdc,0x20, +0xf6,0xad,0x76,0x6d, 0xfd,0xa3,0x7f,0x60, 0xe0,0xb1,0x64,0x77, 0xeb,0xbf,0x6d,0x7a, +0xda,0x95,0x52,0x59, 0xd1,0x9b,0x5b,0x54, 0xcc,0x89,0x40,0x43, 0xc7,0x87,0x49,0x4e, +0xae,0xdd,0x3e,0x05, 0xa5,0xd3,0x37,0x08, 0xb8,0xc1,0x2c,0x1f, 0xb3,0xcf,0x25,0x12, +0x82,0xe5,0x1a,0x31, 0x89,0xeb,0x13,0x3c, 0x94,0xf9,0x08,0x2b, 0x9f,0xf7,0x01,0x26, +0x46,0x4d,0xe6,0xbd, 0x4d,0x43,0xef,0xb0, 0x50,0x51,0xf4,0xa7, 
0x5b,0x5f,0xfd,0xaa, +0x6a,0x75,0xc2,0x89, 0x61,0x7b,0xcb,0x84, 0x7c,0x69,0xd0,0x93, 0x77,0x67,0xd9,0x9e, +0x1e,0x3d,0xae,0xd5, 0x15,0x33,0xa7,0xd8, 0x08,0x21,0xbc,0xcf, 0x03,0x2f,0xb5,0xc2, +0x32,0x05,0x8a,0xe1, 0x39,0x0b,0x83,0xec, 0x24,0x19,0x98,0xfb, 0x2f,0x17,0x91,0xf6, +0x8d,0x76,0x4d,0xd6, 0x86,0x78,0x44,0xdb, 0x9b,0x6a,0x5f,0xcc, 0x90,0x64,0x56,0xc1, +0xa1,0x4e,0x69,0xe2, 0xaa,0x40,0x60,0xef, 0xb7,0x52,0x7b,0xf8, 0xbc,0x5c,0x72,0xf5, +0xd5,0x06,0x05,0xbe, 0xde,0x08,0x0c,0xb3, 0xc3,0x1a,0x17,0xa4, 0xc8,0x14,0x1e,0xa9, +0xf9,0x3e,0x21,0x8a, 0xf2,0x30,0x28,0x87, 0xef,0x22,0x33,0x90, 0xe4,0x2c,0x3a,0x9d, +0x3d,0x96,0xdd,0x06, 0x36,0x98,0xd4,0x0b, 0x2b,0x8a,0xcf,0x1c, 0x20,0x84,0xc6,0x11, +0x11,0xae,0xf9,0x32, 0x1a,0xa0,0xf0,0x3f, 0x07,0xb2,0xeb,0x28, 0x0c,0xbc,0xe2,0x25, +0x65,0xe6,0x95,0x6e, 0x6e,0xe8,0x9c,0x63, 0x73,0xfa,0x87,0x74, 0x78,0xf4,0x8e,0x79, +0x49,0xde,0xb1,0x5a, 0x42,0xd0,0xb8,0x57, 0x5f,0xc2,0xa3,0x40, 0x54,0xcc,0xaa,0x4d, +0xf7,0x41,0xec,0xda, 0xfc,0x4f,0xe5,0xd7, 0xe1,0x5d,0xfe,0xc0, 0xea,0x53,0xf7,0xcd, +0xdb,0x79,0xc8,0xee, 0xd0,0x77,0xc1,0xe3, 0xcd,0x65,0xda,0xf4, 0xc6,0x6b,0xd3,0xf9, +0xaf,0x31,0xa4,0xb2, 0xa4,0x3f,0xad,0xbf, 0xb9,0x2d,0xb6,0xa8, 0xb2,0x23,0xbf,0xa5, +0x83,0x09,0x80,0x86, 0x88,0x07,0x89,0x8b, 0x95,0x15,0x92,0x9c, 0x9e,0x1b,0x9b,0x91, +0x47,0xa1,0x7c,0x0a, 0x4c,0xaf,0x75,0x07, 0x51,0xbd,0x6e,0x10, 0x5a,0xb3,0x67,0x1d, +0x6b,0x99,0x58,0x3e, 0x60,0x97,0x51,0x33, 0x7d,0x85,0x4a,0x24, 0x76,0x8b,0x43,0x29, +0x1f,0xd1,0x34,0x62, 0x14,0xdf,0x3d,0x6f, 0x09,0xcd,0x26,0x78, 0x02,0xc3,0x2f,0x75, +0x33,0xe9,0x10,0x56, 0x38,0xe7,0x19,0x5b, 0x25,0xf5,0x02,0x4c, 0x2e,0xfb,0x0b,0x41, +0x8c,0x9a,0xd7,0x61, 0x87,0x94,0xde,0x6c, 0x9a,0x86,0xc5,0x7b, 0x91,0x88,0xcc,0x76, +0xa0,0xa2,0xf3,0x55, 0xab,0xac,0xfa,0x58, 0xb6,0xbe,0xe1,0x4f, 0xbd,0xb0,0xe8,0x42, +0xd4,0xea,0x9f,0x09, 0xdf,0xe4,0x96,0x04, 0xc2,0xf6,0x8d,0x13, 0xc9,0xf8,0x84,0x1e, +0xf8,0xd2,0xbb,0x3d, 0xf3,0xdc,0xb2,0x30, 0xee,0xce,0xa9,0x27, 0xe5,0xc0,0xa0,0x2a, +0x3c,0x7a,0x47,0xb1, 
0x37,0x74,0x4e,0xbc, 0x2a,0x66,0x55,0xab, 0x21,0x68,0x5c,0xa6, +0x10,0x42,0x63,0x85, 0x1b,0x4c,0x6a,0x88, 0x06,0x5e,0x71,0x9f, 0x0d,0x50,0x78,0x92, +0x64,0x0a,0x0f,0xd9, 0x6f,0x04,0x06,0xd4, 0x72,0x16,0x1d,0xc3, 0x79,0x18,0x14,0xce, +0x48,0x32,0x2b,0xed, 0x43,0x3c,0x22,0xe0, 0x5e,0x2e,0x39,0xf7, 0x55,0x20,0x30,0xfa, +0x01,0xec,0x9a,0xb7, 0x0a,0xe2,0x93,0xba, 0x17,0xf0,0x88,0xad, 0x1c,0xfe,0x81,0xa0, +0x2d,0xd4,0xbe,0x83, 0x26,0xda,0xb7,0x8e, 0x3b,0xc8,0xac,0x99, 0x30,0xc6,0xa5,0x94, +0x59,0x9c,0xd2,0xdf, 0x52,0x92,0xdb,0xd2, 0x4f,0x80,0xc0,0xc5, 0x44,0x8e,0xc9,0xc8, +0x75,0xa4,0xf6,0xeb, 0x7e,0xaa,0xff,0xe6, 0x63,0xb8,0xe4,0xf1, 0x68,0xb6,0xed,0xfc, +0xb1,0x0c,0x0a,0x67, 0xba,0x02,0x03,0x6a, 0xa7,0x10,0x18,0x7d, 0xac,0x1e,0x11,0x70, +0x9d,0x34,0x2e,0x53, 0x96,0x3a,0x27,0x5e, 0x8b,0x28,0x3c,0x49, 0x80,0x26,0x35,0x44, +0xe9,0x7c,0x42,0x0f, 0xe2,0x72,0x4b,0x02, 0xff,0x60,0x50,0x15, 0xf4,0x6e,0x59,0x18, +0xc5,0x44,0x66,0x3b, 0xce,0x4a,0x6f,0x36, 0xd3,0x58,0x74,0x21, 0xd8,0x56,0x7d,0x2c, +0x7a,0x37,0xa1,0x0c, 0x71,0x39,0xa8,0x01, 0x6c,0x2b,0xb3,0x16, 0x67,0x25,0xba,0x1b, +0x56,0x0f,0x85,0x38, 0x5d,0x01,0x8c,0x35, 0x40,0x13,0x97,0x22, 0x4b,0x1d,0x9e,0x2f, +0x22,0x47,0xe9,0x64, 0x29,0x49,0xe0,0x69, 0x34,0x5b,0xfb,0x7e, 0x3f,0x55,0xf2,0x73, +0x0e,0x7f,0xcd,0x50, 0x05,0x71,0xc4,0x5d, 0x18,0x63,0xdf,0x4a, 0x13,0x6d,0xd6,0x47, +0xca,0xd7,0x31,0xdc, 0xc1,0xd9,0x38,0xd1, 0xdc,0xcb,0x23,0xc6, 0xd7,0xc5,0x2a,0xcb, +0xe6,0xef,0x15,0xe8, 0xed,0xe1,0x1c,0xe5, 0xf0,0xf3,0x07,0xf2, 0xfb,0xfd,0x0e,0xff, +0x92,0xa7,0x79,0xb4, 0x99,0xa9,0x70,0xb9, 0x84,0xbb,0x6b,0xae, 0x8f,0xb5,0x62,0xa3, +0xbe,0x9f,0x5d,0x80, 0xb5,0x91,0x54,0x8d, 0xa8,0x83,0x4f,0x9a, 0xa3,0x8d,0x46,0x97 +},{ +0x00,0x00,0x00,0x00, 0x0d,0x0b,0x0e,0x09, 0x1a,0x16,0x1c,0x12, 0x17,0x1d,0x12,0x1b, +0x34,0x2c,0x38,0x24, 0x39,0x27,0x36,0x2d, 0x2e,0x3a,0x24,0x36, 0x23,0x31,0x2a,0x3f, +0x68,0x58,0x70,0x48, 0x65,0x53,0x7e,0x41, 0x72,0x4e,0x6c,0x5a, 0x7f,0x45,0x62,0x53, +0x5c,0x74,0x48,0x6c, 0x51,0x7f,0x46,0x65, 
0x46,0x62,0x54,0x7e, 0x4b,0x69,0x5a,0x77, +0xd0,0xb0,0xe0,0x90, 0xdd,0xbb,0xee,0x99, 0xca,0xa6,0xfc,0x82, 0xc7,0xad,0xf2,0x8b, +0xe4,0x9c,0xd8,0xb4, 0xe9,0x97,0xd6,0xbd, 0xfe,0x8a,0xc4,0xa6, 0xf3,0x81,0xca,0xaf, +0xb8,0xe8,0x90,0xd8, 0xb5,0xe3,0x9e,0xd1, 0xa2,0xfe,0x8c,0xca, 0xaf,0xf5,0x82,0xc3, +0x8c,0xc4,0xa8,0xfc, 0x81,0xcf,0xa6,0xf5, 0x96,0xd2,0xb4,0xee, 0x9b,0xd9,0xba,0xe7, +0xbb,0x7b,0xdb,0x3b, 0xb6,0x70,0xd5,0x32, 0xa1,0x6d,0xc7,0x29, 0xac,0x66,0xc9,0x20, +0x8f,0x57,0xe3,0x1f, 0x82,0x5c,0xed,0x16, 0x95,0x41,0xff,0x0d, 0x98,0x4a,0xf1,0x04, +0xd3,0x23,0xab,0x73, 0xde,0x28,0xa5,0x7a, 0xc9,0x35,0xb7,0x61, 0xc4,0x3e,0xb9,0x68, +0xe7,0x0f,0x93,0x57, 0xea,0x04,0x9d,0x5e, 0xfd,0x19,0x8f,0x45, 0xf0,0x12,0x81,0x4c, +0x6b,0xcb,0x3b,0xab, 0x66,0xc0,0x35,0xa2, 0x71,0xdd,0x27,0xb9, 0x7c,0xd6,0x29,0xb0, +0x5f,0xe7,0x03,0x8f, 0x52,0xec,0x0d,0x86, 0x45,0xf1,0x1f,0x9d, 0x48,0xfa,0x11,0x94, +0x03,0x93,0x4b,0xe3, 0x0e,0x98,0x45,0xea, 0x19,0x85,0x57,0xf1, 0x14,0x8e,0x59,0xf8, +0x37,0xbf,0x73,0xc7, 0x3a,0xb4,0x7d,0xce, 0x2d,0xa9,0x6f,0xd5, 0x20,0xa2,0x61,0xdc, +0x6d,0xf6,0xad,0x76, 0x60,0xfd,0xa3,0x7f, 0x77,0xe0,0xb1,0x64, 0x7a,0xeb,0xbf,0x6d, +0x59,0xda,0x95,0x52, 0x54,0xd1,0x9b,0x5b, 0x43,0xcc,0x89,0x40, 0x4e,0xc7,0x87,0x49, +0x05,0xae,0xdd,0x3e, 0x08,0xa5,0xd3,0x37, 0x1f,0xb8,0xc1,0x2c, 0x12,0xb3,0xcf,0x25, +0x31,0x82,0xe5,0x1a, 0x3c,0x89,0xeb,0x13, 0x2b,0x94,0xf9,0x08, 0x26,0x9f,0xf7,0x01, +0xbd,0x46,0x4d,0xe6, 0xb0,0x4d,0x43,0xef, 0xa7,0x50,0x51,0xf4, 0xaa,0x5b,0x5f,0xfd, +0x89,0x6a,0x75,0xc2, 0x84,0x61,0x7b,0xcb, 0x93,0x7c,0x69,0xd0, 0x9e,0x77,0x67,0xd9, +0xd5,0x1e,0x3d,0xae, 0xd8,0x15,0x33,0xa7, 0xcf,0x08,0x21,0xbc, 0xc2,0x03,0x2f,0xb5, +0xe1,0x32,0x05,0x8a, 0xec,0x39,0x0b,0x83, 0xfb,0x24,0x19,0x98, 0xf6,0x2f,0x17,0x91, +0xd6,0x8d,0x76,0x4d, 0xdb,0x86,0x78,0x44, 0xcc,0x9b,0x6a,0x5f, 0xc1,0x90,0x64,0x56, +0xe2,0xa1,0x4e,0x69, 0xef,0xaa,0x40,0x60, 0xf8,0xb7,0x52,0x7b, 0xf5,0xbc,0x5c,0x72, +0xbe,0xd5,0x06,0x05, 0xb3,0xde,0x08,0x0c, 0xa4,0xc3,0x1a,0x17, 0xa9,0xc8,0x14,0x1e, 
+0x8a,0xf9,0x3e,0x21, 0x87,0xf2,0x30,0x28, 0x90,0xef,0x22,0x33, 0x9d,0xe4,0x2c,0x3a, +0x06,0x3d,0x96,0xdd, 0x0b,0x36,0x98,0xd4, 0x1c,0x2b,0x8a,0xcf, 0x11,0x20,0x84,0xc6, +0x32,0x11,0xae,0xf9, 0x3f,0x1a,0xa0,0xf0, 0x28,0x07,0xb2,0xeb, 0x25,0x0c,0xbc,0xe2, +0x6e,0x65,0xe6,0x95, 0x63,0x6e,0xe8,0x9c, 0x74,0x73,0xfa,0x87, 0x79,0x78,0xf4,0x8e, +0x5a,0x49,0xde,0xb1, 0x57,0x42,0xd0,0xb8, 0x40,0x5f,0xc2,0xa3, 0x4d,0x54,0xcc,0xaa, +0xda,0xf7,0x41,0xec, 0xd7,0xfc,0x4f,0xe5, 0xc0,0xe1,0x5d,0xfe, 0xcd,0xea,0x53,0xf7, +0xee,0xdb,0x79,0xc8, 0xe3,0xd0,0x77,0xc1, 0xf4,0xcd,0x65,0xda, 0xf9,0xc6,0x6b,0xd3, +0xb2,0xaf,0x31,0xa4, 0xbf,0xa4,0x3f,0xad, 0xa8,0xb9,0x2d,0xb6, 0xa5,0xb2,0x23,0xbf, +0x86,0x83,0x09,0x80, 0x8b,0x88,0x07,0x89, 0x9c,0x95,0x15,0x92, 0x91,0x9e,0x1b,0x9b, +0x0a,0x47,0xa1,0x7c, 0x07,0x4c,0xaf,0x75, 0x10,0x51,0xbd,0x6e, 0x1d,0x5a,0xb3,0x67, +0x3e,0x6b,0x99,0x58, 0x33,0x60,0x97,0x51, 0x24,0x7d,0x85,0x4a, 0x29,0x76,0x8b,0x43, +0x62,0x1f,0xd1,0x34, 0x6f,0x14,0xdf,0x3d, 0x78,0x09,0xcd,0x26, 0x75,0x02,0xc3,0x2f, +0x56,0x33,0xe9,0x10, 0x5b,0x38,0xe7,0x19, 0x4c,0x25,0xf5,0x02, 0x41,0x2e,0xfb,0x0b, +0x61,0x8c,0x9a,0xd7, 0x6c,0x87,0x94,0xde, 0x7b,0x9a,0x86,0xc5, 0x76,0x91,0x88,0xcc, +0x55,0xa0,0xa2,0xf3, 0x58,0xab,0xac,0xfa, 0x4f,0xb6,0xbe,0xe1, 0x42,0xbd,0xb0,0xe8, +0x09,0xd4,0xea,0x9f, 0x04,0xdf,0xe4,0x96, 0x13,0xc2,0xf6,0x8d, 0x1e,0xc9,0xf8,0x84, +0x3d,0xf8,0xd2,0xbb, 0x30,0xf3,0xdc,0xb2, 0x27,0xee,0xce,0xa9, 0x2a,0xe5,0xc0,0xa0, +0xb1,0x3c,0x7a,0x47, 0xbc,0x37,0x74,0x4e, 0xab,0x2a,0x66,0x55, 0xa6,0x21,0x68,0x5c, +0x85,0x10,0x42,0x63, 0x88,0x1b,0x4c,0x6a, 0x9f,0x06,0x5e,0x71, 0x92,0x0d,0x50,0x78, +0xd9,0x64,0x0a,0x0f, 0xd4,0x6f,0x04,0x06, 0xc3,0x72,0x16,0x1d, 0xce,0x79,0x18,0x14, +0xed,0x48,0x32,0x2b, 0xe0,0x43,0x3c,0x22, 0xf7,0x5e,0x2e,0x39, 0xfa,0x55,0x20,0x30, +0xb7,0x01,0xec,0x9a, 0xba,0x0a,0xe2,0x93, 0xad,0x17,0xf0,0x88, 0xa0,0x1c,0xfe,0x81, +0x83,0x2d,0xd4,0xbe, 0x8e,0x26,0xda,0xb7, 0x99,0x3b,0xc8,0xac, 0x94,0x30,0xc6,0xa5, +0xdf,0x59,0x9c,0xd2, 0xd2,0x52,0x92,0xdb, 
0xc5,0x4f,0x80,0xc0, 0xc8,0x44,0x8e,0xc9, +0xeb,0x75,0xa4,0xf6, 0xe6,0x7e,0xaa,0xff, 0xf1,0x63,0xb8,0xe4, 0xfc,0x68,0xb6,0xed, +0x67,0xb1,0x0c,0x0a, 0x6a,0xba,0x02,0x03, 0x7d,0xa7,0x10,0x18, 0x70,0xac,0x1e,0x11, +0x53,0x9d,0x34,0x2e, 0x5e,0x96,0x3a,0x27, 0x49,0x8b,0x28,0x3c, 0x44,0x80,0x26,0x35, +0x0f,0xe9,0x7c,0x42, 0x02,0xe2,0x72,0x4b, 0x15,0xff,0x60,0x50, 0x18,0xf4,0x6e,0x59, +0x3b,0xc5,0x44,0x66, 0x36,0xce,0x4a,0x6f, 0x21,0xd3,0x58,0x74, 0x2c,0xd8,0x56,0x7d, +0x0c,0x7a,0x37,0xa1, 0x01,0x71,0x39,0xa8, 0x16,0x6c,0x2b,0xb3, 0x1b,0x67,0x25,0xba, +0x38,0x56,0x0f,0x85, 0x35,0x5d,0x01,0x8c, 0x22,0x40,0x13,0x97, 0x2f,0x4b,0x1d,0x9e, +0x64,0x22,0x47,0xe9, 0x69,0x29,0x49,0xe0, 0x7e,0x34,0x5b,0xfb, 0x73,0x3f,0x55,0xf2, +0x50,0x0e,0x7f,0xcd, 0x5d,0x05,0x71,0xc4, 0x4a,0x18,0x63,0xdf, 0x47,0x13,0x6d,0xd6, +0xdc,0xca,0xd7,0x31, 0xd1,0xc1,0xd9,0x38, 0xc6,0xdc,0xcb,0x23, 0xcb,0xd7,0xc5,0x2a, +0xe8,0xe6,0xef,0x15, 0xe5,0xed,0xe1,0x1c, 0xf2,0xf0,0xf3,0x07, 0xff,0xfb,0xfd,0x0e, +0xb4,0x92,0xa7,0x79, 0xb9,0x99,0xa9,0x70, 0xae,0x84,0xbb,0x6b, 0xa3,0x8f,0xb5,0x62, +0x80,0xbe,0x9f,0x5d, 0x8d,0xb5,0x91,0x54, 0x9a,0xa8,0x83,0x4f, 0x97,0xa3,0x8d,0x46 +},{ +0x00,0x00,0x00,0x00, 0x09,0x0d,0x0b,0x0e, 0x12,0x1a,0x16,0x1c, 0x1b,0x17,0x1d,0x12, +0x24,0x34,0x2c,0x38, 0x2d,0x39,0x27,0x36, 0x36,0x2e,0x3a,0x24, 0x3f,0x23,0x31,0x2a, +0x48,0x68,0x58,0x70, 0x41,0x65,0x53,0x7e, 0x5a,0x72,0x4e,0x6c, 0x53,0x7f,0x45,0x62, +0x6c,0x5c,0x74,0x48, 0x65,0x51,0x7f,0x46, 0x7e,0x46,0x62,0x54, 0x77,0x4b,0x69,0x5a, +0x90,0xd0,0xb0,0xe0, 0x99,0xdd,0xbb,0xee, 0x82,0xca,0xa6,0xfc, 0x8b,0xc7,0xad,0xf2, +0xb4,0xe4,0x9c,0xd8, 0xbd,0xe9,0x97,0xd6, 0xa6,0xfe,0x8a,0xc4, 0xaf,0xf3,0x81,0xca, +0xd8,0xb8,0xe8,0x90, 0xd1,0xb5,0xe3,0x9e, 0xca,0xa2,0xfe,0x8c, 0xc3,0xaf,0xf5,0x82, +0xfc,0x8c,0xc4,0xa8, 0xf5,0x81,0xcf,0xa6, 0xee,0x96,0xd2,0xb4, 0xe7,0x9b,0xd9,0xba, +0x3b,0xbb,0x7b,0xdb, 0x32,0xb6,0x70,0xd5, 0x29,0xa1,0x6d,0xc7, 0x20,0xac,0x66,0xc9, +0x1f,0x8f,0x57,0xe3, 0x16,0x82,0x5c,0xed, 0x0d,0x95,0x41,0xff, 
0x04,0x98,0x4a,0xf1, +0x73,0xd3,0x23,0xab, 0x7a,0xde,0x28,0xa5, 0x61,0xc9,0x35,0xb7, 0x68,0xc4,0x3e,0xb9, +0x57,0xe7,0x0f,0x93, 0x5e,0xea,0x04,0x9d, 0x45,0xfd,0x19,0x8f, 0x4c,0xf0,0x12,0x81, +0xab,0x6b,0xcb,0x3b, 0xa2,0x66,0xc0,0x35, 0xb9,0x71,0xdd,0x27, 0xb0,0x7c,0xd6,0x29, +0x8f,0x5f,0xe7,0x03, 0x86,0x52,0xec,0x0d, 0x9d,0x45,0xf1,0x1f, 0x94,0x48,0xfa,0x11, +0xe3,0x03,0x93,0x4b, 0xea,0x0e,0x98,0x45, 0xf1,0x19,0x85,0x57, 0xf8,0x14,0x8e,0x59, +0xc7,0x37,0xbf,0x73, 0xce,0x3a,0xb4,0x7d, 0xd5,0x2d,0xa9,0x6f, 0xdc,0x20,0xa2,0x61, +0x76,0x6d,0xf6,0xad, 0x7f,0x60,0xfd,0xa3, 0x64,0x77,0xe0,0xb1, 0x6d,0x7a,0xeb,0xbf, +0x52,0x59,0xda,0x95, 0x5b,0x54,0xd1,0x9b, 0x40,0x43,0xcc,0x89, 0x49,0x4e,0xc7,0x87, +0x3e,0x05,0xae,0xdd, 0x37,0x08,0xa5,0xd3, 0x2c,0x1f,0xb8,0xc1, 0x25,0x12,0xb3,0xcf, +0x1a,0x31,0x82,0xe5, 0x13,0x3c,0x89,0xeb, 0x08,0x2b,0x94,0xf9, 0x01,0x26,0x9f,0xf7, +0xe6,0xbd,0x46,0x4d, 0xef,0xb0,0x4d,0x43, 0xf4,0xa7,0x50,0x51, 0xfd,0xaa,0x5b,0x5f, +0xc2,0x89,0x6a,0x75, 0xcb,0x84,0x61,0x7b, 0xd0,0x93,0x7c,0x69, 0xd9,0x9e,0x77,0x67, +0xae,0xd5,0x1e,0x3d, 0xa7,0xd8,0x15,0x33, 0xbc,0xcf,0x08,0x21, 0xb5,0xc2,0x03,0x2f, +0x8a,0xe1,0x32,0x05, 0x83,0xec,0x39,0x0b, 0x98,0xfb,0x24,0x19, 0x91,0xf6,0x2f,0x17, +0x4d,0xd6,0x8d,0x76, 0x44,0xdb,0x86,0x78, 0x5f,0xcc,0x9b,0x6a, 0x56,0xc1,0x90,0x64, +0x69,0xe2,0xa1,0x4e, 0x60,0xef,0xaa,0x40, 0x7b,0xf8,0xb7,0x52, 0x72,0xf5,0xbc,0x5c, +0x05,0xbe,0xd5,0x06, 0x0c,0xb3,0xde,0x08, 0x17,0xa4,0xc3,0x1a, 0x1e,0xa9,0xc8,0x14, +0x21,0x8a,0xf9,0x3e, 0x28,0x87,0xf2,0x30, 0x33,0x90,0xef,0x22, 0x3a,0x9d,0xe4,0x2c, +0xdd,0x06,0x3d,0x96, 0xd4,0x0b,0x36,0x98, 0xcf,0x1c,0x2b,0x8a, 0xc6,0x11,0x20,0x84, +0xf9,0x32,0x11,0xae, 0xf0,0x3f,0x1a,0xa0, 0xeb,0x28,0x07,0xb2, 0xe2,0x25,0x0c,0xbc, +0x95,0x6e,0x65,0xe6, 0x9c,0x63,0x6e,0xe8, 0x87,0x74,0x73,0xfa, 0x8e,0x79,0x78,0xf4, +0xb1,0x5a,0x49,0xde, 0xb8,0x57,0x42,0xd0, 0xa3,0x40,0x5f,0xc2, 0xaa,0x4d,0x54,0xcc, +0xec,0xda,0xf7,0x41, 0xe5,0xd7,0xfc,0x4f, 0xfe,0xc0,0xe1,0x5d, 0xf7,0xcd,0xea,0x53, +0xc8,0xee,0xdb,0x79, 
0xc1,0xe3,0xd0,0x77, 0xda,0xf4,0xcd,0x65, 0xd3,0xf9,0xc6,0x6b, +0xa4,0xb2,0xaf,0x31, 0xad,0xbf,0xa4,0x3f, 0xb6,0xa8,0xb9,0x2d, 0xbf,0xa5,0xb2,0x23, +0x80,0x86,0x83,0x09, 0x89,0x8b,0x88,0x07, 0x92,0x9c,0x95,0x15, 0x9b,0x91,0x9e,0x1b, +0x7c,0x0a,0x47,0xa1, 0x75,0x07,0x4c,0xaf, 0x6e,0x10,0x51,0xbd, 0x67,0x1d,0x5a,0xb3, +0x58,0x3e,0x6b,0x99, 0x51,0x33,0x60,0x97, 0x4a,0x24,0x7d,0x85, 0x43,0x29,0x76,0x8b, +0x34,0x62,0x1f,0xd1, 0x3d,0x6f,0x14,0xdf, 0x26,0x78,0x09,0xcd, 0x2f,0x75,0x02,0xc3, +0x10,0x56,0x33,0xe9, 0x19,0x5b,0x38,0xe7, 0x02,0x4c,0x25,0xf5, 0x0b,0x41,0x2e,0xfb, +0xd7,0x61,0x8c,0x9a, 0xde,0x6c,0x87,0x94, 0xc5,0x7b,0x9a,0x86, 0xcc,0x76,0x91,0x88, +0xf3,0x55,0xa0,0xa2, 0xfa,0x58,0xab,0xac, 0xe1,0x4f,0xb6,0xbe, 0xe8,0x42,0xbd,0xb0, +0x9f,0x09,0xd4,0xea, 0x96,0x04,0xdf,0xe4, 0x8d,0x13,0xc2,0xf6, 0x84,0x1e,0xc9,0xf8, +0xbb,0x3d,0xf8,0xd2, 0xb2,0x30,0xf3,0xdc, 0xa9,0x27,0xee,0xce, 0xa0,0x2a,0xe5,0xc0, +0x47,0xb1,0x3c,0x7a, 0x4e,0xbc,0x37,0x74, 0x55,0xab,0x2a,0x66, 0x5c,0xa6,0x21,0x68, +0x63,0x85,0x10,0x42, 0x6a,0x88,0x1b,0x4c, 0x71,0x9f,0x06,0x5e, 0x78,0x92,0x0d,0x50, +0x0f,0xd9,0x64,0x0a, 0x06,0xd4,0x6f,0x04, 0x1d,0xc3,0x72,0x16, 0x14,0xce,0x79,0x18, +0x2b,0xed,0x48,0x32, 0x22,0xe0,0x43,0x3c, 0x39,0xf7,0x5e,0x2e, 0x30,0xfa,0x55,0x20, +0x9a,0xb7,0x01,0xec, 0x93,0xba,0x0a,0xe2, 0x88,0xad,0x17,0xf0, 0x81,0xa0,0x1c,0xfe, +0xbe,0x83,0x2d,0xd4, 0xb7,0x8e,0x26,0xda, 0xac,0x99,0x3b,0xc8, 0xa5,0x94,0x30,0xc6, +0xd2,0xdf,0x59,0x9c, 0xdb,0xd2,0x52,0x92, 0xc0,0xc5,0x4f,0x80, 0xc9,0xc8,0x44,0x8e, +0xf6,0xeb,0x75,0xa4, 0xff,0xe6,0x7e,0xaa, 0xe4,0xf1,0x63,0xb8, 0xed,0xfc,0x68,0xb6, +0x0a,0x67,0xb1,0x0c, 0x03,0x6a,0xba,0x02, 0x18,0x7d,0xa7,0x10, 0x11,0x70,0xac,0x1e, +0x2e,0x53,0x9d,0x34, 0x27,0x5e,0x96,0x3a, 0x3c,0x49,0x8b,0x28, 0x35,0x44,0x80,0x26, +0x42,0x0f,0xe9,0x7c, 0x4b,0x02,0xe2,0x72, 0x50,0x15,0xff,0x60, 0x59,0x18,0xf4,0x6e, +0x66,0x3b,0xc5,0x44, 0x6f,0x36,0xce,0x4a, 0x74,0x21,0xd3,0x58, 0x7d,0x2c,0xd8,0x56, +0xa1,0x0c,0x7a,0x37, 0xa8,0x01,0x71,0x39, 0xb3,0x16,0x6c,0x2b, 
0xba,0x1b,0x67,0x25, +0x85,0x38,0x56,0x0f, 0x8c,0x35,0x5d,0x01, 0x97,0x22,0x40,0x13, 0x9e,0x2f,0x4b,0x1d, +0xe9,0x64,0x22,0x47, 0xe0,0x69,0x29,0x49, 0xfb,0x7e,0x34,0x5b, 0xf2,0x73,0x3f,0x55, +0xcd,0x50,0x0e,0x7f, 0xc4,0x5d,0x05,0x71, 0xdf,0x4a,0x18,0x63, 0xd6,0x47,0x13,0x6d, +0x31,0xdc,0xca,0xd7, 0x38,0xd1,0xc1,0xd9, 0x23,0xc6,0xdc,0xcb, 0x2a,0xcb,0xd7,0xc5, +0x15,0xe8,0xe6,0xef, 0x1c,0xe5,0xed,0xe1, 0x07,0xf2,0xf0,0xf3, 0x0e,0xff,0xfb,0xfd, +0x79,0xb4,0x92,0xa7, 0x70,0xb9,0x99,0xa9, 0x6b,0xae,0x84,0xbb, 0x62,0xa3,0x8f,0xb5, +0x5d,0x80,0xbe,0x9f, 0x54,0x8d,0xb5,0x91, 0x4f,0x9a,0xa8,0x83, 0x46,0x97,0xa3,0x8d +}}; + +/* +// +// The AES S-box values can be retrieved from the AesSboxMatrixMult table. +// S[x] = AesSboxMatrixMult[0][x][1]. +// We save codespace by not having a separate table for the S-box. +// This trick doesn't work for the inverse S-box as the +// Inverse MDS matrix does not have a coefficient equal to 1. +// + +SYMCRYPT_ALIGN_AT(256) const BYTE SymCryptAesSbox[256] = { + 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118, +202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192, +183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21, + 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117, + 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, + 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, +208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, + 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, +205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115, + 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219, +224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121, +231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8, +186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, +112, 62, 181, 102, 72, 3, 246, 
14, 97, 53, 87, 185, 134, 193, 29, 158, +225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, +140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22 +}; +*/ + +SYMCRYPT_ALIGN_AT(256) const BYTE SymCryptAesInvSbox[256] = { // For final round in decryption +0x52,0x09,0x6a,0xd5, +0x30,0x36,0xa5,0x38, +0xbf,0x40,0xa3,0x9e, +0x81,0xf3,0xd7,0xfb, +0x7c,0xe3,0x39,0x82, +0x9b,0x2f,0xff,0x87, +0x34,0x8e,0x43,0x44, +0xc4,0xde,0xe9,0xcb, +0x54,0x7b,0x94,0x32, +0xa6,0xc2,0x23,0x3d, +0xee,0x4c,0x95,0x0b, +0x42,0xfa,0xc3,0x4e, +0x08,0x2e,0xa1,0x66, +0x28,0xd9,0x24,0xb2, +0x76,0x5b,0xa2,0x49, +0x6d,0x8b,0xd1,0x25, +0x72,0xf8,0xf6,0x64, +0x86,0x68,0x98,0x16, +0xd4,0xa4,0x5c,0xcc, +0x5d,0x65,0xb6,0x92, +0x6c,0x70,0x48,0x50, +0xfd,0xed,0xb9,0xda, +0x5e,0x15,0x46,0x57, +0xa7,0x8d,0x9d,0x84, +0x90,0xd8,0xab,0x00, +0x8c,0xbc,0xd3,0x0a, +0xf7,0xe4,0x58,0x05, +0xb8,0xb3,0x45,0x06, +0xd0,0x2c,0x1e,0x8f, +0xca,0x3f,0x0f,0x02, +0xc1,0xaf,0xbd,0x03, +0x01,0x13,0x8a,0x6b, +0x3a,0x91,0x11,0x41, +0x4f,0x67,0xdc,0xea, +0x97,0xf2,0xcf,0xce, +0xf0,0xb4,0xe6,0x73, +0x96,0xac,0x74,0x22, +0xe7,0xad,0x35,0x85, +0xe2,0xf9,0x37,0xe8, +0x1c,0x75,0xdf,0x6e, +0x47,0xf1,0x1a,0x71, +0x1d,0x29,0xc5,0x89, +0x6f,0xb7,0x62,0x0e, +0xaa,0x18,0xbe,0x1b, +0xfc,0x56,0x3e,0x4b, +0xc6,0xd2,0x79,0x20, +0x9a,0xdb,0xc0,0xfe, +0x78,0xcd,0x5a,0xf4, +0x1f,0xdd,0xa8,0x33, +0x88,0x07,0xc7,0x31, +0xb1,0x12,0x10,0x59, +0x27,0x80,0xec,0x5f, +0x60,0x51,0x7f,0xa9, +0x19,0xb5,0x4a,0x0d, +0x2d,0xe5,0x7a,0x9f, +0x93,0xc9,0x9c,0xef, +0xa0,0xe0,0x3b,0x4d, +0xae,0x2a,0xf5,0xb0, +0xc8,0xeb,0xbb,0x3c, +0x83,0x53,0x99,0x61, +0x17,0x2b,0x04,0x7e, +0xba,0x77,0xd6,0x26, +0xe1,0x69,0x14,0x63, +0x55,0x21,0x0c,0x7d +}; diff --git a/libs/symcrypt/lib/DesTables.c b/libs/symcrypt/lib/DesTables.c new file mode 100644 index 00000000000..d39375b8ad5 --- /dev/null +++ b/libs/symcrypt/lib/DesTables.c @@ -0,0 +1,280 @@ +// +// DesTables.c static lookup tables for DES +// +// Copyright (c) Microsoft Corporation. 
Licensed under the MIT license. +// +// +// These tables were copies +// from the RSA32 DES implementation. See 3des.c for details. +// + +#include "precomp.h" + +// +// Alignments are chosen to reduce side-channel attacks through the TLB cache. +// We align each table to a multiple of the size within which we do data-dependent +// lookups. For example, the table below is aligned to 256. It is not a secret +// that the 8 sub-tables are accessed, but which value inside each sub-table is a secret. +// Aligning to 256 still leaves the data cache line leakage, but avoids any TLB-related leakage. +// +SYMCRYPT_ALIGN_AT( 256 ) const UINT32 SymCryptDesSpbox[8][64] = { +0x02080800,0x00080000,0x02000002,0x02080802, +0x02000000,0x00080802,0x00080002,0x02000002, +0x00080802,0x02080800,0x02080000,0x00000802, +0x02000802,0x02000000,0x00000000,0x00080002, +0x00080000,0x00000002,0x02000800,0x00080800, +0x02080802,0x02080000,0x00000802,0x02000800, +0x00000002,0x00000800,0x00080800,0x02080002, +0x00000800,0x02000802,0x02080002,0x00000000, +0x00000000,0x02080802,0x02000800,0x00080002, +0x02080800,0x00080000,0x00000802,0x02000800, +0x02080002,0x00000800,0x00080800,0x02000002, +0x00080802,0x00000002,0x02000002,0x02080000, +0x02080802,0x00080800,0x02080000,0x02000802, +0x02000000,0x00000802,0x00080002,0x00000000, +0x00080000,0x02000000,0x02000802,0x02080800, +0x00000002,0x02080002,0x00000800,0x00080802, +0x40108010,0x00000000,0x00108000,0x40100000, +0x40000010,0x00008010,0x40008000,0x00108000, +0x00008000,0x40100010,0x00000010,0x40008000, +0x00100010,0x40108000,0x40100000,0x00000010, +0x00100000,0x40008010,0x40100010,0x00008000, +0x00108010,0x40000000,0x00000000,0x00100010, +0x40008010,0x00108010,0x40108000,0x40000010, +0x40000000,0x00100000,0x00008010,0x40108010, +0x00100010,0x40108000,0x40008000,0x00108010, +0x40108010,0x00100010,0x40000010,0x00000000, +0x40000000,0x00008010,0x00100000,0x40100010, +0x00008000,0x40000000,0x00108010,0x40008010, 
+0x40108000,0x00008000,0x00000000,0x40000010, +0x00000010,0x40108010,0x00108000,0x40100000, +0x40100010,0x00100000,0x00008010,0x40008000, +0x40008010,0x00000010,0x40100000,0x00108000, +0x04000001,0x04040100,0x00000100,0x04000101, +0x00040001,0x04000000,0x04000101,0x00040100, +0x04000100,0x00040000,0x04040000,0x00000001, +0x04040101,0x00000101,0x00000001,0x04040001, +0x00000000,0x00040001,0x04040100,0x00000100, +0x00000101,0x04040101,0x00040000,0x04000001, +0x04040001,0x04000100,0x00040101,0x04040000, +0x00040100,0x00000000,0x04000000,0x00040101, +0x04040100,0x00000100,0x00000001,0x00040000, +0x00000101,0x00040001,0x04040000,0x04000101, +0x00000000,0x04040100,0x00040100,0x04040001, +0x00040001,0x04000000,0x04040101,0x00000001, +0x00040101,0x04000001,0x04000000,0x04040101, +0x00040000,0x04000100,0x04000101,0x00040100, +0x04000100,0x00000000,0x04040001,0x00000101, +0x04000001,0x00040101,0x00000100,0x04040000, +0x00401008,0x10001000,0x00000008,0x10401008, +0x00000000,0x10400000,0x10001008,0x00400008, +0x10401000,0x10000008,0x10000000,0x00001008, +0x10000008,0x00401008,0x00400000,0x10000000, +0x10400008,0x00401000,0x00001000,0x00000008, +0x00401000,0x10001008,0x10400000,0x00001000, +0x00001008,0x00000000,0x00400008,0x10401000, +0x10001000,0x10400008,0x10401008,0x00400000, +0x10400008,0x00001008,0x00400000,0x10000008, +0x00401000,0x10001000,0x00000008,0x10400000, +0x10001008,0x00000000,0x00001000,0x00400008, +0x00000000,0x10400008,0x10401000,0x00001000, +0x10000000,0x10401008,0x00401008,0x00400000, +0x10401008,0x00000008,0x10001000,0x00401008, +0x00400008,0x00401000,0x10400000,0x10001008, +0x00001008,0x10000000,0x10000008,0x10401000, +0x08000000,0x00010000,0x00000400,0x08010420, +0x08010020,0x08000400,0x00010420,0x08010000, +0x00010000,0x00000020,0x08000020,0x00010400, +0x08000420,0x08010020,0x08010400,0x00000000, +0x00010400,0x08000000,0x00010020,0x00000420, +0x08000400,0x00010420,0x00000000,0x08000020, +0x00000020,0x08000420,0x08010420,0x00010020, 
+0x08010000,0x00000400,0x00000420,0x08010400, +0x08010400,0x08000420,0x00010020,0x08010000, +0x00010000,0x00000020,0x08000020,0x08000400, +0x08000000,0x00010400,0x08010420,0x00000000, +0x00010420,0x08000000,0x00000400,0x00010020, +0x08000420,0x00000400,0x00000000,0x08010420, +0x08010020,0x08010400,0x00000420,0x00010000, +0x00010400,0x08010020,0x08000400,0x00000420, +0x00000020,0x00010420,0x08010000,0x08000020, +0x80000040,0x00200040,0x00000000,0x80202000, +0x00200040,0x00002000,0x80002040,0x00200000, +0x00002040,0x80202040,0x00202000,0x80000000, +0x80002000,0x80000040,0x80200000,0x00202040, +0x00200000,0x80002040,0x80200040,0x00000000, +0x00002000,0x00000040,0x80202000,0x80200040, +0x80202040,0x80200000,0x80000000,0x00002040, +0x00000040,0x00202000,0x00202040,0x80002000, +0x00002040,0x80000000,0x80002000,0x00202040, +0x80202000,0x00200040,0x00000000,0x80002000, +0x80000000,0x00002000,0x80200040,0x00200000, +0x00200040,0x80202040,0x00202000,0x00000040, +0x80202040,0x00202000,0x00200000,0x80002040, +0x80000040,0x80200000,0x00202040,0x00000000, +0x00002000,0x80000040,0x80002040,0x80202000, +0x80200000,0x00002040,0x00000040,0x80200040, +0x00004000,0x00000200,0x01000200,0x01000004, +0x01004204,0x00004004,0x00004200,0x00000000, +0x01000000,0x01000204,0x00000204,0x01004000, +0x00000004,0x01004200,0x01004000,0x00000204, +0x01000204,0x00004000,0x00004004,0x01004204, +0x00000000,0x01000200,0x01000004,0x00004200, +0x01004004,0x00004204,0x01004200,0x00000004, +0x00004204,0x01004004,0x00000200,0x01000000, +0x00004204,0x01004000,0x01004004,0x00000204, +0x00004000,0x00000200,0x01000000,0x01004004, +0x01000204,0x00004204,0x00004200,0x00000000, +0x00000200,0x01000004,0x00000004,0x01000200, +0x00000000,0x01000204,0x01000200,0x00004200, +0x00000204,0x00004000,0x01004204,0x01000000, +0x01004200,0x00000004,0x00004004,0x01004204, +0x01000004,0x01004200,0x01004000,0x00004004, +0x20800080,0x20820000,0x00020080,0x00000000, +0x20020000,0x00800080,0x20800000,0x20820080, 
+0x00000080,0x20000000,0x00820000,0x00020080, +0x00820080,0x20020080,0x20000080,0x20800000, +0x00020000,0x00820080,0x00800080,0x20020000, +0x20820080,0x20000080,0x00000000,0x00820000, +0x20000000,0x00800000,0x20020080,0x20800080, +0x00800000,0x00020000,0x20820000,0x00000080, +0x00800000,0x00020000,0x20000080,0x20820080, +0x00020080,0x20000000,0x00000000,0x00820000, +0x20800080,0x20020080,0x20020000,0x00800080, +0x20820000,0x00000080,0x00800080,0x20020000, +0x20820080,0x00800000,0x20800000,0x20000080, +0x00820000,0x00020080,0x20020080,0x20800000, +0x00000080,0x20820000,0x00820080,0x00000000, +0x20000000,0x20800080,0x00020000,0x00820080, +}; + +SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptDesKeySelect[8][64]={ +0x00000000,0x00000010,0x20000000,0x20000010, +0x00010000,0x00010010,0x20010000,0x20010010, +0x00000800,0x00000810,0x20000800,0x20000810, +0x00010800,0x00010810,0x20010800,0x20010810, +0x00000020,0x00000030,0x20000020,0x20000030, +0x00010020,0x00010030,0x20010020,0x20010030, +0x00000820,0x00000830,0x20000820,0x20000830, +0x00010820,0x00010830,0x20010820,0x20010830, +0x00080000,0x00080010,0x20080000,0x20080010, +0x00090000,0x00090010,0x20090000,0x20090010, +0x00080800,0x00080810,0x20080800,0x20080810, +0x00090800,0x00090810,0x20090800,0x20090810, +0x00080020,0x00080030,0x20080020,0x20080030, +0x00090020,0x00090030,0x20090020,0x20090030, +0x00080820,0x00080830,0x20080820,0x20080830, +0x00090820,0x00090830,0x20090820,0x20090830, +0x00000000,0x02000000,0x00002000,0x02002000, +0x00200000,0x02200000,0x00202000,0x02202000, +0x00000004,0x02000004,0x00002004,0x02002004, +0x00200004,0x02200004,0x00202004,0x02202004, +0x00000400,0x02000400,0x00002400,0x02002400, +0x00200400,0x02200400,0x00202400,0x02202400, +0x00000404,0x02000404,0x00002404,0x02002404, +0x00200404,0x02200404,0x00202404,0x02202404, +0x10000000,0x12000000,0x10002000,0x12002000, +0x10200000,0x12200000,0x10202000,0x12202000, +0x10000004,0x12000004,0x10002004,0x12002004, 
+0x10200004,0x12200004,0x10202004,0x12202004, +0x10000400,0x12000400,0x10002400,0x12002400, +0x10200400,0x12200400,0x10202400,0x12202400, +0x10000404,0x12000404,0x10002404,0x12002404, +0x10200404,0x12200404,0x10202404,0x12202404, +0x00000000,0x00000001,0x00040000,0x00040001, +0x01000000,0x01000001,0x01040000,0x01040001, +0x00000002,0x00000003,0x00040002,0x00040003, +0x01000002,0x01000003,0x01040002,0x01040003, +0x00000200,0x00000201,0x00040200,0x00040201, +0x01000200,0x01000201,0x01040200,0x01040201, +0x00000202,0x00000203,0x00040202,0x00040203, +0x01000202,0x01000203,0x01040202,0x01040203, +0x08000000,0x08000001,0x08040000,0x08040001, +0x09000000,0x09000001,0x09040000,0x09040001, +0x08000002,0x08000003,0x08040002,0x08040003, +0x09000002,0x09000003,0x09040002,0x09040003, +0x08000200,0x08000201,0x08040200,0x08040201, +0x09000200,0x09000201,0x09040200,0x09040201, +0x08000202,0x08000203,0x08040202,0x08040203, +0x09000202,0x09000203,0x09040202,0x09040203, +0x00000000,0x00100000,0x00000100,0x00100100, +0x00000008,0x00100008,0x00000108,0x00100108, +0x00001000,0x00101000,0x00001100,0x00101100, +0x00001008,0x00101008,0x00001108,0x00101108, +0x04000000,0x04100000,0x04000100,0x04100100, +0x04000008,0x04100008,0x04000108,0x04100108, +0x04001000,0x04101000,0x04001100,0x04101100, +0x04001008,0x04101008,0x04001108,0x04101108, +0x00020000,0x00120000,0x00020100,0x00120100, +0x00020008,0x00120008,0x00020108,0x00120108, +0x00021000,0x00121000,0x00021100,0x00121100, +0x00021008,0x00121008,0x00021108,0x00121108, +0x04020000,0x04120000,0x04020100,0x04120100, +0x04020008,0x04120008,0x04020108,0x04120108, +0x04021000,0x04121000,0x04021100,0x04121100, +0x04021008,0x04121008,0x04021108,0x04121108, +0x00000000,0x10000000,0x00010000,0x10010000, +0x00000004,0x10000004,0x00010004,0x10010004, +0x20000000,0x30000000,0x20010000,0x30010000, +0x20000004,0x30000004,0x20010004,0x30010004, +0x00100000,0x10100000,0x00110000,0x10110000, +0x00100004,0x10100004,0x00110004,0x10110004, 
+0x20100000,0x30100000,0x20110000,0x30110000, +0x20100004,0x30100004,0x20110004,0x30110004, +0x00001000,0x10001000,0x00011000,0x10011000, +0x00001004,0x10001004,0x00011004,0x10011004, +0x20001000,0x30001000,0x20011000,0x30011000, +0x20001004,0x30001004,0x20011004,0x30011004, +0x00101000,0x10101000,0x00111000,0x10111000, +0x00101004,0x10101004,0x00111004,0x10111004, +0x20101000,0x30101000,0x20111000,0x30111000, +0x20101004,0x30101004,0x20111004,0x30111004, +0x00000000,0x08000000,0x00000008,0x08000008, +0x00000400,0x08000400,0x00000408,0x08000408, +0x00020000,0x08020000,0x00020008,0x08020008, +0x00020400,0x08020400,0x00020408,0x08020408, +0x00000001,0x08000001,0x00000009,0x08000009, +0x00000401,0x08000401,0x00000409,0x08000409, +0x00020001,0x08020001,0x00020009,0x08020009, +0x00020401,0x08020401,0x00020409,0x08020409, +0x02000000,0x0A000000,0x02000008,0x0A000008, +0x02000400,0x0A000400,0x02000408,0x0A000408, +0x02020000,0x0A020000,0x02020008,0x0A020008, +0x02020400,0x0A020400,0x02020408,0x0A020408, +0x02000001,0x0A000001,0x02000009,0x0A000009, +0x02000401,0x0A000401,0x02000409,0x0A000409, +0x02020001,0x0A020001,0x02020009,0x0A020009, +0x02020401,0x0A020401,0x02020409,0x0A020409, +0x00000000,0x00000100,0x00080000,0x00080100, +0x01000000,0x01000100,0x01080000,0x01080100, +0x00000010,0x00000110,0x00080010,0x00080110, +0x01000010,0x01000110,0x01080010,0x01080110, +0x00200000,0x00200100,0x00280000,0x00280100, +0x01200000,0x01200100,0x01280000,0x01280100, +0x00200010,0x00200110,0x00280010,0x00280110, +0x01200010,0x01200110,0x01280010,0x01280110, +0x00000200,0x00000300,0x00080200,0x00080300, +0x01000200,0x01000300,0x01080200,0x01080300, +0x00000210,0x00000310,0x00080210,0x00080310, +0x01000210,0x01000310,0x01080210,0x01080310, +0x00200200,0x00200300,0x00280200,0x00280300, +0x01200200,0x01200300,0x01280200,0x01280300, +0x00200210,0x00200310,0x00280210,0x00280310, +0x01200210,0x01200310,0x01280210,0x01280310, +0x00000000,0x04000000,0x00040000,0x04040000, 
+0x00000002,0x04000002,0x00040002,0x04040002, +0x00002000,0x04002000,0x00042000,0x04042000, +0x00002002,0x04002002,0x00042002,0x04042002, +0x00000020,0x04000020,0x00040020,0x04040020, +0x00000022,0x04000022,0x00040022,0x04040022, +0x00002020,0x04002020,0x00042020,0x04042020, +0x00002022,0x04002022,0x00042022,0x04042022, +0x00000800,0x04000800,0x00040800,0x04040800, +0x00000802,0x04000802,0x00040802,0x04040802, +0x00002800,0x04002800,0x00042800,0x04042800, +0x00002802,0x04002802,0x00042802,0x04042802, +0x00000820,0x04000820,0x00040820,0x04040820, +0x00000822,0x04000822,0x00040822,0x04040822, +0x00002820,0x04002820,0x00042820,0x04042820, +0x00002822,0x04002822,0x00042822,0x04042822, +}; diff --git a/libs/symcrypt/lib/FatalIntercept.c b/libs/symcrypt/lib/FatalIntercept.c new file mode 100644 index 00000000000..b0e11e527c0 --- /dev/null +++ b/libs/symcrypt/lib/FatalIntercept.c @@ -0,0 +1,23 @@ +// +// FatalIntercept.C +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// Empty function which our test code can replace to intercept any Fatal calls. +// Used in Kernel-mode tests so that an error doesn't bugcheck the machine. +// Rather, it can kill the current thread and not take down the machine. +// +// This is in its own C file so that it is only linked in when the caller doesn't have +// a function by this name. +// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptFatalIntercept( UINT32 fatalCode ) +{ + UNREFERENCED_PARAMETER( fatalCode ); +} diff --git a/libs/symcrypt/lib/IEEE802_11SaeCustom.c b/libs/symcrypt/lib/IEEE802_11SaeCustom.c new file mode 100644 index 00000000000..574b53400c4 --- /dev/null +++ b/libs/symcrypt/lib/IEEE802_11SaeCustom.c @@ -0,0 +1,1585 @@ +// +// IEEE802_11SaeCustom.c Implementation of the custom crypto of IEEE 802.11 SAE +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +// Used in SAE Hunting and Pecking methods where NIST P256 is hardcoded +#define PRIME_LENGTH_BITS 256 + +// +// This data structure is used to store the associated elliptic curve and the z value corresponding to +// each IANA group mappings for each elliptic +// curve defined in IEEE Std 802.11 SAE method. +// +typedef struct _SYMCRYPT_SAE_GROUP_DATA { + SYMCRYPT_802_11_SAE_GROUP group; + const PCSYMCRYPT_ECURVE_PARAMS *pCurveParams; + const PCSYMCRYPT_MAC *macAlgorithm; + INT32 z; +} SYMCRYPT_SAE_GROUP_DATA, *PSYMCRYPT_SAE_GROUP_DATA; + +typedef const SYMCRYPT_SAE_GROUP_DATA* PCSYMCRYPT_SAE_GROUP_DATA; + +// +// Data based on IEEE Std 802.11-2020 +// Table 12.1 - Hash algorithm based on length of prime +// Table 12.2 - Unique curve parameter +// +const SYMCRYPT_SAE_GROUP_DATA g_ianaData[] = { + { SYMCRYPT_SAE_GROUP_19, &SymCryptEcurveParamsNistP256, &SymCryptHmacSha256Algorithm, -10}, + { SYMCRYPT_SAE_GROUP_20, &SymCryptEcurveParamsNistP384, &SymCryptHmacSha384Algorithm, -12}, +}; + +// +// Helper function that finds the associated IANA group data entry for a given group number +// Searches the global variable g_ianaData where the data for supported groups are stored +// +PCSYMCRYPT_SAE_GROUP_DATA SymCryptSaeFindGroupData(SYMCRYPT_802_11_SAE_GROUP ianaGroup) +{ + for (UINT32 index = 0; index < SYMCRYPT_ARRAY_SIZE(g_ianaData); index++ ) + { + if ( g_ianaData[index].group == ianaGroup ) + { + return &g_ianaData[index]; + } + } + + return NULL; +} + +// +// Helper function that returns the sizes of the field elements and elliptic curve points in bytes +// for a given IANA group number. Both output parameters are optional. 
+// +VOID SymCrypt802_11SaeGetGroupSizes( + SYMCRYPT_802_11_SAE_GROUP group, + _Out_opt_ SIZE_T* pcbScalar, + _Out_opt_ SIZE_T* pcbPoint ) +{ + PCSYMCRYPT_SAE_GROUP_DATA pGroupData = NULL; + SIZE_T cbScalar = 0; + SIZE_T cbPoint = 0; + + pGroupData = SymCryptSaeFindGroupData( group ); + + if ( pGroupData != NULL ) + { + cbScalar = ( *( pGroupData->pCurveParams ) )->cbFieldLength; + cbPoint = 2 * cbScalar; + } + + if ( pcbScalar != NULL ) + { + *pcbScalar = cbScalar; + } + + if ( pcbPoint != NULL ) + { + *pcbPoint = cbPoint; + } +} + +// +// Calculate sqrt(peVal) if it exists. If so, *puIsQuadraticResidue is set to 0xFFFF`FFFF. +// Otherwise, *puIsQuadraticResidue is set to 0. +// WARNING: *peSqrtArg is set even if the square root doesn't exist. Use masked copy functions +// with *puIsQuadraticResidue so as to use the value of *peSqrtArg only if the square root exists. +// +// - pmMod: Modulus of the curve. Must equal 3 mod 4, which holds for all NIST Prime curves except P224 +// - peVal: Value to calculate the square root of +// - puIsQuadraticResidue: mask value, true if sqrt(peVal) exists, false otherwise +// - peSqrtArg: optional out argument for square root value +// - pbScratch, cbScratch: scratch space >= SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pmMod->nDigits ) +// +SYMCRYPT_ERROR +SymCryptModSqrt( + _In_ PSYMCRYPT_MODULUS pmMod, + _In_ PSYMCRYPT_MODELEMENT peVal, + _Out_ PUINT32 puIsQuadraticResidue, + _Out_opt_ PSYMCRYPT_MODELEMENT peSqrtArg, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_INT piTmp = SymCryptIntAllocate( SymCryptDigitsFromBits( pmMod->Divisor.nBits ) ); + PSYMCRYPT_MODELEMENT peSqrt = SymCryptModElementAllocate( pmMod ); + PSYMCRYPT_MODELEMENT peTmp = SymCryptModElementAllocate( pmMod ); + + if( piTmp == NULL || peSqrt == NULL || peTmp == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Sqrt( v ) = v^{(P+1)/4} mod P when P 
= 3 mod 4 as it is here + SymCryptIntCopy( SymCryptIntFromModulus( pmMod ), piTmp ); + SymCryptIntAddUint32( piTmp, 1, piTmp ); // No overflow as our prime is not 2^256 - 1 + + SYMCRYPT_ASSERT( (SymCryptIntGetValueLsbits32(piTmp) & 3) == 0); + SymCryptIntDivPow2(piTmp, 2, piTmp); + // iX = (P+1)/4 + + // Compute Sqrt( v ) if it exists + SymCryptModExp( pmMod, peVal, piTmp, pmMod->Divisor.nBits - 2, 0, peSqrt, pbScratch, cbScratch ); + + SymCryptModSquare( pmMod, peSqrt, peTmp, pbScratch, cbScratch ); + *puIsQuadraticResidue = SymCryptModElementIsEqual( pmMod, peTmp, peVal ); + + if( peSqrtArg != NULL ) + { + SymCryptModElementCopy( pmMod, peSqrt, peSqrtArg ); + } + +cleanup: + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + if( peSqrt != NULL ) + { + SymCryptModElementFree( pmMod, peSqrt ); + peSqrt = NULL; + } + + if( peTmp != NULL ) + { + SymCryptModElementFree( pmMod, peTmp ); + peTmp = NULL; + } + + return scError; + +} + +// +// Calculates SSWU( u ) as described in 12.4.4.2.3 +// +// - pCurve: The curve object to use. +// - z: z value used in the SSWU calculation. Currently we assume this value to be negative. +// - peU: Value to calculate SSWU of. +// - popP: point on the curve found by SSWU. +// - pbScratch, cbScratch: scratch space >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) +// +SYMCRYPT_ERROR +SymCryptSswu( + _In_ PSYMCRYPT_ECURVE pCurve, + _In_ INT32 z, + _In_ PSYMCRYPT_MODELEMENT peU, + _Out_ PSYMCRYPT_ECPOINT poP, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + UINT32 selectionMask = 0; // Mask variable for masked copy operations. 
"l" in the spec + + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_MODELEMENT peTmp = NULL; + + PSYMCRYPT_MODELEMENT peZ = NULL; + PSYMCRYPT_MODELEMENT peM = NULL; + PSYMCRYPT_MODELEMENT peT = NULL; + PSYMCRYPT_MODELEMENT peX1 = NULL; + PSYMCRYPT_MODELEMENT peX2 = NULL; + PSYMCRYPT_MODELEMENT peGX1 = NULL; + PSYMCRYPT_MODELEMENT peGX2 = NULL; + + BYTE pointBuf[SYMCRYPT_SAE_MAX_EC_POINT_SIZE_BYTES] = { 0 }; + + SYMCRYPT_ASSERT( z < 0 ); + + piTmp = SymCryptIntAllocate( SymCryptDigitsFromBits( pCurve->FModBitsize ) ); + + peTmp = SymCryptModElementAllocate( pCurve->FMod ); + peZ = SymCryptModElementAllocate( pCurve->FMod ); + peM = SymCryptModElementAllocate( pCurve->FMod ); + peT = SymCryptModElementAllocate( pCurve->FMod ); + peX1 = SymCryptModElementAllocate( pCurve->FMod ); + peX2 = SymCryptModElementAllocate( pCurve->FMod ); + peGX1 = SymCryptModElementAllocate( pCurve->FMod ); + peGX2 = SymCryptModElementAllocate( pCurve->FMod ); + + if( piTmp == NULL|| peTmp == NULL || peZ == NULL || peM == NULL || peT == NULL || + peX1 == NULL || peX2 == NULL || peGX1 == NULL || peGX2 == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Convert z to mod element + // Currently we avoid a branching based on the sign of z to make the assignment and assume it will + // be negative which holds for the set of possible values as of now (NIST P256 and NIST P384). 
+ // There is no direct function to create a SYMCRYPT_INT from a signed INT32, so when z is negative + // we change its sign and call SymCryptModElementSetValueNegUInt32 + SymCryptModElementSetValueNegUint32(-z, pCurve->FMod, peZ, pbScratch, cbScratch); + + // Set peTmp to 1 for convenience later + SymCryptModElementSetValueUint32( 1, pCurve->FMod, peTmp, pbScratch, cbScratch ); + + // m = ( z^2 * u^4 + z * u^2 ) = (z * u^2)(z * u^2 + 1) modulo p + SymCryptModSquare( pCurve->FMod, peU, peM, pbScratch, cbScratch ); // M = u^2 + SymCryptModMul( pCurve->FMod, peM, peZ, peM, pbScratch, cbScratch ); // M = z * u^2 + SymCryptModAdd( pCurve->FMod, peM, peTmp, peTmp, pbScratch, cbScratch ); // tmp = (z * u^2 + 1) + SymCryptModMul( pCurve->FMod, peM, peTmp, peM, pbScratch, cbScratch ); // M = M * tmp = (z * u^2)(z * u^2 + 1) + + // l = CEQ( m, 0 ) + selectionMask = SymCryptModElementIsZero( pCurve->FMod, peM ); + + // t = inverse( m ) where inverse ( m ) = m^( p-2 ) modulo p + SymCryptIntSubUint32( SymCryptIntFromModulus( pCurve->FMod ), 2, piTmp ); + SymCryptModExp( pCurve->FMod, peM, piTmp, pCurve->FModBitsize, 0, peT, pbScratch, cbScratch ); + + //x1 = CSEL( l, ( b / ( z * a ) modulo p ), ( ( - b / a ) * ( 1 + t ) ) modulo p ) + // where CSEL(x,y,z) operates in constant time and returns y if x is true and z otherwise. 
+ SymCryptModMul( pCurve->FMod, peZ, pCurve->A, peTmp, pbScratch, cbScratch ); // tmp = z * a + SymCryptModInv( pCurve->FMod, peTmp, peTmp, SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, pbScratch, cbScratch ); // tmp = 1/(z * a) + SymCryptModMul( pCurve->FMod, pCurve->B, peTmp, peX1, pbScratch, cbScratch ); // x1A = B * 1/(z * a) + + SymCryptModInv( pCurve->FMod, pCurve->A, peTmp, SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, pbScratch, cbScratch ); // tmp = 1/a + SymCryptModMul( pCurve->FMod, pCurve->B, peTmp, peTmp, pbScratch, cbScratch ); // tmp = b * 1/a + SymCryptModNeg( pCurve->FMod, peTmp, peTmp, pbScratch, cbScratch ); // tmp = -(b * 1/a) + + // NB: in this block we're using X2 as the second candidate for CSEL. This allows us to choose the + // correct X1 by copying X2 to X1 if l is false + SymCryptIntSetValueUint32( 1, piTmp ); + SymCryptIntToModElement( piTmp, pCurve->FMod, peX2, pbScratch, cbScratch ); // X1B = 1 + SymCryptModAdd( pCurve->FMod, peX2, peT, peX2, pbScratch, cbScratch ); // X1B = 1 + t + SymCryptModMul( pCurve->FMod, peX2, peTmp, peX2, pbScratch, cbScratch ); // X1B = -(b * 1/a)(1 + t) + + // Note: we need the binary complement of l since MaskedCopy copies only if the mask is 0xFFFFFFFF, + // and we want the second X1 candidate iff l is false + SymCryptModElementMaskedCopy( pCurve->FMod, peX2, peX1, ~selectionMask ); + + // gx1 = ( x1^3 + a * x1 + b ) = (x1^2 + a)*x1 + b modulo p + SymCryptModSquare( pCurve->FMod, peX1, peGX1, pbScratch, cbScratch ); // gx1 = x1^2 + SymCryptModAdd( pCurve->FMod, peGX1, pCurve->A, peGX1, pbScratch, cbScratch ); // gx1 = x1^2 + a + SymCryptModMul( pCurve->FMod, peGX1, peX1, peGX1, pbScratch, cbScratch ); // gx1 = (x1^2 + a)*x1 + SymCryptModAdd( pCurve->FMod, peGX1, pCurve->B, peGX1, pbScratch, cbScratch ); // gx1 = (x1^2 + a)*x1 + b + + //x2 = ( z * u^2 * x1 ) modulo p + SymCryptModSquare( pCurve->FMod, peU, peX2, pbScratch, cbScratch ); // x2 = u^2 + SymCryptModMul( pCurve->FMod, peX2, 
peZ, peX2, pbScratch, cbScratch ); // x2 = u^2 * z + SymCryptModMul( pCurve->FMod, peX2, peX1, peX2, pbScratch, cbScratch ); // x2 = u^2 * z * x1 + + //gx2 = ( x2^3 + a * x2 + b ) = (x2^2 + a)*x2 + b modulo p + SymCryptModSquare( pCurve->FMod, peX2, peGX2, pbScratch, cbScratch ); // gx2 = x2^2 + SymCryptModAdd( pCurve->FMod, peGX2, pCurve->A, peGX2, pbScratch, cbScratch ); // gx2 = x2^2 + a + SymCryptModMul( pCurve->FMod, peGX2, peX2, peGX2, pbScratch, cbScratch ); // gx2 = (x2^2 + a)*x2 + SymCryptModAdd( pCurve->FMod, peGX2, pCurve->B, peGX2, pbScratch, cbScratch ); // gx2 = (x2^2 + a)*x2 + b + + //l = gx1 is a quadratic residue modulo p + scError = SymCryptModSqrt( pCurve->FMod, peGX1, &selectionMask, NULL, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // v = CSEL( l, gx1, gx2 ) + // (Using gx1 as a temporary for v) + SymCryptModElementMaskedCopy( pCurve->FMod, peGX2, peGX1, ~selectionMask ); + + // x = CSEL( l, x1, x2 ) + // (Using x1 as a temporary for x) + SymCryptModElementMaskedCopy( pCurve->FMod, peX2, peX1, ~selectionMask ); + + // y = sqrt( v ) = v^{(P+1)/4} + // (Using gx1 as a temporary for y) + scError = SymCryptModSqrt( pCurve->FMod, peGX1, &selectionMask, peGX1, pbScratch, cbScratch ); + + // l = CEQ( LSB( u ), LSB( y ) ) + // LSB returns the least significant *BIT* of its argument + SymCryptModElementToInt( pCurve->FMod, peU, piTmp, pbScratch, cbScratch ); + UINT32 u = SymCryptIntGetValueLsbits32( piTmp ); + + SymCryptModElementToInt( pCurve->FMod, peGX1, piTmp, pbScratch, cbScratch ); + UINT32 y = SymCryptIntGetValueLsbits32( piTmp ); + + selectionMask = SYMCRYPT_MASK32_EQ( u & 1, y & 1 ); + + // P = CSEL( l, ( x, y ), ( x, p - y ) ) + // equivalently, y = CSEL( l, y, p - y ) + // (p - y) mod p is equivalent to -y mod p, so we end up with + // y = CSEL(l, y, -y) + // We use gx1 for y + SymCryptModNeg( pCurve->FMod, peGX1, peTmp, pbScratch, cbScratch ); + SymCryptModElementMaskedCopy( pCurve->FMod, peTmp, 
peGX1, ~selectionMask ); + + SymCryptModElementGetValue( pCurve->FMod, peX1, &pointBuf[0], pCurve->FModBytesize, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + SymCryptModElementGetValue( pCurve->FMod, peGX1, &pointBuf[pCurve->FModBytesize], pCurve->FModBytesize, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + + scError = SymCryptEcpointSetValue( pCurve, + pointBuf, + 2 * pCurve->FModBytesize, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + poP, + 0, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + if( peGX2 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peGX2 ); + peGX2 = NULL; + } + + if( peGX1 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peGX1 ); + peGX1 = NULL; + } + + if( peX2 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peX2 ); + peX2 = NULL; + } + + if( peX1 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peX1 ); + peX1 = NULL; + } + + if( peT != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peT ); + peT = NULL; + } + + if( peM != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peM ); + peM = NULL; + } + + if( peZ != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peZ ); + peZ = NULL; + } + + if( peTmp != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peTmp ); + peTmp = NULL; + } + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + return scError; +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomSetRandMask( + _Inout_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Inout_updates_opt_( cbRand ) PBYTE pbRand, + SIZE_T cbRand, + _Inout_updates_opt_( cbMask) PBYTE pbMask, + SIZE_T cbMask, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_ECURVE pcCurve = pState->pCurve; + + SymCryptModElementSetValueUint32( 0, pcCurve->GOrd, pState->peRand, pbScratch, cbScratch ); + if( pbRand != NULL ) + { + scError = 
SymCryptModElementSetValue( pbRand, cbRand, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pcCurve->GOrd, pState->peRand, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + if( SymCryptModElementIsZero( pcCurve->GOrd, pState->peRand ) ) + { + SymCryptModSetRandom( pcCurve->GOrd, pState->peRand, SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE, pbScratch, cbScratch ); + } + + if( pbRand != NULL ) + { + scError = SymCryptModElementGetValue( pcCurve->GOrd, pState->peRand, pbRand, cbRand, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + SymCryptModElementSetValueUint32( 0, pcCurve->GOrd, pState->peMask, pbScratch, cbScratch ); + if( pbMask != NULL ) + { + scError = SymCryptModElementSetValue( pbMask, cbMask, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pcCurve->GOrd, pState->peMask, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + if( SymCryptModElementIsZero( pcCurve->GOrd, pState->peMask ) ) + { + SymCryptModSetRandom( pcCurve->GOrd, pState->peMask, SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE, pbScratch, cbScratch ); + } + + if( pbMask != NULL ) + { + scError = SymCryptModElementGetValue( pcCurve->GOrd, pState->peMask, pbMask, cbMask, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + // + // The standard calls for checking that peRand and peMask are not 0 or 1, and peRand + peMask is not 0 or 1. + // When the caller specifies the values we don't want to do any checking as they might be helpful in test vectors. + // When this code generates the random values, we avoid 0 or 1 (by not passing the flags allowing 0 and 1). 
+ // We don't check that peRand + peMask > 1 because the probability of that occurring randomly is about 2^{-254} so the + // risk of this happening on any machine ever in the world is much smaller than the risk associated with adding several lines of code. + // + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInit( + _Out_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 6 ) PCBYTE pbMac1, + _In_reads_( 6 ) PCBYTE pbMac2, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _Out_opt_ PBYTE pbCounter, + _Inout_updates_opt_( 32 ) PBYTE pbRand, + _Inout_updates_opt_( 32 ) PBYTE pbMask ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + BYTE counter; + UINT32 notFoundMask; + UINT32 solutionMask; + UINT32 negMask; + BYTE abSeed[SYMCRYPT_HMAC_SHA256_RESULT_SIZE]; + BYTE abValue[SYMCRYPT_HMAC_SHA256_RESULT_SIZE]; + BYTE abSeedKey[16]; // Need only 12, but the extra bytes make the code easier. + SYMCRYPT_HMAC_SHA256_EXPANDED_KEY hmacSeedKey; + SYMCRYPT_HMAC_SHA256_EXPANDED_KEY hmacValueKey; + SYMCRYPT_HMAC_SHA256_STATE hmacState; + BYTE abTmp[2]; + BYTE pointBuf[ 64 ]; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + UINT64 minMac; + UINT64 maxMac; + + UINT32 nDigits; + PSYMCRYPT_ECURVE pCurve; // Only a cache, pState->pCurve owns the allocation + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_MODELEMENT peX = NULL; + PSYMCRYPT_MODELEMENT peY = NULL; + PSYMCRYPT_MODELEMENT peCubic = NULL; + PSYMCRYPT_MODELEMENT peTmp = NULL; + PSYMCRYPT_ECPOINT poPWECandidate = NULL; + + // Set state to 0 so that our pointers have valid values. + SymCryptWipe( pState, sizeof( *pState ) ); + + // Per IEEE 802.11-2016 section 12.4.4.1 the mandatory-to-implement curve is + // number 19 from the IANA Group description for RFC 2409 (IKE) + // The IANA website maps this to a 256-bit Random ECP group in RFC 5903. + // RFC 5903 specifies this group to be identical to the NIST P256 curve. 
+ pCurve = SymCryptEcurveAllocate( SymCryptEcurveParamsNistP256, 0 ); + pState->pCurve = pCurve; + if( pCurve == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pState->macAlgorithm = SymCryptHmacSha256Algorithm; + + pState->peRand = SymCryptModElementAllocate( pCurve->GOrd ); + if( pState->peRand == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pState->peMask = SymCryptModElementAllocate( pCurve->GOrd ); + if( pState->peMask == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pState->poPWE = SymCryptEcpointAllocate( pCurve ); + if( pState->poPWE == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + nDigits = SymCryptDigitsFromBits( PRIME_LENGTH_BITS ); + + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigits ), + SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + piTmp = SymCryptIntAllocate( nDigits ); + peX = SymCryptModElementAllocate( pCurve->FMod ); + peY = SymCryptModElementAllocate( pCurve->FMod ); + peCubic = SymCryptModElementAllocate( pCurve->FMod ); + peTmp = SymCryptModElementAllocate( pCurve->FMod ); + poPWECandidate = SymCryptEcpointAllocate( pCurve ); + + if( pbScratch == NULL || piTmp == NULL || peX == NULL || peY == NULL || peCubic == NULL || peTmp == NULL || poPWECandidate == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + SymCryptWipeKnownSize( abSeedKey, sizeof( abSeedKey ) ); + memcpy( &abSeedKey[0], pbMac1, 6 ); + minMac = SYMCRYPT_LOAD_MSBFIRST64( abSeedKey ); + memcpy( &abSeedKey[0], pbMac2, 6 ); + maxMac = SYMCRYPT_LOAD_MSBFIRST64( abSeedKey ); + + if( minMac > maxMac ) + { + // MAC values are public, no side-channel issues with this if() + // Swap the two values + minMac ^= maxMac; + maxMac ^= minMac; + 
minMac ^= maxMac; + } + + // Now we write the two MACs into the buffer. + // Note the slight overlap, and the use of 14 bytes rather than 12 + SYMCRYPT_STORE_MSBFIRST64( &abSeedKey[0], maxMac ); + SYMCRYPT_STORE_MSBFIRST64( &abSeedKey[6], minMac ); // This writes up to abSeedKey[14] + + SymCryptHmacSha256ExpandKey( &hmacSeedKey, abSeedKey, 12 ); + SymCryptWipeKnownSize( abSeedKey, sizeof( abSeedKey ) ); // Not strictly speaking a secret, but good general hygiene + + notFoundMask = (UINT32)-1; + counter = 0; + + // We exit the loop only after 40 or more iterations + // This greatly reduces the side-channel of how often we run this loop. + while( notFoundMask != 0 || counter < 40 ) + { + counter += 1; + if( counter == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // pwd-seed = Hmac-sha256( MacA || MacB , Password || counter ) + SymCryptHmacSha256Init( &hmacState, &hmacSeedKey ); + SymCryptHmacSha256Append( &hmacState, pbPassword, cbPassword ); + SymCryptHmacSha256Append( &hmacState, &counter, 1 ); + SymCryptHmacSha256Result( &hmacState, abSeed ); + + // pwd-value + SymCryptHmacSha256ExpandKey( &hmacValueKey, abSeed, sizeof( abSeed ) ); + SymCryptHmacSha256Init( &hmacState, &hmacValueKey ); + + SYMCRYPT_STORE_LSBFIRST16( abTmp, 1 ); + SymCryptHmacSha256Append( &hmacState, abTmp, 2 ); // i value = 1 + // Spec is unclear on whether there should be a terminating 0 on the context + // There are 23 characters in the string, so using len=24 gives us a zero + SymCryptHmacSha256Append( &hmacState, (PCBYTE) "SAE Hunting and Pecking", 23 ); + + // Pick up the byte representation of p from the parameters + SymCryptHmacSha256Append( &hmacState, (BYTE *)(SymCryptEcurveParamsNistP256 + 1), 32 ); + + SYMCRYPT_STORE_LSBFIRST16( abTmp, 256 ); + SymCryptHmacSha256Append( &hmacState, abTmp, 2 ); // Length value = 256 + SymCryptHmacSha256Result( &hmacState, abValue ); + + // Get the pwd-value into an integer + scError = SymCryptIntSetValue( abValue, sizeof( 
abValue ), SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTmp ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check that it is less than P + if( !SymCryptIntIsLessThan( piTmp, SymCryptIntFromModulus( pCurve->FMod ) ) ) + { + // This is a slight side-channel, but our prime P starts with FFFFFFFF so the probability of + // hitting this case is < 2^-32. + continue; + } + + // Compute x^3 + A*x + B + SymCryptIntToModElement( piTmp, pCurve->FMod, peX, pbScratch, cbScratch ); + SymCryptModSquare( pCurve->FMod, peX, peCubic, pbScratch, cbScratch ); + SymCryptModAdd( pCurve->FMod, peCubic, pCurve->A, peCubic, pbScratch, cbScratch ); + SymCryptModMul( pCurve->FMod, peCubic, peX, peCubic, pbScratch, cbScratch ); + SymCryptModAdd( pCurve->FMod, peCubic, pCurve->B, peCubic, pbScratch, cbScratch ); + + // Get the quadratic residue of (x^3 + A*x + B) modulo P if it exists + scError = SymCryptModSqrt( pCurve->FMod, peCubic, &solutionMask, peY, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + solutionMask &= notFoundMask; + + // Pick Y or -Y according to the LSbits + SymCryptModElementToInt( pCurve->FMod, peY, piTmp, pbScratch, cbScratch ); + SymCryptModNeg( pCurve->FMod, peY, peTmp, pbScratch, cbScratch ); + + negMask = 0 - ((abSeed[ sizeof( abSeed ) - 1 ] ^ SymCryptIntGetValueLsbits32( piTmp ) ) & 1); + SymCryptModElementMaskedCopy( pCurve->FMod, peTmp, peY, negMask ); + + SymCryptModElementGetValue( pCurve->FMod, peX, &pointBuf[ 0], 32, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + SymCryptModElementGetValue( pCurve->FMod, peY, &pointBuf[32], 32, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + scError = SymCryptEcpointSetValue( pCurve, + pointBuf, + sizeof( pointBuf ), + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + poPWECandidate, + 0, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptEcpointMaskedCopy( pCurve, poPWECandidate, 
pState->poPWE, solutionMask ); + pState->counter |= (BYTE)(counter & solutionMask); + + notFoundMask &= ~solutionMask; + } + + scError = SymCrypt802_11SaeCustomSetRandMask( pState, pbRand, 32, pbMask, 32, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if( pbCounter != NULL ) + { + *pbCounter = pState->counter; + } + +cleanup: + + SymCryptWipe( &hmacSeedKey, sizeof( hmacSeedKey ) ); + SymCryptWipe( &hmacValueKey, sizeof( hmacValueKey ) ); + SymCryptWipe( abSeed, sizeof( abSeed ) ); + SymCryptWipe( abValue, sizeof( abValue ) ); + SymCryptWipe( pointBuf, sizeof( pointBuf ) ); + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + if( peX != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peX ); + peX = NULL; + } + + if( peY != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peY ); + peY = NULL; + } + + if( peCubic != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peCubic ); + peCubic = NULL; + } + + if( peTmp != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peTmp ); + peTmp = NULL; + } + + if( poPWECandidate != NULL ) + { + SymCryptEcpointFree( pCurve, poPWECandidate ); + poPWECandidate = NULL; + } + + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCrypt802_11SaeCustomDestroy( pState ); + } + + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + pbScratch = NULL; + } + + return scError; +} + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCreatePTGeneric( + SYMCRYPT_802_11_SAE_GROUP group, + _In_reads_( cbSsid ) PCBYTE pbSsid, + SIZE_T cbSsid, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _In_reads_opt_( cbPasswordIdentifier ) PCBYTE pbPasswordIdentifier, + SIZE_T cbPasswordIdentifier, + _Out_writes_( cbPT ) PBYTE pbPT, + SIZE_T cbPT) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SIZE_T cbIkm = 0; + SIZE_T cbScratch = 0; + + PBYTE pbPwdValue = NULL; + UINT32 cbPwdValue = 0; + PBYTE pbScratch = NULL; + 
SYMCRYPT_HKDF_EXPANDED_KEY hkdfKey; + + PSYMCRYPT_ECURVE pCurve = NULL; + PCSYMCRYPT_MAC pMacAlgorithm = NULL; + PSYMCRYPT_INT piU1 = NULL; + PSYMCRYPT_INT piU2 = NULL; + PSYMCRYPT_MODELEMENT peU1 = NULL; + PSYMCRYPT_MODELEMENT peU2 = NULL; + + PSYMCRYPT_ECPOINT poP1 = NULL; + PSYMCRYPT_ECPOINT poP2 = NULL; + PSYMCRYPT_ECPOINT poPT = NULL; + + PCSYMCRYPT_SAE_GROUP_DATA pGroupData = NULL; + + + pGroupData = SymCryptSaeFindGroupData( group ); + + // Provided IANA group number must match one of the supported groups + if ( pGroupData == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Construct the objects associated with the IANA group number + pCurve = SymCryptEcurveAllocate( *( pGroupData->pCurveParams), 0 ); + if( pCurve == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pMacAlgorithm = *( pGroupData->macAlgorithm ); + + const UINT32 nDigits = SymCryptEcurveDigitsofFieldElement( pCurve ); + + cbIkm = cbPassword + cbPasswordIdentifier; + cbScratch = SYMCRYPT_MAX( cbIkm, + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ), + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ) ) ) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + // len = olen( p ) + floor( olen( p ) / 2 ) + cbPwdValue = SYMCRYPT_BYTES_FROM_BITS(pCurve->FModBitsize) + SYMCRYPT_BYTES_FROM_BITS(pCurve->FModBitsize) / 2; + + pbPwdValue = SymCryptCallbackAlloc( cbPwdValue ); + + piU1 = SymCryptIntAllocate( SymCryptDigitsFromBits( cbPwdValue * 8 ) ); + piU2 = SymCryptIntAllocate( SymCryptDigitsFromBits( cbPwdValue * 8 ) ); + peU1 = SymCryptModElementAllocate( pCurve->FMod ); + peU2 = SymCryptModElementAllocate( pCurve->FMod ); + + poP1 = SymCryptEcpointAllocate( pCurve ); + poP2 = SymCryptEcpointAllocate( pCurve ); + poPT = SymCryptEcpointAllocate( pCurve ); 
+ + if( pbScratch == NULL || pbPwdValue == NULL || piU1 == NULL || piU2 == NULL || + peU1 == NULL || peU2 == NULL || poP1 == NULL || poP2 == NULL || poPT == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // pwd-seed = HKDF-Extract( ssid, password [|| identifier] ) + // Note that SymCryptHkdfExpandKey corresponds to HKDF-Extract + memcpy( pbScratch, pbPassword, cbPassword ); + if( pbPasswordIdentifier ) + { + memcpy( pbScratch + cbPassword, pbPasswordIdentifier, cbPasswordIdentifier ); + } + + scError = SymCryptHkdfExpandKey( &hkdfKey, pMacAlgorithm, pbScratch, cbIkm, pbSsid, cbSsid ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // pwd-value = HKDF-Expand( pwd-seed, "SAE Hash to Element u1 P1", len ) + // Note that SymCryptHkdf derive corresponds to HKDF-Expand + // Salt does not include a null terminator, so the length is 25 chars + scError = SymCryptHkdfDerive( &hkdfKey, (PCBYTE) "SAE Hash to Element u1 P1", 25, pbPwdValue, cbPwdValue ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // u1 = pwd-value modulo p + scError = SymCryptIntSetValue( pbPwdValue, cbPwdValue, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piU1 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptIntToModElement( piU1, pCurve->FMod, peU1, pbScratch, cbScratch ); + + // P1 = SSWU( u1 ) + // Propagate a failure exactly as is done for P2; otherwise PT would be computed from an unset P1 + scError = SymCryptSswu( pCurve, pGroupData->z, peU1, poP1, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // pwd-value = HKDF-Expand( pwd-seed, "SAE Hash to Element u2 P2", len ) + scError = SymCryptHkdfDerive( &hkdfKey, (PCBYTE) "SAE Hash to Element u2 P2", 25, pbPwdValue, cbPwdValue ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // u2 = pwd-value modulo p + scError = SymCryptIntSetValue( pbPwdValue, cbPwdValue, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piU2 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptIntToModElement( piU2, pCurve->FMod, peU2, pbScratch, cbScratch ); + + // P2 = SSWU( u2 ) + scError = 
SymCryptSswu( pCurve, pGroupData->z, peU2, poP2, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // PT = P1 + P2 + SymCryptEcpointAdd( pCurve, poP1, poP2, poPT, 0, pbScratch, cbScratch ); + + scError = SymCryptEcpointGetValue( pCurve, + poPT, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + pbPT, + cbPT, + 0, + pbScratch, + cbScratch ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + +cleanup: + + // hkdfKey holds the password-derived pwd-seed; wipe it on every exit path + SymCryptWipe( &hkdfKey, sizeof( hkdfKey ) ); + + if( poP2 != NULL ) + { + SymCryptEcpointFree( pCurve, poP2 ); + poP2 = NULL; + } + + if( poP1 != NULL ) + { + SymCryptEcpointFree( pCurve, poP1 ); + poP1 = NULL; + } + + if( poPT != NULL ) + { + SymCryptEcpointFree( pCurve, poPT ); + poPT = NULL; + } + + if( peU2 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peU2 ); + peU2 = NULL; + } + + if( peU1 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peU1 ); + peU1 = NULL; + } + + if( piU2 != NULL ) + { + SymCryptIntFree( piU2 ); + piU2 = NULL; + } + + if( piU1 != NULL ) + { + SymCryptIntFree( piU1 ); + piU1 = NULL; + } + + if( pbPwdValue != NULL ) + { + SymCryptWipe( pbPwdValue, cbPwdValue ); + SymCryptCallbackFree( pbPwdValue ); + pbPwdValue = NULL; + } + + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + pbScratch = NULL; + } + + if ( pCurve != NULL ) + { + SymCryptEcurveFree( pCurve ); + pCurve = NULL; + } + + return scError; +} + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCreatePT( + _In_reads_( cbSsid ) PCBYTE pbSsid, + SIZE_T cbSsid, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _In_reads_opt_( cbPasswordIdentifier ) PCBYTE pbPasswordIdentifier, + SIZE_T cbPasswordIdentifier, + _Out_writes_( 64 ) PBYTE pbPT ) +{ + return SymCrypt802_11SaeCustomCreatePTGeneric( SYMCRYPT_SAE_GROUP_19, + pbSsid, + cbSsid, + pbPassword, + cbPassword, + pbPasswordIdentifier, + cbPasswordIdentifier, + pbPT, + 64 ); +} + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInitH2EGeneric( + _Out_ 
PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + SYMCRYPT_802_11_SAE_GROUP group, + _In_reads_( cbPT ) PCBYTE pbPT, + SIZE_T cbPT, + _In_reads_( 6 ) PCBYTE pbMacA, + _In_reads_( 6 ) PCBYTE pbMacB, + _Inout_updates_opt_( cbRand ) PBYTE pbRand, + SIZE_T cbRand, + _Inout_updates_opt_( cbMask ) PBYTE pbMask, + SIZE_T cbMask) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + BYTE hmacKeyBytes[SYMCRYPT_SAE_MAX_HMAC_OUTPUT_SIZE_BYTES] = { 0 }; + BYTE valBytes[SYMCRYPT_SAE_MAX_HMAC_OUTPUT_SIZE_BYTES] = { 0 }; + BYTE macBuffer[16] = { 0 }; // Need only 12, but the extra bytes make the code easier. + SYMCRYPT_MAC_EXPANDED_KEY hmacKey = { 0 }; + SYMCRYPT_MAC_STATE hmacState = { 0 }; + + SIZE_T cbScratch = 0; + PBYTE pbScratch = NULL; + + UINT64 minMac = 0; + UINT64 maxMac = 0; + + UINT32 nDigits = 0; + + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_MODULUS pmMod = NULL; + PSYMCRYPT_MODELEMENT peVal = NULL; + PSYMCRYPT_MODELEMENT peTmp = NULL; + PSYMCRYPT_ECPOINT poPT = NULL; + PCSYMCRYPT_SAE_GROUP_DATA pGroupData = NULL; + PCSYMCRYPT_MAC pMacAlgorithm = NULL; + + // Set state to 0 so that our pointers have valid values. 
+ SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + + PSYMCRYPT_ECURVE pCurve = NULL; // Weak reference; curve is owned by pState + + pGroupData = SymCryptSaeFindGroupData( group ); + + // Provided IANA group number must match one of the supported groups + if ( pGroupData == NULL ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Construct the objects associated with the IANA group number + pCurve = SymCryptEcurveAllocate( *( pGroupData->pCurveParams ), 0 ); + if ( pCurve == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pState->pCurve = pCurve; + + pMacAlgorithm = *( pGroupData->macAlgorithm ); + + SIZE_T cbHMACOutputSize = pMacAlgorithm->resultSize; + + pState->peRand = SymCryptModElementAllocate( pCurve->GOrd ); + if( pState->peRand == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pState->peMask = SymCryptModElementAllocate( pCurve->GOrd ); + if( pState->peMask == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pState->poPWE = SymCryptEcpointAllocate( pCurve ); + if( pState->poPWE == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + nDigits = SymCryptDigitsFromBits( pCurve->GOrdBitsize ); + + piTmp = SymCryptIntAllocate( nDigits ); + pmMod = SymCryptModulusAllocate( nDigits ); + poPT = SymCryptEcpointAllocate( pCurve ); + + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ), + SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS ( pCurve, 1 ) ) ) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + if( piTmp == NULL || pmMod == NULL || poPT == NULL || pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + memcpy( &macBuffer[0], pbMacA, 6 ); + minMac = 
SYMCRYPT_LOAD_MSBFIRST64( macBuffer ); + memcpy( &macBuffer[0], pbMacB, 6 ); + maxMac = SYMCRYPT_LOAD_MSBFIRST64( macBuffer ); + + if( minMac > maxMac ) + { + // MAC values are public, no side-channel issues with this if() + // Swap the two values + minMac ^= maxMac; + maxMac ^= minMac; + minMac ^= maxMac; + } + + // Now we write the two MACs into the buffer. + // Note the slight overlap, and the use of 14 bytes rather than 12 + SYMCRYPT_STORE_MSBFIRST64( &macBuffer[0], maxMac ); + SYMCRYPT_STORE_MSBFIRST64( &macBuffer[6], minMac ); // This writes macBuffer[6..13] + + // val = hmac-sha256( 0^n, maxMac || minMac ) + // The HMAC key is a buffer of all zeros whose length equals the length of the digest from the hash function + pMacAlgorithm->expandKeyFunc(&hmacKey, hmacKeyBytes, cbHMACOutputSize); + + pMacAlgorithm->initFunc( &hmacState, &hmacKey ); + pMacAlgorithm->appendFunc( &hmacState, macBuffer, 12 ); + pMacAlgorithm->resultFunc( &hmacState, valBytes ); + + // val = val modulo (q - 1) + 1 + SymCryptIntSubUint32( SymCryptIntFromModulus(pCurve->GOrd), 1, piTmp ); + SymCryptIntToModulus( piTmp, pmMod, 1, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratch ); + + peVal = SymCryptModElementAllocate( pmMod ); + peTmp = SymCryptModElementAllocate( pmMod ); + + if( peVal == NULL || peTmp == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptModElementSetValue( valBytes, cbHMACOutputSize, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pmMod, peVal, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptModElementSetValueUint32( 1, pmMod, peTmp, pbScratch, cbScratch ); + SymCryptModAdd( pmMod, peVal, peTmp, peVal, pbScratch, cbScratch ); + + SymCryptModElementToInt( pmMod, peVal, piTmp, pbScratch, cbScratch ); + + scError = SymCryptEcpointSetValue( pCurve, + pbPT, + cbPT, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + poPT, + 0, + pbScratch, + cbScratch ); + if( 
scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptEcpointScalarMul( pCurve, piTmp, poPT, 0, pState->poPWE, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCrypt802_11SaeCustomSetRandMask( pState, pbRand, cbRand, pbMask, cbMask, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + if( peTmp != NULL ) + { + SymCryptModElementFree( pmMod, peTmp ); + peTmp = NULL; + } + + if( peVal != NULL ) + { + SymCryptModElementFree( pmMod, peVal ); + peVal = NULL; + } + + if( poPT != NULL ) + { + SymCryptEcpointFree( pCurve, poPT ); + poPT = NULL; + } + + if( pmMod != NULL ) + { + SymCryptModulusFree( pmMod ); + pmMod = NULL; + } + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + pbScratch = NULL; + } + + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCrypt802_11SaeCustomDestroy( pState ); + } + + return scError; +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInitH2E( + _Out_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 64 ) PCBYTE pbPT, + _In_reads_( 6 ) PCBYTE pbMacA, + _In_reads_( 6 ) PCBYTE pbMacB, + _Inout_updates_opt_( 32 ) PBYTE pbRand, + _Inout_updates_opt_( 32 ) PBYTE pbMask ) +{ + return SymCrypt802_11SaeCustomInitH2EGeneric( pState, + SYMCRYPT_SAE_GROUP_19, + pbPT, + 64, + pbMacA, + pbMacB, + pbRand, + 32, + pbMask, + 32 ); +} + + +VOID +SymCrypt802_11SaeCustomDestroy( + _Inout_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState ) +{ + PSYMCRYPT_ECURVE pCurve = pState->pCurve; + + if( pState->poPWE != NULL ) + { + SymCryptEcpointFree( pCurve, pState->poPWE ); + } + + if( pState->peMask != NULL ) + { + SymCryptModElementFree( pCurve->GOrd, pState->peMask ); + } + + if( pState->peRand != NULL ) + { + SymCryptModElementFree( pCurve->GOrd, pState->peRand ); + } + + if( pCurve != NULL ) + { + SymCryptEcurveFree( pCurve 
); + } + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitCreateGeneric( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Out_writes_( cbCommitScalar ) PBYTE pbCommitScalar, + SIZE_T cbCommitScalar, + _Out_writes_( cbCommitElement ) PBYTE pbCommitElement, + SIZE_T cbCommitElement) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_MODELEMENT peTmp = NULL; + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_ECPOINT poPoint = NULL; + PBYTE pbScratch = NULL; + SIZE_T cbScratch; + SIZE_T nDigits; + + PCSYMCRYPT_ECURVE pCurve = pState->pCurve; + + nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize ); + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ), + SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ) ); + + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + peTmp = SymCryptModElementAllocate( pCurve->GOrd ); + piTmp = SymCryptIntAllocate( SymCryptEcurveDigitsofScalarMultiplier( pCurve ) ); + poPoint = SymCryptEcpointAllocate( pCurve ); + + if( peTmp == NULL || piTmp == NULL || poPoint == NULL || pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + SymCryptModAdd( pCurve->GOrd, pState->peRand, pState->peMask, peTmp, pbScratch, cbScratch ); + scError = SymCryptModElementGetValue( pCurve->GOrd, peTmp, pbCommitScalar, cbCommitScalar, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptModElementToInt( pCurve->GOrd, pState->peMask, piTmp, pbScratch, cbScratch ); + scError = SymCryptEcpointScalarMul( pCurve, + piTmp, + pState->poPWE, + 0, + poPoint, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Now we have mask * PWE, but we need the negative... 
+ SymCryptEcpointNegate( pCurve, poPoint, (UINT32)-1, pbScratch, cbScratch ); + + scError = SymCryptEcpointGetValue( pCurve, + poPoint, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + pbCommitElement, + cbCommitElement, + 0, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + if( peTmp != NULL ) + { + SymCryptModElementFree( pCurve->GOrd, peTmp ); + peTmp = NULL; + } + + if( poPoint != NULL ) + { + SymCryptEcpointFree( pCurve, poPoint ); + poPoint = NULL; + } + + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + pbScratch = NULL; + } + + return scError; +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitCreate( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Out_writes_( 32 ) PBYTE pbCommitScalar, + _Out_writes_( 64 ) PBYTE pbCommitElement ) +{ + return SymCrypt802_11SaeCustomCommitCreateGeneric( pState, + pbCommitScalar, + 32, + pbCommitElement, + 64 ); +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitProcessGeneric( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( cbPeerCommitScalar ) PCBYTE pbPeerCommitScalar, + SIZE_T cbPeerCommitScalar, + _In_reads_( cbPeerCommitElement ) PCBYTE pbPeerCommitElement, + SIZE_T cbPeerCommitElement, + _Out_writes_( cbSharedSecret ) PBYTE pbSharedSecret, + SIZE_T cbSharedSecret, + _Out_writes_( cbScalarSum ) PBYTE pbScalarSum, + SIZE_T cbScalarSum ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_ECURVE pCurve = pState->pCurve; + PSYMCRYPT_MODELEMENT peCommitScalarSum = NULL; + PSYMCRYPT_ECPOINT poPeerCommitElement = NULL; + PSYMCRYPT_ECPOINT poTmp = NULL; + PSYMCRYPT_INT piTmp = NULL; + UINT32 nDigits; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch; + + nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize ); + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( 
nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ), + SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ) ) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + peCommitScalarSum = SymCryptModElementAllocate( pCurve->GOrd ); + poPeerCommitElement = SymCryptEcpointAllocate( pCurve ); + poTmp = SymCryptEcpointAllocate( pCurve ); + piTmp = SymCryptIntAllocate( SymCryptEcurveDigitsofScalarMultiplier( pCurve ) ); + + if( pbScratch == NULL || peCommitScalarSum == NULL || poPeerCommitElement == NULL || poTmp == NULL || piTmp == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // piTmp = peer commit value + scError = SymCryptIntSetValue( pbPeerCommitScalar, cbPeerCommitScalar, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTmp ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // The Standard requires a check that the Peer commit value must be 1 < peer-commit < r where r is the group order. 
+ if( !SymCryptIntIsLessThan( piTmp, SymCryptIntFromModulus( pCurve->GOrd ) ) || + SymCryptIntIsEqualUint32( piTmp, 0 ) || + SymCryptIntIsEqualUint32( piTmp, 1 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptIntToModElement( piTmp, pCurve->GOrd, peCommitScalarSum, pbScratch, cbScratch ); + + // Now compute the sum of the scalar commit values + SymCryptModAdd( pCurve->GOrd, peCommitScalarSum, pState->peRand, peCommitScalarSum, pbScratch, cbScratch ); + SymCryptModAdd( pCurve->GOrd, peCommitScalarSum, pState->peMask, peCommitScalarSum, pbScratch, cbScratch ); + + scError = SymCryptEcpointSetValue( pCurve, + pbPeerCommitElement, + cbPeerCommitElement, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + poPeerCommitElement, + 0, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // The EcPointSetValue routine returns an error if either coordinate is >= P. + // We need to check that the point is on the curve and not the zero point of the curve + // (The zero point is sometimes called the 'point at infinity'.) 
+ if( !SymCryptEcpointOnCurve( pCurve, poPeerCommitElement, pbScratch, cbScratch ) || + SymCryptEcpointIsZero( pCurve, poPeerCommitElement, pbScratch, cbScratch ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + + scError = SymCryptEcpointScalarMul( pCurve, + piTmp, + pState->poPWE, + 0, + poTmp, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptEcpointAdd( pCurve, poTmp, poPeerCommitElement, poTmp, 0, pbScratch, cbScratch ); + + SymCryptModElementToInt( pCurve->GOrd, pState->peRand, piTmp, pbScratch, cbScratch ); + scError = SymCryptEcpointScalarMul( pCurve, + piTmp, + poTmp, + 0, + poTmp, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptEcpointGetValue( pCurve, + poTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_X, + pbSharedSecret, + cbSharedSecret, + 0, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptModElementGetValue( pCurve->GOrd, peCommitScalarSum, pbScalarSum, cbScalarSum, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + if( peCommitScalarSum != NULL ) + { + SymCryptModElementFree( pCurve->GOrd, peCommitScalarSum ); + peCommitScalarSum = NULL; + } + + if( poPeerCommitElement != NULL ) + { + SymCryptEcpointFree( pCurve, poPeerCommitElement ); + poPeerCommitElement = NULL; + } + + if( poTmp != NULL ) + { + SymCryptEcpointFree( pCurve, poTmp ); + poTmp = NULL; + } + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + pbScratch = NULL; + } + + return scError; +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitProcess( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 32 ) PCBYTE pbPeerCommitScalar, + _In_reads_( 64 ) 
PCBYTE pbPeerCommitElement, + _Out_writes_( 32 ) PBYTE pbSharedSecret, + _Out_writes_( 32 ) PBYTE pbScalarSum ) +{ + return SymCrypt802_11SaeCustomCommitProcessGeneric( pState, + pbPeerCommitScalar, + 32, + pbPeerCommitElement, + 64, + pbSharedSecret, + 32, + pbScalarSum, + 32 ); +} diff --git a/libs/symcrypt/lib/ScsTable.c b/libs/symcrypt/lib/ScsTable.c new file mode 100644 index 00000000000..e9e782d4f43 --- /dev/null +++ b/libs/symcrypt/lib/ScsTable.c @@ -0,0 +1,388 @@ +// +// ScsTable.c +// Side-channel safe table +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// +// These functions implement a table of large elements. +// Reading an element from the table is done in a way that does not reveal the +// element accessed through memory side channels. +// Basically, the whole table is read by the CPU, and the required data is selected +// using boolean operations. +// + +#include "precomp.h" + +// +// Items are a multiple of SYMCRYPT_DIGIT_SIZE long. +// +// Format: +// The memory format is parameterized for optimal implementations on several +// different architectures. +// +// The following parameters define the format: +// - group_size +// - interleave_size +// +// Let nElements be the number of elements in the table. +// If necessary, the size of each element in the table is rounded up to a multiple of interleave_size. +// Each whole group of group_size elements is interleaved with each other. +// The last (nElements % group_size) elements are simply stored consecutively. +// (For now we simply require that nElements is a multiple of group_size.) +// Within each group of group_size, the data for the elements are interleaved in natural order +// using chunks of interleave_size bytes. +// +// The choice of group_size and interleave_size depends on the CPU architecture, CPU features, +// and even the element size. (E.g. 
1024-bit elements might interleave @ 64 bytes on an AVX512 +// capable CPU, but 256-bit elements would have to interleave at 16 or 32 bytes on that same CPU.) +// + +// Currently these are constants as that allows easier optimizations... +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_SCSTABLE_USE64 1 +#define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE 32 +#define SYMCRYPT_SCSTABLE_GROUP_SIZE 4 +typedef UINT64 SYMCRYPT_SCSTABLE_TYPE; +#else +#define SYMCRYPT_SCSTABLE_USE64 0 +#define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE 16 +#define SYMCRYPT_SCSTABLE_GROUP_SIZE 4 +typedef UINT32 SYMCRYPT_SCSTABLE_TYPE; +#endif + +UINT32 +SYMCRYPT_CALL +SymCryptScsTableInit( + _Out_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 nElements, + UINT32 elementSize ) +{ + UINT32 groupSize; + UINT32 interleaveSize; + UINT32 cbBuffer; + + SYMCRYPT_ASSERT( nElements > 0 ); + +#pragma warning( suppress: 4127 ) // conditional expression is constant + if( SYMCRYPT_CPU_AMD64 && elementSize == 128 ) + { + // Highly optimized assembler mode for 1024-bit entries for RSA-2048... + interleaveSize = 128; + groupSize = 1; + } else { + // Standard C implementation + interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE; + groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE; + } + + // Right now, we limit ourselves to element sizes that are a multiple of the interleaveSize and + // # elements that are a multiple of the group size. 
+ // We also limit ourselves to sensible input sizes + SYMCRYPT_ASSERT( elementSize % interleaveSize == 0 && nElements % groupSize == 0 && (elementSize | nElements) < (1 << 16) && elementSize > 0 ); + + cbBuffer = elementSize * nElements; // Each factor is < 2^16, so there is no overflow in the mul + + pScsTable->groupSize = groupSize; + pScsTable->interleaveSize = interleaveSize; + pScsTable->nElements = nElements; + pScsTable->elementSize = elementSize; + pScsTable->cbTableData = cbBuffer; + pScsTable->pbTableData = NULL; + + return cbBuffer; +} + +VOID +SYMCRYPT_CALL +SymCryptScsTableSetBuffer( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + _Inout_updates_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ) +{ + SYMCRYPT_ASSERT(cbBuffer >= pScsTable->cbTableData); + UNREFERENCED_PARAMETER( cbBuffer ); + + pScsTable->pbTableData = pbBuffer; +} + + +C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 16 || SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 32 ); +// check that an interleave size is exactly 4 words +C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 4 * sizeof( SYMCRYPT_SCSTABLE_TYPE ) ); + +VOID +SYMCRYPT_CALL +SymCryptScsTableStoreC( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _In_reads_bytes_( cbData ) PCBYTE pbData, + UINT32 cbData ) +{ + UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE; + UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE; + UINT32 elementSize = pScsTable->elementSize; + UINT32 groupOffset; + + SYMCRYPT_ASSERT( groupSize == pScsTable->groupSize ); + SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize ); + + SYMCRYPT_ASSERT( cbData == elementSize ); + UNREFERENCED_PARAMETER( cbData ); + + SYMCRYPT_ASSERT(iIndex < pScsTable->nElements); + + groupOffset = iIndex % groupSize; + + // No UINT32 overflow here: given the limits asserted in SymCryptScsTableInit (elementSize, nElements < 2^16) and iIndex < nElements above, this offset is below cbTableData + SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) (pScsTable->pbTableData + (iIndex - groupOffset) * elementSize + groupOffset * interleaveSize); + SYMCRYPT_SCSTABLE_TYPE * pSrc = 
(SYMCRYPT_SCSTABLE_TYPE *) pbData; + + UINT32 nInterleaves = elementSize / interleaveSize; + + do + { + pDst[0] = pSrc[0]; + pDst[1] = pSrc[1]; + pDst[2] = pSrc[2]; + pDst[3] = pSrc[3]; + + pDst += interleaveSize * groupSize / sizeof( *pDst ); + pSrc += interleaveSize / sizeof( *pSrc ); + nInterleaves--; + } while( nInterleaves > 0 ); + +} + +#if SYMCRYPT_CPU_AMD64 +VOID +SYMCRYPT_CALL +SymCryptScsTableStore128Xmm( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _In_reads_bytes_( cbData ) PCBYTE pbData, + UINT32 cbData ) +{ + __m128i * pDst = (__m128i *) (pScsTable->pbTableData + iIndex * 128); + __m128i * pSrc = (__m128i *) pbData; + + SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 ); + UNREFERENCED_PARAMETER( cbData ); + + pDst[0] = pSrc[0]; + pDst[1] = pSrc[1]; + pDst[2] = pSrc[2]; + pDst[3] = pSrc[3]; + pDst[4] = pSrc[4]; + pDst[5] = pSrc[5]; + pDst[6] = pSrc[6]; + pDst[7] = pSrc[7]; +} +#endif // AMD64 + +VOID +SYMCRYPT_CALL +SymCryptScsTableLoadC( + _In_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _Out_writes_bytes_(cbData) PBYTE pbData, + UINT32 cbData ) +{ + UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE; + UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE; + UINT32 elementSize = pScsTable->elementSize; + + SYMCRYPT_SCSTABLE_TYPE mask0, mask1, mask2, mask3; + UINT32 i; + UINT32 j; + UINT32 nElements = pScsTable->nElements; + + const SYMCRYPT_SCSTABLE_TYPE * pSrc = (SYMCRYPT_SCSTABLE_TYPE *) pScsTable->pbTableData; + SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) pbData; + SYMCRYPT_SCSTABLE_TYPE * pD; + + UINT32 nInterleaves = elementSize / interleaveSize; + + + SYMCRYPT_ASSERT( groupSize == pScsTable->groupSize ); + SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize ); + + SYMCRYPT_ASSERT( cbData >= sizeof( SYMCRYPT_SCSTABLE_TYPE ) * SYMCRYPT_SCSTABLE_GROUP_SIZE ); + SYMCRYPT_ASSERT( cbData == pScsTable->elementSize ); + 
UNREFERENCED_PARAMETER( cbData ); + +#if SYMCRYPT_SCSTABLE_USE64 +#define SCS_MASK_EQUAL32( _a, _b ) ( ~(UINT64) ((INT64) ((UINT64)0 - (_a ^ _b)) >> 32 ) ) +#else +#define SCS_MASK_EQUAL32( _a, _b ) (SYMCRYPT_MASK32_EQ( _a, _b )) +#endif + + i = 0; + + mask0 = SCS_MASK_EQUAL32( i+0, iIndex ); + mask1 = SCS_MASK_EQUAL32( i+1, iIndex ); + mask2 = SCS_MASK_EQUAL32( i+2, iIndex ); + mask3 = SCS_MASK_EQUAL32( i+3, iIndex ); + + j = nInterleaves; + pD = pDst; + + do { + pD[0] = (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]); + pD[1] = (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]); + pD[2] = (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]); + pD[3] = (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]); + pD += interleaveSize / sizeof( *pD ); + pSrc += interleaveSize * groupSize / sizeof( *pSrc ); + j--; + } while( j > 0 ); + + i += groupSize; + + while (i + groupSize <= nElements) + { + + mask0 = SCS_MASK_EQUAL32( i+0, iIndex ); + mask1 = SCS_MASK_EQUAL32( i+1, iIndex ); + mask2 = SCS_MASK_EQUAL32( i+2, iIndex ); + mask3 = SCS_MASK_EQUAL32( i+3, iIndex ); + + j = nInterleaves; + pD = pDst; + + do { + pD[0] |= (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]); + pD[1] |= (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]); + pD[2] |= (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]); + pD[3] |= (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]); + pD += interleaveSize / sizeof( *pD ); + pSrc += interleaveSize * groupSize / sizeof( *pSrc ); + j--; + } while( j > 0 ); + + i += groupSize; + } +} + +#if SYMCRYPT_CPU_AMD64 +VOID +SYMCRYPT_CALL +SymCryptScsTableLoad128Xmm( + _In_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _Out_writes_bytes_(cbData) PBYTE pbData, + UINT32 cbData ) +{ + UINT32 nElements = pScsTable->nElements; + + 
__m128i R0, R1, R2, R3, R4, R5, R6, R7; + __m128i T0, T1; + + __m128i Count = _mm_setzero_si128(); + __m128i Ones = _mm_set_epi32( 1, 1, 1, 1 ); + __m128i Entry = _mm_set_epi32( iIndex, iIndex, iIndex, iIndex ); + __m128i Mask; + __m128i * pSrc = (__m128i *) pScsTable->pbTableData; + __m128i * pDst = (__m128i *) pbData; + + SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 ); + UNREFERENCED_PARAMETER( cbData ); + + Mask = _mm_cmpeq_epi32( Count, Entry ); + Count = _mm_add_epi32( Count, Ones ); + + R0 = _mm_and_si128( Mask, pSrc[0] ); + R1 = _mm_and_si128( Mask, pSrc[1] ); + R2 = _mm_and_si128( Mask, pSrc[2] ); + R3 = _mm_and_si128( Mask, pSrc[3] ); + R4 = _mm_and_si128( Mask, pSrc[4] ); + R5 = _mm_and_si128( Mask, pSrc[5] ); + R6 = _mm_and_si128( Mask, pSrc[6] ); + R7 = _mm_and_si128( Mask, pSrc[7] ); + + pSrc += 8; + + while( --nElements > 0 ) + { + Mask = _mm_cmpeq_epi32( Count, Entry ); + Count = _mm_add_epi32( Count, Ones ); + + T0 = _mm_and_si128( Mask, pSrc[0] ); R0 = _mm_or_si128( R0, T0 ); + T1 = _mm_and_si128( Mask, pSrc[1] ); R1 = _mm_or_si128( R1, T1 ); + T0 = _mm_and_si128( Mask, pSrc[2] ); R2 = _mm_or_si128( R2, T0 ); + T1 = _mm_and_si128( Mask, pSrc[3] ); R3 = _mm_or_si128( R3, T1 ); + T0 = _mm_and_si128( Mask, pSrc[4] ); R4 = _mm_or_si128( R4, T0 ); + T1 = _mm_and_si128( Mask, pSrc[5] ); R5 = _mm_or_si128( R5, T1 ); + T0 = _mm_and_si128( Mask, pSrc[6] ); R6 = _mm_or_si128( R6, T0 ); + T1 = _mm_and_si128( Mask, pSrc[7] ); R7 = _mm_or_si128( R7, T1 ); + pSrc += 8; + } + + pDst[0] = R0; + pDst[1] = R1; + pDst[2] = R2; + pDst[3] = R3; + pDst[4] = R4; + pDst[5] = R5; + pDst[6] = R6; + pDst[7] = R7; +} +#endif // AMD64 + +VOID +SYMCRYPT_CALL +SymCryptScsTableStore( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _In_reads_bytes_( cbData ) PCBYTE pbData, + UINT32 cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + + if( pScsTable->elementSize == 128 ) + { + SymCryptScsTableStore128Xmm( 
pScsTable, iIndex, pbData, cbData ); + } else { + SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData ); + } + +#else + + SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData ); + +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptScsTableLoad( + _In_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _Out_writes_bytes_(cbData) PBYTE pbData, + UINT32 cbData ) +{ + // This is the side-channel safe routine + +#if SYMCRYPT_CPU_AMD64 + + if( pScsTable->elementSize == 128 ) + { + SymCryptScsTableLoad128Xmm( pScsTable, iIndex, pbData, cbData ); + } else { + SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData ); + } + +#else + + SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData ); + +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptScsTableWipe( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable ) +{ + SymCryptWipe( pScsTable->pbTableData, pScsTable->cbTableData ); +} diff --git a/libs/symcrypt/lib/a_dispatch.c b/libs/symcrypt/lib/a_dispatch.c new file mode 100644 index 00000000000..53eee734c3b --- /dev/null +++ b/libs/symcrypt/lib/a_dispatch.c @@ -0,0 +1,1028 @@ +// +// a_dispatch.c Dispatch between different arithmetic format implementations. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// SymCrypt can have multiple implementations of the arithmetic operations, and these can +// have incompatible formats used to store the integers. +// This file contains logic to dispatch between these incompatible formats. +// Currently all implementations use the default format, or "Fdef". +// + +#include "precomp.h" + +// +// Define the FDEF dispatch table here. +// This should eventually be split out so that different users of the library can use different +// table sets & implementation choice functions. 
+// + + +const SYMCRYPT_MODULAR_FUNCTIONS g_SymCryptModFns[] = { + SYMCRYPT_MOD_FUNCTIONS_FDEF_GENERIC, // Handles any type of modulus + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY, // Montgomery, only for odd parity-public moduli + +#if 0 && SYMCRYPT_CPU_AMD64 + + SYMCRYPT_MOD_FUNCTIONS_FDEF369_MONTGOMERY, // optimized for 384 and 576-bit moduli + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX256, // Special faster code for 256-bit Montgomery moduli, MULX-based code + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULXP384, // Special faster code for P-384 field modulus, MULX-based code + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX, // MULX-based code, for any size (digit size = 512 bits) + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX1024, // Special faster code for 1024-bit Montgomery moduli, MULX-based code + {NULL,}, + + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULXP256, // Special faster code for P-256 field modulus, MULX-based code + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX384, // Special faster code for 384-bit Montgomery moduli, MULX-based code + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY256, // Special faster code for 256-bit Montgomery moduli + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY512, // Special faster code for 512-bit Montgomery moduli + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY1024, // Special faster code for 1024-bit Montgomery moduli + +#elif 0 && SYMCRYPT_CPU_ARM64 + + SYMCRYPT_MOD_FUNCTIONS_FDEF369_MONTGOMERY, + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_ARM64256, + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_ARM64P384, + {NULL,}, + {NULL,}, + {NULL,}, + +#endif +}; + +#define SymCryptModLabel(_label) (_label << 16) +#define SymCryptModFntableGeneric (SymCryptModLabel('gM') + (0 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomery (SymCryptModLabel('mM') + (1 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntable369Montgomery (SymCryptModLabel('9m') + (2 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryMulx256 
(SymCryptModLabel('2x') + (3 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryMulxP384 (SymCryptModLabel('3n') + (4 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryMulx (SymCryptModLabel('xM') + (5 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryMulx1024 (SymCryptModLabel('1x') + (6 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) + +#define SymCryptModFntableMontgomeryArm64256 (SymCryptModLabel('2m') + (3 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryArm64P384 (SymCryptModLabel('3n') + (4 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) + +// #define SymCryptModFntableMontgomeryMulxP256 (SymCryptModLabel('2n') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +// #define SymCryptModFntableMontgomeryMulx384 (SymCryptModLabel('3x') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +// #define SymCryptModFntableMontgomery256 (SymCryptModLabel('2m') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +// #define SymCryptModFntableMontgomery512 (SymCryptModLabel('5m') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +// #define SymCryptModFntableMontgomery1024 (SymCryptModLabel('1m') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) + +C_ASSERT( (sizeof( g_SymCryptModFns ) & (sizeof( g_SymCryptModFns) - 1 )) == 0 ); // size of the table must be a power of 2 to be CFG-safe. + +const UINT32 g_SymCryptModFnsMask = sizeof( g_SymCryptModFns ) - sizeof( g_SymCryptModFns[0] ); + +// +// Tweaking the selection & function tables allows different tradeoffs of performance vs codesize +// +const SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY SymCryptModulusTypeSelections[] = +{ +#if 0 && SYMCRYPT_CPU_AMD64 + // Mulx used for 0-512 and 577-... 
bits + {SymCryptModFntableMontgomeryMulxP384, SYMCRYPT_CPU_FEATURES_FOR_MULX, 384, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY | SYMCRYPT_MODULUS_FEATURE_NISTP384 }, + {SymCryptModFntableMontgomeryMulx256, SYMCRYPT_CPU_FEATURES_FOR_MULX, 256, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomeryMulx, SYMCRYPT_CPU_FEATURES_FOR_MULX, 512, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntable369Montgomery, 0, 384, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomery, 0, 512, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntable369Montgomery, 0, 576, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomeryMulx1024, SYMCRYPT_CPU_FEATURES_FOR_MULX, 1024, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomeryMulx, SYMCRYPT_CPU_FEATURES_FOR_MULX, 0, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + +#elif 0 && SYMCRYPT_CPU_ARM64 + + {SymCryptModFntableMontgomeryArm64P384, 0, 384, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY | SYMCRYPT_MODULUS_FEATURE_NISTP384 }, + {SymCryptModFntableMontgomeryArm64256, 0, 256, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntable369Montgomery, 0, 384, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomery, 0, 512, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntable369Montgomery, 0, 576, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + +#endif + + {SymCryptModFntableMontgomery, 0, 0, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableGeneric, 0, 0, 0 }, + // This last entry always matches, so the code never falls off the end of this table. +}; + + +// +// At the moment there is only the default number format. 
+// + +UINT32 +SymCryptDigitsFromBits( UINT32 nBits ) +{ + return SymCryptFdefDigitsFromBits( nBits ); +} + + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntAllocate( UINT32 nDigits ) +{ + return SymCryptFdefIntAllocate( nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntFree( _Out_ PSYMCRYPT_INT piObj ) +{ + SymCryptIntWipe( piObj ); + SymCryptCallbackFree( piObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofIntFromDigits( UINT32 nDigits ) +{ + return SymCryptFdefSizeofIntFromDigits( nDigits ); +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + return SymCryptFdefIntCreate( pbBuffer, cbBuffer, nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntWipe( _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piDst ); + + // Wipe the whole structure in one go; + SymCryptWipe( piDst, piDst->cbSize ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntCopy( piSrc, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntMaskedCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 mask ) +{ + SymCryptFdefIntMaskedCopy( piSrc, piDst, mask ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntConditionalCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 cond ) +{ + SymCryptFdefIntConditionalCopy( piSrc, piDst, cond ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntConditionalSwap( + _Inout_ PSYMCRYPT_INT piSrc1, + _Inout_ PSYMCRYPT_INT piSrc2, + UINT32 cond ) +{ + SymCryptFdefIntConditionalSwap( piSrc1, piSrc2, cond ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntBitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ) +{ + return SymCryptFdefIntBitsizeOfObject( piSrc ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntDigitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ) +{ + return piSrc->nDigits; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntCopyMixedSize( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + return 
SymCryptFdefIntCopyMixedSize( piSrc, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc ) +{ + return SymCryptFdefIntBitsizeOfValue( piSrc ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntSetValueUint32( + UINT32 u32Src, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntSetValueUint32( u32Src, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntSetValueUint64( + UINT64 u64Src, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntSetValueUint64( u64Src, piDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntSetValue( pbSrc, cbSrc, format, piDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntGetValue( + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ) +{ + return SymCryptFdefIntGetValue( piSrc, pbDst, cbDst, format ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetValueLsbits32( _In_ PCSYMCRYPT_INT piSrc ) +{ + return SymCryptFdefIntGetValueLsbits32( piSrc ); +} + +UINT64 +SYMCRYPT_CALL +SymCryptIntGetValueLsbits64( _In_ PCSYMCRYPT_INT piSrc ) +{ + return SymCryptFdefIntGetValueLsbits64( piSrc ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntAddUint32( piSrc1, u32Src2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntAddSameSize( piSrc1, piSrc2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntAddMixedSize( piSrc1, piSrc2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT 
piDst ) +{ + return SymCryptFdefIntSubUint32( piSrc1, u32Src2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntSubSameSize( piSrc1, piSrc2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntSubMixedSize( piSrc1, piSrc2, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntNeg( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntNeg( piSrc, piDst ); +} + + +VOID +SYMCRYPT_CALL +SymCryptIntMulPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntMulPow2( piSrc, exp, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntDivPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntDivPow2( piSrc, exp, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntShr1( + UINT32 highestBit, + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntShr1( highestBit, piSrc, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntModPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntModPow2( piSrc, exp, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetBit( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit ) +{ + return SymCryptFdefIntGetBit( piSrc, iBit ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetBits( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit, + UINT32 nBits ) +{ + return SymCryptFdefIntGetBits( piSrc, iBit, nBits ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntSetBits( + _In_ PSYMCRYPT_INT piDst, + UINT32 value, + UINT32 iBit, + UINT32 nBits ) +{ + SymCryptFdefIntSetBits( piDst, value, iBit, nBits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsEqualUint32( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ UINT32 u32Src2 ) +{ + return SymCryptFdefIntIsEqualUint32( piSrc1, u32Src2 ); +} + +UINT32 
+SYMCRYPT_CALL +SymCryptIntIsEqual( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ) +{ + return SymCryptFdefIntIsEqual( piSrc1, piSrc2 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsLessThan( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ) +{ + return SymCryptFdefIntIsLessThan( piSrc1, piSrc2 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntMulUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntMulUint32( piSrc1, Src2, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntMulSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntMulSameSize( piSrc1, piSrc2, piDst, pbScratch, cbScratch ); +} + + +VOID +SYMCRYPT_CALL +SymCryptIntSquare( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntSquare( piSrc, piDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntMulMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntMulMixedSize( piSrc1, piSrc2, piDst, pbScratch, cbScratch ); +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorAllocate( UINT32 nDigits ) +{ + return SymCryptFdefDivisorAllocate( nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptDivisorFree( _Out_ PSYMCRYPT_DIVISOR pdObj ) +{ + SymCryptDivisorWipe( pdObj ); + SymCryptCallbackFree( pdObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDivisorFromDigits( UINT32 nDigits ) +{ + return SymCryptFdefSizeofDivisorFromDigits( nDigits ); +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + return SymCryptFdefDivisorCreate( pbBuffer, cbBuffer, nDigits ); +} + +VOID 
+SYMCRYPT_CALL +SymCryptDivisorWipe( _Out_ PSYMCRYPT_DIVISOR pdObj ) +{ + SYMCRYPT_CHECK_MAGIC( pdObj ); + + SymCryptWipe( pdObj, pdObj->cbSize ); +} + +VOID +SymCryptDivisorCopy( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ) +{ + SymCryptFdefDivisorCopy( pdSrc, pdDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptDivisorDigitsizeOfObject( _In_ PCSYMCRYPT_DIVISOR pdSrc ) +{ + return pdSrc->nDigits; +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntFromDivisor( _In_ PSYMCRYPT_DIVISOR pdSrc ) +{ + return SymCryptFdefIntFromDivisor( pdSrc ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntToDivisor( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst, + UINT32 totalOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntToDivisor( piSrc, pdDst, totalOperations, flags, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntDivMod( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_opt_ PSYMCRYPT_INT piQuotient, + _Out_opt_ PSYMCRYPT_INT piRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntDivMod( piSrc, pdDivisor, piQuotient, piRemainder, pbScratch, cbScratch ); +} + + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptModulusAllocate( UINT32 nDigits ) +{ + return SymCryptFdefModulusAllocate( nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptModulusFree( _Out_ PSYMCRYPT_MODULUS pmObj ) +{ + SymCryptFdefModulusFree( pmObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofModulusFromDigits( UINT32 nDigits ) +{ + return SymCryptFdefSizeofModulusFromDigits( nDigits ); +} + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptModulusCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + return SymCryptFdefModulusCreate( pbBuffer, cbBuffer, nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptModulusWipe( _Out_ PSYMCRYPT_MODULUS pmObj ) +{ + SYMCRYPT_CHECK_MAGIC( pmObj ); + + SymCryptWipe( pmObj, pmObj->cbSize 
); +} + +VOID +SymCryptModulusCopy( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ) +{ + SymCryptFdefModulusCopy( pmSrc, pmDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptModulusDigitsizeOfObject( _In_ PCSYMCRYPT_MODULUS pmSrc ) +{ + return pmSrc->nDigits; +} + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptModElementAllocate( _In_ PCSYMCRYPT_MODULUS pmMod ) +{ + return SymCryptFdefModElementAllocate( pmMod ); +} + +VOID +SYMCRYPT_CALL +SymCryptModElementFree( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peObj ) +{ + SymCryptFdefModElementFree( pmMod, peObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofModElementFromModulus( PCSYMCRYPT_MODULUS pmMod ) +{ + return SymCryptFdefSizeofModElementFromModulus( pmMod ); +} + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptModElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_MODULUS pmMod ) +{ + return SymCryptFdefModElementCreate( pbBuffer, cbBuffer, pmMod ); +} + +VOID +SYMCRYPT_CALL +SymCryptModElementWipe( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptFdefModElementWipe( pmMod, peDst ); +} + +VOID +SymCryptModElementCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptFdefModElementCopy( pmMod, peSrc, peDst ); +} + +VOID +SymCryptModElementMaskedCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 mask ) +{ + SymCryptFdefModElementMaskedCopy( pmMod, peSrc, peDst, mask ); +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ) +{ + return SymCryptFdefDivisorFromModulus( pmSrc ); +} + +VOID +SymCryptModElementConditionalSwap( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peData1, + _Inout_ PSYMCRYPT_MODELEMENT peData2, + _In_ UINT32 cond ) +{ + SymCryptFdefModElementConditionalSwap( pmMod, peData1, peData2, 
cond ); +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ) +{ + return SymCryptFdefIntFromModulus( pmSrc ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntToModulus( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_MODULUS pmDst, + UINT32 averageOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PSYMCRYPT_INT piSrcTweak = (PSYMCRYPT_INT) piSrc; + + // In CHKed build, we'll verify that the modulus is not prime, or that it is 2 or odd + // (Some inversion algorithms fail hard when one input isn't 2 or odd.) + // We are constant-time w.r.t. piSrc being odd or =2. We don't hide the size of any input, + // but inputs 2 and 3 are handled with the same code path. + SYMCRYPT_ASSERT( ((flags & SYMCRYPT_FLAG_MODULUS_PRIME) == 0) || + (((SymCryptIntGetValueLsbits32( piSrc ) & 1) | SymCryptIntIsEqualUint32( piSrc, 2 )) != 0) ); + + SymCryptFdefIntToModulus( piSrcTweak, pmDst, averageOperations, flags, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntToModElement( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntToModElement( piSrc, pmMod, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModElementToInt( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCUINT32 pData; + + SYMCRYPT_ASSERT( piDst->nDigits >= pmMod->nDigits ); + + pData = SYMCRYPT_MOD_CALL( pmMod ) modPreGet( pmMod, peSrc, pbScratch, cbScratch ); + + SymCryptFdefModElementToIntGeneric( pmMod, pData, piDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModElementSetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + 
PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError; + + scError = SymCryptFdefModElementSetValueGeneric( pbSrc, cbSrc, format, pmMod, peDst, pbScratch, cbScratch ); + + if( scError == SYMCRYPT_NO_ERROR ) + { + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModElementGetValue( + PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptFdefModElementGetValue( pmMod, peSrc, pbDst, cbDst, format, pbScratch, cbScratch ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptModElementIsEqual( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2 ) +{ + return SymCryptFdefModElementIsEqual( pmMod, peSrc1, peSrc2 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptModElementIsZero( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc ) +{ + return SymCryptFdefModElementIsZero( pmMod, peSrc ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModAdd( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modAdd( pmMod, peSrc1, peSrc2, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModSub( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modSub( pmMod, peSrc1, peSrc2, peDst, pbScratch, cbScratch ); +} + + 
+SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModMul( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modMul( pmMod, peSrc1, peSrc2, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModSquare( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modSquare( pmMod, peSrc, peDst, pbScratch, cbScratch ); +} + + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModNeg( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modNeg( pmMod, peSrc, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModElementSetValueUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModElementSetValueUint32Generic( value, pmMod, peDst, pbScratch, cbScratch ); + + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptModElementSetValueNegUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModElementSetValueNegUint32( value, pmMod, peDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptModDivPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ 
+ SymCryptFdefModDivPow2( pmMod, peSrc, exp, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModInv( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SYMCRYPT_MOD_CALL( pmMod ) modInv( pmMod, peSrc, peDst, flags, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptModExp( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peBase, + _In_ PCSYMCRYPT_INT piExp, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptModExpGeneric( pmMod, peBase, piExp, nBitsExp, flags, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModMultiExp( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_( nBases ) PCSYMCRYPT_MODELEMENT * peBaseArray, + _In_reads_( nBases ) PCSYMCRYPT_INT * piExpArray, + UINT32 nBases, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptModMultiExpGeneric( pmMod, peBaseArray, piExpArray, nBases, nBitsExp, flags, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModSetRandom( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModSetRandomGeneric( pmMod, peDst, flags, pbScratch, cbScratch ); + + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); +} + +PCSYMCRYPT_TRIALDIVISION_CONTEXT +SYMCRYPT_CALL +SymCryptCreateTrialDivisionContext( UINT32 nDigits ) +{ + return SymCryptFdefCreateTrialDivisionContext( nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntFindSmallDivisor( + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext, + _In_ 
PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptFdefIntFindSmallDivisor( pContext, piSrc, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ) +{ + SymCryptFdefFreeTrialDivisionContext( pContext ); +} diff --git a/libs/symcrypt/lib/aes-asm.c b/libs/symcrypt/lib/aes-asm.c new file mode 100644 index 00000000000..bd49d2e549a --- /dev/null +++ b/libs/symcrypt/lib/aes-asm.c @@ -0,0 +1,46 @@ +// +// aes-asm.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + + +#include "precomp.h" + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesEncryptAsm( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesDecryptAsm( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +#endif diff --git a/libs/symcrypt/lib/aes-c.c b/libs/symcrypt/lib/aes-c.c new file mode 100644 index 00000000000..f2e22438487 --- /dev/null +++ b/libs/symcrypt/lib/aes-c.c @@ -0,0 +1,468 @@ +// +// aes-c.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// The fast-ish C implementation of the core AES functions +// +// Separate C file because at some point we want to be able to switch this out with a compact-C implementation +// that is smaller. +// + +#include "precomp.h" + +// +// Static vs. dynamically generated tables. +// +// AES uses about 13 kB of tables; it turns out that most of these tables can be generated +// algorithmically much faster than they can be read off the disk. +// This implementation does not do so. +// The reason is that generated tables live in the modifiable data segment, which means +// that they are not shared between different instances of a DLL. +// Static tables are shared. Especially for applications that have a very large number +// of processes (e.g. Terminal Servers) the extra cost of generating and storing a +// per-process copy of these tables is higher than the cost of loading it a few times +// from disk. +// Earlier versions of this implementation did generate the tables dynamically and ran into +// this very problem. +// +// Our tables are aligned to eliminate side-channels from TLB lookups if the TLB page size +// is big enough. For example, the SboxMatrixMult table is 1024-aligned. Each use of that +// table consists of 4 lookups, and each lookup is within its own 1kB aligned subtable. +// The side-channels from cache lines still remain, of course. +// + +//extern BYTE SymCryptAesSbox[256]; // Basic S-box, not used +extern SYMCRYPT_ALIGN_AT( 256) BYTE SymCryptAesInvSbox[256]; // For final round in decryption +extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesSboxMatrixMult[4][256][4]; // Main encryption tables +extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesInvSboxMatrixMult[4][256][4];// Main decryption tables +extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesInvMatrixMult[4][256][4]; // For computing decryption round keys + +// +// Throughout this implementation we use UINT32s to access byte arrays.
The AES +// algorithm almost requires this; without it the performance would be abysmal. +// All data elements are SYMCRYPT_ALIGNed, which must be at least 4. +// + +// +// Macro to check for alignment to support platforms that need alignment fix-ups. +// +#define IS_UINT32_ALIGNED( __p ) ((((intptr_t)__p) & 3) == 0) + +// +// Only need to enforce alignment on platforms that are not x86 or x64 +// Future improvement: should switch to using unaligned pointer accesses +// on some platforms. +// +#define NEED_ALIGN (!(SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64)) + + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxC( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut ) +// +// Perform 4 S-box lookups. +// This is a separate function as it can be done side-channel safe using +// AES-NI. +// Key expansion can actually be improved a lot more with AES-NI, but that +// requires major code changes for which we don't have time right now. +// +{ + pOut[0] = SymCryptAesSboxMatrixMult[0][pIn[0]][1]; + pOut[1] = SymCryptAesSboxMatrixMult[0][pIn[1]][1]; + pOut[2] = SymCryptAesSboxMatrixMult[0][pIn[2]][1]; + pOut[3] = SymCryptAesSboxMatrixMult[0][pIn[3]][1]; +} + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyC( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ) +// +// Convert an encryption round key to a decryption round key by applying the inverse +// mixcolumn function to each 4-byte subword. +// This is a separate function as with AES-NI there is an assembler version of this +// function that is side-channel safe. 
+// +{ + int i; + PBYTE p = pDecryptionRoundKey; + PCBYTE q = pEncryptionRoundKey; + + for( i=0; i<4; i++ ) { + *(UINT32 *)p = + *(UINT32 *)SymCryptAesInvMatrixMult[0][q[0]] ^ + *(UINT32 *)SymCryptAesInvMatrixMult[1][q[1]] ^ + *(UINT32 *)SymCryptAesInvMatrixMult[2][q[2]] ^ + *(UINT32 *)SymCryptAesInvMatrixMult[3][q[3]]; + p += 4; + q += 4; + } + +} + +// +// SymCryptAesEncrypt +// NOINLINE prevents the compiler from creating additional implementations +// that have to be FIPS selftested. +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptAesEncryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbPlaintext, + _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbCiphertext ) +{ + SYMCRYPT_ALIGN BYTE state[4][4] = { 0 }; + SYMCRYPT_ALIGN UINT32 state2[4] = { 0 }; + + const BYTE (*keyPtr)[4][4]; + const BYTE (*keyLimit)[4][4]; + +#if NEED_ALIGN + SYMCRYPT_ALIGN BYTE alignBuffer[SYMCRYPT_AES_BLOCK_SIZE]; +#endif + +#if NEED_ALIGN + + // + // Callers who don't have their buffers aligned don't care about speed, + // so we do this in the simplest way. + // + if( !(IS_UINT32_ALIGNED( pbPlaintext ) & IS_UINT32_ALIGNED( pbCiphertext )) ) { + memcpy( alignBuffer, pbPlaintext, SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptAesEncrypt( pExpandedKey, alignBuffer, alignBuffer ); + memcpy( pbCiphertext, alignBuffer, SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptWipeKnownSize( alignBuffer, sizeof( alignBuffer ) ); + return; + } +#endif + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + // + // From this point on all our data is UINT32 aligned or better on those + // platforms that have alignment restrictions. 
+ // + + keyPtr = &pExpandedKey->RoundKey[0]; // First round key + keyLimit = &pExpandedKey->lastEncRoundKey[0]; // Last round key + + // Initial round (AddRoundKey) + *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ *(UINT32 *) &pbPlaintext[0]; + *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ *(UINT32 *) &pbPlaintext[4]; + *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ *(UINT32 *) &pbPlaintext[8]; + *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ *(UINT32 *) &pbPlaintext[12]; + + keyPtr += 1; + + // Main rounds + while (keyPtr < keyLimit) + { + + // SubBytes/ShiftRows/MixColumns for col. 0 + state2[0] = *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[0][0] ]); + state2[3] = *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[0][1] ]); + state2[2] = *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[0][2] ]); + state2[1] = *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[0][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 1 + state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[1][0] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[1][1] ]); + state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[1][2] ]); + state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[1][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 2 + state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[2][0] ]); + state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[2][1] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[2][2] ]); + state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[2][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 
3 + state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[3][0] ]); + state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[3][1] ]); + state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[3][2] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[3][3] ]); + + // AddRoundKey + *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0]; + *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1]; + *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2]; + *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3]; + + keyPtr += 1; + } + + // Final round + + // SubBytes/ShiftRows for col. 0 + state2[0] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][0] ][1]; + state2[3] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][1] ][1] << 8; + state2[2] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][2] ][1] << 16; + state2[1] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][3] ][1] << 24; + + // SubBytes/ShiftRows for col. 1 + state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][0] ][1]; + state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][1] ][1] << 8; + state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][2] ][1] << 16; + state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][3] ][1] << 24; + + // SubBytes/ShiftRows for col. 2 + state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][0] ][1]; + state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][1] ][1] << 8; + state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][2] ][1] << 16; + state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][3] ][1] << 24; + + // SubBytes/ShiftRows for col. 
3 + state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][0] ][1]; + state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][1] ][1] << 8; + state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][2] ][1] << 16; + state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][3] ][1] << 24; + + // AddRoundKey + *((UINT32 *) &pbCiphertext[0 ]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0]; + *((UINT32 *) &pbCiphertext[4 ]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1]; + *((UINT32 *) &pbCiphertext[8 ]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2]; + *((UINT32 *) &pbCiphertext[12]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3]; + + SymCryptWipeKnownSize( state, sizeof( state ) ); + SymCryptWipeKnownSize( state2, sizeof( state2 ) ); + + return; +} + + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptAesDecryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbCiphertext, + _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbPlaintext ) +{ + SYMCRYPT_ALIGN BYTE state[4][4] = { 0 }; + SYMCRYPT_ALIGN UINT32 state2[4] = { 0 }; + + const BYTE (*keyPtr)[4][4]; + const BYTE (*keyLimit)[4][4]; + +#if NEED_ALIGN + SYMCRYPT_ALIGN BYTE alignBuffer[SYMCRYPT_AES_BLOCK_SIZE]; +#endif + +#if NEED_ALIGN + // + // Callers who don't have their buffers aligned don't care about speed, + // so we do this in the simplest way. 
+ // + if( !(IS_UINT32_ALIGNED( pbPlaintext ) & IS_UINT32_ALIGNED( pbCiphertext )) ) { + memcpy( alignBuffer, pbCiphertext, SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptAesDecrypt( pExpandedKey, alignBuffer, alignBuffer ); + memcpy( pbPlaintext, alignBuffer, SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptWipeKnownSize( alignBuffer, sizeof( alignBuffer ) ); + return; + } +#endif + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + keyPtr = &pExpandedKey->lastEncRoundKey[0]; // First round key + keyLimit = &pExpandedKey->lastDecRoundKey[0]; // Last round key + + // Initial round (AddRoundKey) + *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ *(UINT32 *) &pbCiphertext[0]; + *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ *(UINT32 *) &pbCiphertext[4]; + *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ *(UINT32 *) &pbCiphertext[8]; + *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ *(UINT32 *) &pbCiphertext[12]; + + keyPtr += 1; + + // Main rounds + while (keyPtr < keyLimit) + { + + // SubBytes/ShiftRows/MixColumns for col. 0 + state2[0] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[0][0] ]); + state2[1] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[0][1] ]); + state2[2] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[0][2] ]); + state2[3] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[0][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 1 + state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[1][0] ]); + state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[1][1] ]); + state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[1][2] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[1][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 
2 + state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[2][0] ]); + state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[2][1] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[2][2] ]); + state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[2][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 3 + state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[3][0] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[3][1] ]); + state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[3][2] ]); + state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[3][3] ]); + + // AddRoundKey + *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0]; + *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1]; + *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2]; + *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3]; + + keyPtr += 1; + } + + // Final round + + // SubBytes/ShiftRows for col. 0 + state2[0] = (UINT32) SymCryptAesInvSbox[ state[0][0] ]; + state2[1] = (UINT32) SymCryptAesInvSbox[ state[0][1] ] << 8; + state2[2] = (UINT32) SymCryptAesInvSbox[ state[0][2] ] << 16; + state2[3] = (UINT32) SymCryptAesInvSbox[ state[0][3] ] << 24; + + // SubBytes/ShiftRows for col. 1 + state2[1] |= (UINT32) SymCryptAesInvSbox[ state[1][0] ]; + state2[2] |= (UINT32) SymCryptAesInvSbox[ state[1][1] ] << 8; + state2[3] |= (UINT32) SymCryptAesInvSbox[ state[1][2] ] << 16; + state2[0] |= (UINT32) SymCryptAesInvSbox[ state[1][3] ] << 24; + + // SubBytes/ShiftRows for col. 2 + state2[2] |= (UINT32) SymCryptAesInvSbox[ state[2][0] ]; + state2[3] |= (UINT32) SymCryptAesInvSbox[ state[2][1] ] << 8; + state2[0] |= (UINT32) SymCryptAesInvSbox[ state[2][2] ] << 16; + state2[1] |= (UINT32) SymCryptAesInvSbox[ state[2][3] ] << 24; + + // SubBytes/ShiftRows for col. 
3 + state2[3] |= (UINT32) SymCryptAesInvSbox[ state[3][0] ]; + state2[0] |= (UINT32) SymCryptAesInvSbox[ state[3][1] ] << 8; + state2[1] |= (UINT32) SymCryptAesInvSbox[ state[3][2] ] << 16; + state2[2] |= (UINT32) SymCryptAesInvSbox[ state[3][3] ] << 24; + + // AddRoundKey + *((UINT32 *) &pbPlaintext[0 ]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0]; + *((UINT32 *) &pbPlaintext[4 ]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1]; + *((UINT32 *) &pbPlaintext[8 ]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2]; + *((UINT32 *) &pbPlaintext[12]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3]; + + SymCryptWipeKnownSize( state, sizeof( state ) ); + SymCryptWipeKnownSize( state2, sizeof( state2 ) ); + + return; +} + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +/* Wine hack: asm not supported yet */ + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ) +{ + SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( 
SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ) +{ + SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SymCryptCbcEncrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SymCryptCbcDecrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64Asm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SymCryptCtrMsb64( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ul1( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_xmm_ssse3_asm( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); +} diff --git a/libs/symcrypt/lib/aes-default-bc.c b/libs/symcrypt/lib/aes-default-bc.c new file mode 100644 index 00000000000..2f38fc4aabe --- /dev/null +++ b/libs/symcrypt/lib/aes-default-bc.c @@ -0,0 +1,92 @@ +// +// aes-default-bc.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. 
Licensed under the MIT license. +// + +#include "precomp.h" + +// +// The SymCrypt API allows callers to use the generic block cipher mode functions and pass +// a pointer to a structure that describes the block cipher. +// This structure contains pointers to all the optimized implementations of the various modes. +// This pulls in all the mode-specific code, which in some cases we don't want. +// +// We isolate the SymCryptAesBlockCipher structure into this separate C file so that it only gets +// pulled in when the application uses this structure. +// + +// +// The virtual table for the AES block cipher. +// +// All pointers must point to specialized functions. The general +// block cipher mode functions will call these pointers if they are non-NULL +// so if they point back to an implementation that calls the generic +// mode functions we get an infinite recursion. +// +// NOTE: the compile-time conditions in this file should track the actual implementations in +// aes-default.c. +// + +const SYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipher_Fast = { + &SymCryptAesExpandKey, + &SymCryptAesEncrypt, + &SymCryptAesDecrypt, + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesEcbEncrypt, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + &SymCryptAesEcbDecrypt, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesCbcEncrypt, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesCbcDecrypt, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 + &SymCryptAesCbcMac, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesCtrMsb64, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesGcmEncryptPart, +#else + 
NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesGcmDecryptPart, +#else + NULL, +#endif + + SYMCRYPT_AES_BLOCK_SIZE, + sizeof( SYMCRYPT_AES_EXPANDED_KEY ), +}; + +// +// This indirection makes it easier to switch implementations in a binary without +// changing the calling code. +// +const PCSYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipher = &SymCryptAesBlockCipher_Fast; diff --git a/libs/symcrypt/lib/aes-default.c b/libs/symcrypt/lib/aes-default.c new file mode 100644 index 00000000000..fce247d7f74 --- /dev/null +++ b/libs/symcrypt/lib/aes-default.c @@ -0,0 +1,872 @@ +// +// aes-default.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This is the interface for the default AES implementation. +// On each platform, this is the fastest AES implementation irrespective of code size. +// It uses assembler, XMM, or any other trick. +// + + +#include "precomp.h" + +// +// Virtual table for generic functions +// This allows us to default to generic implementations for some modes without pulling in all the +// dedicated functions. +// We use this when we cannot use the optimized implementations for some reason. 
+// +const SYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipherNoOpt = { + &SymCryptAesExpandKey, +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM + &SymCryptAesEncryptAsm, + &SymCryptAesDecryptAsm, +#else + &SymCryptAesEncryptC, + &SymCryptAesDecryptC, +#endif + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + + SYMCRYPT_AES_BLOCK_SIZE, + sizeof( SYMCRYPT_AES_EXPANDED_KEY ), +}; + +VOID +SYMCRYPT_CALL +SymCryptAes4Sbox( _In_reads_(4) PCBYTE pIn, _Out_writes_(4) PBYTE pOut, BOOL UseSimd ) +{ +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + if( UseSimd ) + { + SymCryptAes4SboxXmm( pIn, pOut ); + } else { + SymCryptAes4SboxC( pIn, pOut ); + } +#elif SYMCRYPT_CPU_ARM64 + if( UseSimd ) + { + SymCryptAes4SboxNeon( pIn, pOut ); + } else { + SymCryptAes4SboxC( pIn, pOut ); + } +#else + UNREFERENCED_PARAMETER( UseSimd ); + SymCryptAes4SboxC( pIn, pOut ); // never use XMM on SaveXmm arch, save/restore overhead is too large. +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKey( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey, + BOOL UseSimd ) +{ +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + if( UseSimd ) + { + SymCryptAesCreateDecryptionRoundKeyXmm( pEncryptionRoundKey, pDecryptionRoundKey ); + } else { + SymCryptAesCreateDecryptionRoundKeyC( pEncryptionRoundKey, pDecryptionRoundKey ); + } +#elif SYMCRYPT_CPU_ARM64 + if( UseSimd ) + { + SymCryptAesCreateDecryptionRoundKeyNeon( pEncryptionRoundKey, pDecryptionRoundKey ); + } else { + SymCryptAesCreateDecryptionRoundKeyC( pEncryptionRoundKey, pDecryptionRoundKey ); + } +#else + UNREFERENCED_PARAMETER( UseSimd ); + SymCryptAesCreateDecryptionRoundKeyC( pEncryptionRoundKey, pDecryptionRoundKey ); // never use XMM on SaveXmm arch, save/restore overhead is too large. 
+#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbSrc, + _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesEncryptXmm( pExpandedKey, pbSrc, pbDst ); + } else { + SymCryptAesEncryptAsm( pExpandedKey, pbSrc, pbDst ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesEncryptXmm( pExpandedKey, pbSrc, pbDst ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesEncryptAsm( pExpandedKey, pbSrc, pbDst ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptAesEncryptAsm( pExpandedKey, pbSrc, pbDst ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesEncryptNeon( pExpandedKey, pbSrc, pbDst ); + } else { + SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst ); + } +#else + SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbSrc, + _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesDecryptXmm( pExpandedKey, pbSrc, pbDst ); + } else { + SymCryptAesDecryptAsm( pExpandedKey, pbSrc, pbDst ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesDecryptXmm( pExpandedKey, pbSrc, pbDst ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesDecryptAsm( pExpandedKey, pbSrc, pbDst ); + } +#elif 
SYMCRYPT_CPU_ARM + SymCryptAesDecryptAsm( pExpandedKey, pbSrc, pbDst ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesDecryptNeon( pExpandedKey, pbSrc, pbDst ); + } else { + SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst ); + } +#else + SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCbcEncryptXmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptAesCbcEncryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCbcEncryptXmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesCbcEncryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptAesCbcEncryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCbcEncryptNeon( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptCbcEncrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#else + SymCryptCbcEncrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( 
SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCbcDecryptXmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptAesCbcDecryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCbcDecryptXmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesCbcDecryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptAesCbcDecryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCbcDecryptNeon( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptCbcDecrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#else + SymCryptCbcDecrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesEcbEncryptXmm( pExpandedKey, pbSrc, pbDst, cbData ); + } else { + SymCryptAesEcbEncryptAsm( pExpandedKey, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( 
&SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesEcbEncryptXmm( pExpandedKey, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesEcbEncryptAsm( pExpandedKey, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptAesEcbEncryptAsm( pExpandedKey, pbSrc, pbDst, cbData ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesEcbEncryptNeon( pExpandedKey, pbSrc, pbDst, cbData ); + } else { + SymCryptAesEcbEncryptC( pExpandedKey, pbSrc, pbDst, cbData ); + } +#else + SymCryptAesEcbEncryptC( pExpandedKey, pbSrc, pbDst, cbData ); +#endif +} + +// +// NOTE: There is no reason that SymCryptAesEcbDecrypt could not have unrolled versions similar to +// SymCryptAesEcbEncrypt if a real use case requiring large scale Ecb decryption is found. +// For now just decrypt 1 block at a time to reduce code size. +// +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesDecrypt( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMac( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCbcMacXmm( pExpandedKey, pbChainingValue, pbData, cbData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptCbcMac( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbData, cbData ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( 
SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCbcMacXmm( pExpandedKey, pbChainingValue, pbData, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptCbcMac( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbData, cbData ); + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCbcMacNeon( pExpandedKey, pbChainingValue, pbData, cbData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptCbcMac( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbData, cbData ); + } +#else + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptCbcMac( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbData, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb32( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCtrMsb32Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCtrMsb32Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( 
&SaveData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCtrMsb32Neon( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#else + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCtrMsb64Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptAesCtrMsb64Asm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCtrMsb64Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesCtrMsb64Asm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_ARM + SymCryptAesCtrMsb64Asm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCtrMsb64Neon( pExpandedKey, 
pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptCtrMsb64( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#else + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptCtrMsb64( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptPartOnePass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SIZE_T bytesToProcess; +#if SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; +#endif + + // + // We have entered the encrypt phase, the AAD has been padded to be a multiple of block size + // We know that the bytes still to use in the key stream buffer and the bytes left to fill the + // macBlock will be the same in the context of this function + // + SYMCRYPT_ASSERT( (pState->cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == pState->bytesInMacBlock ); + + // + // We update pState->cbData once before we modify cbData. 
+ // pState->cbData is not used in the rest of this function + // + SYMCRYPT_ASSERT( pState->cbData + cbData <= SYMCRYPT_GCM_MAX_DATA_SIZE ); + pState->cbData += cbData; + + if( pState->bytesInMacBlock > 0 ) + { + bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_GCM_BLOCK_SIZE - pState->bytesInMacBlock ); + SymCryptXorBytes( + pbSrc, + &pState->keystreamBlock[pState->bytesInMacBlock], + &pState->macBlock[pState->bytesInMacBlock], + bytesToProcess ); + memcpy( pbDst, &pState->macBlock[pState->bytesInMacBlock], bytesToProcess ); + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + pState->bytesInMacBlock += bytesToProcess; + + if( pState->bytesInMacBlock == SYMCRYPT_GCM_BLOCK_SIZE ) + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, + &pState->ghashState, + &pState->macBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + pState->bytesInMacBlock = 0; + } + + // + // If there are bytes left in the key stream buffer, then cbData == 0 and we're done. + // If we used up all the bytes, then we are fine, no need to compute the next key stream block + // + } + + if( cbData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + bytesToProcess = cbData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + // + // We use a Gcm function that increments the CTR by 64 bits, rather than the 32 bits that GCM requires. + // As we only support 12-byte nonces, the 32-bit counter never overflows, and we can safely use + // the 64-bit incrementing primitive. + // If we ever support other nonce sizes this is going to be a big problem. + // You can't fake a 32-bit counter using a 64-bit counter function without side-channels that expose + // information about the current counter value. + // With other nonce sizes the actual counter value itself is not public, so we can't expose that. + // We can do two things: + // - create SymCryptAesGcmEncryptXXX32 + // - Accept that we leak information about the counter value; after all it is not treated as a + // secret when the nonce is 12 bytes. 
+ // + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE ) && + (bytesToProcess >= GCM_YMM_MINBLOCKS * SYMCRYPT_GCM_BLOCK_SIZE) && + SymCryptSaveYmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesGcmEncryptStitchedYmm_2048( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + + SymCryptRestoreYmm( &SaveData ); + } else { + SymCryptAesGcmEncryptStitchedXmm( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + } + +#elif SYMCRYPT_CPU_X86 + SymCryptAesGcmEncryptStitchedXmm( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + (PSYMCRYPT_GF128_ELEMENT)&pState->pKey->ghashKey.tableSpace[pState->pKey->ghashKey.tableOffset], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + +#elif SYMCRYPT_CPU_ARM64 + SymCryptAesGcmEncryptStitchedNeon( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + +#else + SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + pbSrc, + pbDst, + cbData ); + // + // We break the read-once/write once rule here by reading the pbDst data back. + // In this particular situation this is safe, and avoiding it is expensive as it + // requires an extra copy and an extra memory buffer. + // The first write exposes the GCM key stream, independent of the underlying data that + // we are processing. From an attacking point of view we can think of this as literally + // handing over the key stream. 
So encryption consists of two steps: + // - hand over the key stream + // - MAC some ciphertext + // In this view (which has equivalent security properties to GCM) it obviously doesn't + // matter that we read pbDst back. + // + SymCryptGHashAppendData(&pState->pKey->ghashKey, + &pState->ghashState, + pbDst, + cbData ); + +#endif + + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + } + + if( cbData > 0 ) + { + SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->keystreamBlock[0], + &pState->keystreamBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + + SymCryptXorBytes( &pState->keystreamBlock[0], pbSrc, &pState->macBlock[0], cbData ); + memcpy( pbDst, &pState->macBlock[0], cbData ); + pState->bytesInMacBlock = cbData; + + // + // pState->cbData contains the data length after this call already, so it knows how many + // bytes are left in the keystream block + // + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptPartOnePass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SIZE_T bytesToProcess; +#if SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; +#endif + + // + // We have entered the decrypt phase, the AAD has been padded to be a multiple of block size + // We know that the bytes still to use in the key stream buffer and the bytes left to fill the + // macBlock will be the same in the context of this function + // + SYMCRYPT_ASSERT( (pState->cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == pState->bytesInMacBlock ); + + // + // We update pState->cbData once before we modify cbData. 
+ // pState->cbData is not used in the rest of this function + // + SYMCRYPT_ASSERT( pState->cbData + cbData <= SYMCRYPT_GCM_MAX_DATA_SIZE ); + pState->cbData += cbData; + + if( pState->bytesInMacBlock > 0 ) + { + bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_GCM_BLOCK_SIZE - pState->bytesInMacBlock ); + memcpy( &pState->macBlock[pState->bytesInMacBlock], pbSrc, bytesToProcess ); + SymCryptXorBytes( + &pState->keystreamBlock[pState->bytesInMacBlock], + &pState->macBlock[pState->bytesInMacBlock], + pbDst, + bytesToProcess ); + + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + pState->bytesInMacBlock += bytesToProcess; + + if( pState->bytesInMacBlock == SYMCRYPT_GCM_BLOCK_SIZE ) + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, + &pState->ghashState, + &pState->macBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + pState->bytesInMacBlock = 0; + } + + // + // If there are bytes left in the key stream buffer, then cbData == 0 and we're done. + // If we used up all the bytes, then we are fine, no need to compute the next key stream block + // + } + + if( cbData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + bytesToProcess = cbData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + // + // We use a Gcm function that increments the CTR by 64 bits, rather than the 32 bits that GCM requires. + // As we only support 12-byte nonces, the 32-bit counter never overflows, and we can safely use + // the 64-bit incrementing primitive. + // If we ever support other nonce sizes this is going to be a big problem. + // You can't fake a 32-bit counter using a 64-bit counter function without side-channels that expose + // information about the current counter value. + // With other nonce sizes the actual counter value itself is not public, so we can't expose that. + // We can do two things: + // - create SymCryptAesGcmDecryptXXX32 + // - Accept that we leak information about the counter value; after all it is not treated as a + // secret when the nonce is 12 bytes. 
+ // + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE ) && + (bytesToProcess >= GCM_YMM_MINBLOCKS * SYMCRYPT_GCM_BLOCK_SIZE) && + SymCryptSaveYmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesGcmDecryptStitchedYmm_2048( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + + SymCryptRestoreYmm( &SaveData ); + } else { + SymCryptAesGcmDecryptStitchedXmm( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + } + +#elif SYMCRYPT_CPU_X86 + SymCryptAesGcmDecryptStitchedXmm( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + (PSYMCRYPT_GF128_ELEMENT)&pState->pKey->ghashKey.tableSpace[pState->pKey->ghashKey.tableOffset], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + +#elif SYMCRYPT_CPU_ARM64 + SymCryptAesGcmDecryptStitchedNeon( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + +#else + SymCryptGHashAppendData(&pState->pKey->ghashKey, + &pState->ghashState, + pbSrc, + cbData ); + // + // Do the actual decryption + // This violates the read-once rule, but it is safe for the same reasons as above + // in the encryption case. 
+ // + SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + pbSrc, + pbDst, + cbData ); + +#endif + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + } + + if( cbData > 0 ) + { + SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->keystreamBlock[0], + &pState->keystreamBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + + memcpy( &pState->macBlock[0], pbSrc, cbData ); + SymCryptXorBytes( + &pState->keystreamBlock[0], + &pState->macBlock[0], + pbDst, + cbData ); + + pState->bytesInMacBlock = cbData; + + // + // pState->cbData contains the data length after this call already, so it knows how many + // bytes are left in the keystream block + // + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE ) ) + { + SymCryptAesGcmEncryptPartOnePass( pState, pbSrc, pbDst, cbData ); + } else { + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesGcmEncryptPartOnePass( pState, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES | SYMCRYPT_CPU_FEATURE_NEON_PMULL ) ) + { + SymCryptAesGcmEncryptPartOnePass( pState, pbSrc, pbDst, cbData ); + } else { + 
SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#else + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE ) ) + { + SymCryptAesGcmDecryptPartOnePass( pState, pbSrc, pbDst, cbData ); + } else { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesGcmDecryptPartOnePass( pState, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES | SYMCRYPT_CPU_FEATURE_NEON_PMULL ) ) + { + SymCryptAesGcmDecryptPartOnePass( pState, pbSrc, pbDst, cbData ); + } else { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#else + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); +#endif +} diff --git a/libs/symcrypt/lib/aes-key.c b/libs/symcrypt/lib/aes-key.c new file mode 100644 index 00000000000..e584403914a --- /dev/null +++ b/libs/symcrypt/lib/aes-key.c @@ -0,0 +1,437 @@ +// +// aes.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// The actual encryption and decryption routines here are not nearly as fast as the +// assembler ones. They are used on platforms that don't have assembler implementations +// and for various testing purposes. 
+// +// This code derives from the original fast AES code that Niels Ferguson wrote +// for BitLocker in Windows Vista. +// The C code is derived from the AES that was already in the RSA32 library, +// the assembler code was created new at that time. +// + + +#include "precomp.h" + + +/////////////////////////////////////////////////////////////////////////////// +// Key expansion uses two functions, a 4-byte S-box lookup and one +// to create a decryption round key from an encryption round key. +// These are the C implementations of these functions +// + + +static BYTE g_SymCryptAesRoundConstant[11] = +{ + 0, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, +}; + +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKeyInternal( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + BOOLEAN fCreateDecryptionKeys ) +{ + UINT32 nRounds; + BYTE * p; + BYTE * q; + UINT32 i; + UINT32 t; + + BOOL UseSimd = FALSE; + SYMCRYPT_ERROR status = SYMCRYPT_NO_ERROR; + +#if SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + if( SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + UseSimd = TRUE; + } + } +#elif SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + UseSimd = TRUE; + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + UseSimd = TRUE; + } +#endif + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + // + // Separate code for each key size, this is significantly faster. + // We have a number of applications that do frequent key expansions. 
+ // + switch( cbKey ) + { + case 16: + nRounds = 10; + pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds]; + pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds]; + + memcpy( &pExpandedKey->RoundKey[0], pbKey, 16 ); + + p = (BYTE *)&pExpandedKey->RoundKey[1]; + + for( i=1; i<=nRounds; i++ ) + { + SymCryptAes4Sbox( &p[-4], p, UseSimd ); + t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 16) ^ g_SymCryptAesRoundConstant[i]; + SYMCRYPT_STORE_LSBFIRST32( p, t ); // this is a macro that re-evaluates its arguments + + *(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 12); + *(UINT32 *)(p+8) = *(UINT32 *)(p+4) ^ *(UINT32 *)(p - 8); + *(UINT32 *)(p+12) = *(UINT32 *)(p+8) ^ *(UINT32 *)(p - 4); + + p += 16; + } + + break; + + case 24: + nRounds = 12; + pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds]; + pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds]; + + memcpy( &pExpandedKey->RoundKey[0], pbKey, 24 ); + + p = (BYTE *)&pExpandedKey->RoundKey[0] + 24; + + // + // We have 12 rounds, 13 round keys, and 13*16 = 208 bytes of encryption key to generate. + // We have 24 already, so we need 184 more. + // Each iteration produces 24 bytes, so we need to loop 8 times. 
+ // + for( i=1; i<=8; i++ ) + { + SymCryptAes4Sbox( &p[-4], p, UseSimd ); + t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 24) ^ g_SymCryptAesRoundConstant[i]; + SYMCRYPT_STORE_LSBFIRST32( p, t ); + + *(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 20); + *(UINT32 *)(p+8) = *(UINT32 *)(p+ 4) ^ *(UINT32 *)(p - 16); + *(UINT32 *)(p+12) = *(UINT32 *)(p+ 8) ^ *(UINT32 *)(p - 12); + *(UINT32 *)(p+16) = *(UINT32 *)(p+12) ^ *(UINT32 *)(p - 8); + *(UINT32 *)(p+20) = *(UINT32 *)(p+16) ^ *(UINT32 *)(p - 4); + + p += 24; + } + + break; + + case 32: + nRounds = 14; + pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds]; + pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds]; + + memcpy( &pExpandedKey->RoundKey[0], pbKey, 32 ); + + p = (BYTE *)&pExpandedKey->RoundKey[0] + 32; + + // + // We have 14 rounds, 15 round keys, and 15*16 = 240 bytes of encryption key to generate. + // We have 32 already, so we need 208 more. + // Each iteration produces 32 bytes, so we need to loop 6.5 times. 
+ // + for( i=1; i<=6; i++ ) + { + SymCryptAes4Sbox( &p[-4], p, UseSimd ); + t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 32) ^ g_SymCryptAesRoundConstant[i]; + SYMCRYPT_STORE_LSBFIRST32( p, t ); + + *(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 28); + *(UINT32 *)(p+8) = *(UINT32 *)(p + 4) ^ *(UINT32 *)(p - 24); + *(UINT32 *)(p+12) = *(UINT32 *)(p + 8) ^ *(UINT32 *)(p - 20); + + SymCryptAes4Sbox( &p[12], &p[16], UseSimd ); + *(UINT32 *)(p+16) = *(UINT32 *)(p + 16) ^ *(UINT32 *)(p - 16); + + *(UINT32 *)(p+20) = *(UINT32 *)(p + 16) ^ *(UINT32 *)(p - 12); + *(UINT32 *)(p+24) = *(UINT32 *)(p + 20) ^ *(UINT32 *)(p - 8); + *(UINT32 *)(p+28) = *(UINT32 *)(p + 24) ^ *(UINT32 *)(p - 4); + + p += 32; + } + + // We looped 6 times, so here is the half-loop + + SymCryptAes4Sbox( &p[-4], p, UseSimd ); + t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 32) ^ g_SymCryptAesRoundConstant[i]; + SYMCRYPT_STORE_LSBFIRST32( p, t ); + + *(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 28); + *(UINT32 *)(p+8) = *(UINT32 *)(p + 4) ^ *(UINT32 *)(p - 24); + *(UINT32 *)(p+12) = *(UINT32 *)(p + 8) ^ *(UINT32 *)(p - 20); + + break; + + default: + status = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + + if( fCreateDecryptionKeys ) + { + p = &pExpandedKey->RoundKey[0][0][0]; + q = (PBYTE)(pExpandedKey->lastDecRoundKey); + + // The first encryption round key is the last decryption round key + memcpy( q, p, SYMCRYPT_AES_BLOCK_SIZE ); + p += 16; + q -= 16; + + while( p < (PBYTE) pExpandedKey->lastEncRoundKey ) + { + SymCryptAesCreateDecryptionRoundKey( p, q, UseSimd ); + q -= 16; + p += 16; + } + } + +cleanup: + +#if SYMCRYPT_CPU_X86 + if( UseSimd ) + { + SymCryptRestoreXmm( &SaveData ); + } +#endif + + return status; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKey( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) + +{ + return SymCryptAesExpandKeyInternal( pExpandedKey, pbKey, 
cbKey, TRUE ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKeyEncryptOnly( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + return SymCryptAesExpandKeyInternal( pExpandedKey, pbKey, cbKey, FALSE ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesKeyCopy( _In_ PCSYMCRYPT_AES_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + *pDst = *pSrc; + pDst->lastEncRoundKey = &pDst->RoundKey[0] + (pSrc->lastEncRoundKey - &pSrc->RoundKey[0]); + pDst->lastDecRoundKey = &pDst->RoundKey[0] + (pSrc->lastDecRoundKey - &pSrc->RoundKey[0]); + + SYMCRYPT_SET_MAGIC( pDst ); +} + +// +// Self test code +// + + +const BYTE SymCryptAesNistTestVector128Ciphertext[16] = { + 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30, + 0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a, +}; + + + +/**************************************************************** + * OLD CODE + * + * Old code to generate the AES tables dynamically. + * Kept for future reference. + * + + +// +// Prototype; on some platforms this function is in assembler. +// +VOID +SYMCRYPT_CALL +SymCryptAesCreateRotatedTables( BYTE MatrixMult[4][256][4] ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateRotatedTables( _Inout_ BYTE MatrixMult[4][256][4] ) +{ + int i,j,k; + + // + // We do this byte-by-byte, which is easiest. + // It would be faster to use UINT32 operations, + // but that is endian-specific, and therefore platform-specific. + // Endian-agnostic UINT32-based code would be a lot more complicated. + // All this is extremely easy to do in assembler, which we do on those + // platforms that have assembler implementations. + // + for( j=1; j<4; j++ ) { + for( i=0; i<256; i++ ) { + for( k=0; k<4; k++ ) { + MatrixMult[j][i][k] = MatrixMult[0][i][(k-j)&3]; + } + } + } +} + + + +// +// SymCryptAesInitMatrixMultiplyTable +// +// Initialize a matrix multiplication table. 
+// Each matrix multiplication table consists of 4 tables of 256 entries of 4 bytes each. +// The four tables are rotated copies of each other. +// This function generates the first of those four tables from the init +// value. +// +// After this call: +// At index i the table contains the four bytes +// i * init[0], i * init[1], i * init[2], i * init[3] +// where multiplication is in GF(2^8). +// +// We do not do a GF(2^8) multiplication for each entry, but rather use the +// relationship (a xor b) * init[.] = a * init[.] xor b * init[.] +// And only compute i*init[.] for i = 1,2,4,8,...,128. This can be done +// using repeated multiplication by x in the finite field. +// +// It is safe to call this function on two separate threads for the same table. +// All invocations will write the same data to the table, and within a thread each entry is written +// before it is read. Doing parallel initializations of the same table can be very inefficient +// as multiple cores will be fighting over the cache lines, but the result will be correct. +// We use this property to initialize the tables lazily. +// +static +VOID +SYMCRYPT_CALL +SymCryptAesInitMatrixMultiplyTable( _Out_ SYMCRYPT_ALIGN BYTE MatrixMult[256][4], + _In_ SYMCRYPT_ALIGN BYTE init[4] + ) +{ + int i,j; + SYMCRYPT_ALIGN BYTE initCopy[4]; + UINT32 initCopyAsUint32; + + // + // We copy the init value so that we can modify it without worrying about multi-threading + // issues. + // + *(UINT32 *)initCopy = *(UINT32 *)init; + + *(UINT32 *)MatrixMult[0] = 0; + for( i=1; i<256; i<<=1 ) + { + initCopyAsUint32 = *(UINT32 *)initCopy; + for( j=0; j<i; j++ ) + { + *(UINT32 *)MatrixMult[i+j] = *(UINT32 *)MatrixMult[j] ^ initCopyAsUint32; + } + for( j=0; j<4; j++ ) + { + initCopy[j] = MULT_BY_X( initCopy[j] ); + } + } +} + + +// +// SymCryptAesInitialize +// +// Initialize the static tables for the AES implementation. +// This function is called by the key expansion function if it finds the +// tables not initialized. 
+// +// This leads to an interesting case where multiple threads running on multiple +// CPUs run this initialization code at the same time. +// This code is carefully structured to allow that. When global data is written it is +// always with the final value, and we never read uninitialized global data. +// Thus, even if two CPUs run this code at the same time, they will both initialize each +// memory location to the same correct value and the end result will be correct. +// (Performance will suffer due to the fact that cache lines will be bounced back and forth +// between the two CPUs, but that is not a significant concern as this code is used only once.) +// +// At the end of the initialization the flag is set to indicate that further +// key expansion invocations do not need to re-run the initialization. +// We use memory barriers to keep this multi-thread safe. +// +static +VOID +SYMCRYPT_CALL +SymCryptAesInitialize(void) +{ + int i,j; + BYTE S; + BYTE Stimes2; + + // + // We force alignment of these arrays as we sometimes treat them as a UINT32 + // + SYMCRYPT_ALIGN BYTE InvMatrixEntry[4] = {0xe, 0x9, 0xd, 0xb}; + SYMCRYPT_ALIGN BYTE MatrixEntry[4] = {2, 1, 1, 3}; + SYMCRYPT_ALIGN BYTE MatrixScratch[256][4]; + + // Generate the forward MDS multiplication table in the scratch space + SymCryptAesInitMatrixMultiplyTable( MatrixScratch, MatrixEntry ); + + // Initialize first table of SymCryptAesInvMatrixMult + SymCryptAesInitMatrixMultiplyTable( SymCryptAesInvMatrixMult[0], InvMatrixEntry ); + + // + // Build the InvSbox table and the first table of SymCryptAesSboxMatrixMult and + // SymCryptAesInvSboxMatrixMult + // + for( i=0; i<256; i++ ) { + S = SymCryptAesSbox[i]; + SymCryptAesInvSbox[S] = (BYTE) i; + *(UINT32 *)SymCryptAesSboxMatrixMult[0][i] = *(UINT32 *)MatrixScratch[S]; + *(UINT32 *)SymCryptAesInvSboxMatrixMult[0][S] = *(UINT32 *)SymCryptAesInvMatrixMult[0][i]; + } + + // + // Now we generate the byte rotations of the tables + // + 
SymCryptAesCreateRotatedTables( SymCryptAesSboxMatrixMult ); + SymCryptAesCreateRotatedTables( SymCryptAesInvSboxMatrixMult ); + SymCryptAesCreateRotatedTables( SymCryptAesInvMatrixMult ); + + // + // This is a memory barrier. It ensures that all the memory writes we do before the barrier + // are globally visible to other CPUs before the memory writes we do after the fence. + // In this particular case, it ensures that every CPU sees the completed tables before + // it sees the flag as set. + // + MemoryBarrier(); + + // + // Set the flag to signal that the tables are initialized. + // + SymCryptAesTablesInitialized = TRUE; +} + + +*/ diff --git a/libs/symcrypt/lib/aes-neon.c b/libs/symcrypt/lib/aes-neon.c new file mode 100644 index 00000000000..3c0d3fb1817 --- /dev/null +++ b/libs/symcrypt/lib/aes-neon.c @@ -0,0 +1,1889 @@ +// +// aes-neon.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// All NEON-based code for AES operations +// + +#include "precomp.h" + +#if SYMCRYPT_CPU_ARM64 + +#pragma clang attribute push (__attribute__((target("aes"))), apply_to=function) + +#define vzeroq() vdupq_n_u64(0) + + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxNeon( _In_reads_(4) PCBYTE pIn, _Out_writes_(4) PBYTE pOut ) +{ + /* + __m128i x; + + x = _mm_set1_epi32( *(int *) pIn ); + + x = _mm_aeskeygenassist_si128( x, 0 ); + + *(unsigned *) pOut = x.m128i_u32[0]; + */ + __n128 x; + + // + // There is no pure S-box lookup instruction, but the AESE instruction + // does a ShiftRow followed by a SubBytes. + // If we duplicate the input value to all 4 lanes, then the ShiftRow does nothing + // and the SubBytes will do the S-box lookup. 
+ // + x = vdupq_n_u32( *(unsigned int *) pIn ); + x = vaeseq_u8( x, vzeroq() ); + vst1q_lane_s32( pOut, x, 0 ); + //*(unsigned int *) pOut = x.n128_u32[0]; +} + + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyNeon( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ) +{ + *(__n128 *) pDecryptionRoundKey = vaesimcq_u8( *(__n128 *)pEncryptionRoundKey ); +} + +// +// When doing a full round of AES encryption, make sure to give compiler opportunity to schedule dependent +// aese/aesmc pairs to enable instruction fusion in many arm64 CPUs +// +#define AESE_AESMC( c, rk ) \ +{ \ + c = vaeseq_u8( c, rk ); \ + c = vaesmcq_u8( c ); \ +}; + +// +// When doing a full round of AES decryption, make sure to give compiler opportunity to schedule dependent +// aesd/aesimc pairs to enable instruction fusion in many arm64 CPUs +// +#define AESD_AESIMC( c, rk ) \ +{ \ + c = vaesdq_u8( c, rk ); \ + c = vaesimcq_u8( c ); \ +}; + +// +// Using a loop with AESE_AESMC and AESD_AESIMC, the compiler can still prematurely rearrange the loop and +// lose opportunity for scheduling adjacent pairs. +// Instead, explicitly unroll the AES rounds with this macro. +// Takes the name of first_round, full_round, and final_round macros, and uses them to construct block to +// handle AES (128|192|256) for either encrypt or decrypt. For now assume only need at most 8 state +// variables in the macros. +// Assumes roundKey, keyPtr, and keyLimit are defined in calling context. 
+// +#define UNROLL_AES_ROUNDS_FIRST( first_round, full_round, final_round, c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + /* Do 9 full rounds (AES-128|AES-192|AES-256) */ \ + roundKey = *keyPtr++; \ + first_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ +\ + if ( keyPtr < keyLimit ) \ + { \ + /* Do 2 more full rounds (AES-192|AES-256) */ \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ +\ + if ( keyPtr < keyLimit ) \ + { \ + /* Do 2 more full rounds (AES-256) */ \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + } \ + } \ +\ + /* Do final round (AES-128|AES-192|AES-256) */ \ + final_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +}; + +// Only AES_ENCRYPT_1_CHAIN needs to specify the first round differently from the full round +#define UNROLL_AES_ROUNDS( full_round, final_round, c0, c1, c2, c3, c4, c5, c6, c7 ) \ + UNROLL_AES_ROUNDS_FIRST( full_round, full_round, final_round, c0, c1, c2, c3, c4, c5, c6, c7 ) + +#define AES_ENCRYPT_ROUND_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESE_AESMC( c0, roundKey ) \ +}; +#define AES_ENCRYPT_FINAL_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaeseq_u8( c0, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, 
roundKey ); \ +}; + +#define AES_ENCRYPT_1( pExpandedKey, c0 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_ENCRYPT_ROUND_1, \ + AES_ENCRYPT_FINAL_1, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +// Perform AES encryption without the last round key and with a specified first round key +// +// For algorithms where performance is dominated by a chain of dependent AES rounds (i.e. CBC encryption, CCM, CMAC) +// we can gain a reasonable performance uplift by computing (last round key ^ this plaintext block ^ first round key) +// off the critical path and using this computed value in place of first round key in the first AESE instruction. +#define AES_ENCRYPT_CHAIN_FIRST_1( c0, mergedFirstRoundKey, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESE_AESMC( c0, mergedFirstRoundKey ) \ +}; +#define AES_ENCRYPT_CHAIN_FINAL_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaeseq_u8( c0, roundKey ); \ +}; + +#define AES_ENCRYPT_1_CHAIN( pExpandedKey, c0, mergedFirstRoundKey ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ +\ + UNROLL_AES_ROUNDS_FIRST( \ + AES_ENCRYPT_CHAIN_FIRST_1, \ + AES_ENCRYPT_ROUND_1, \ + AES_ENCRYPT_CHAIN_FINAL_1, \ + c0, mergedFirstRoundKey, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_ENCRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESE_AESMC( c0, roundKey ) \ + AESE_AESMC( c1, roundKey ) \ + AESE_AESMC( c2, roundKey ) \ + AESE_AESMC( c3, roundKey ) \ +}; +#define AES_ENCRYPT_FINAL_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaeseq_u8( c0, roundKey ); \ + c1 = vaeseq_u8( c1, roundKey ); \ + c2 = vaeseq_u8( c2, roundKey ); \ + c3 = vaeseq_u8( c3, roundKey ); \ + roundKey = *keyPtr; \ + c0 = 
veorq_u8( c0, roundKey ); \ + c1 = veorq_u8( c1, roundKey ); \ + c2 = veorq_u8( c2, roundKey ); \ + c3 = veorq_u8( c3, roundKey ); \ +}; + +#define AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_ENCRYPT_ROUND_4, \ + AES_ENCRYPT_FINAL_4, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESE_AESMC( c0, roundKey ) \ + AESE_AESMC( c1, roundKey ) \ + AESE_AESMC( c2, roundKey ) \ + AESE_AESMC( c3, roundKey ) \ + AESE_AESMC( c4, roundKey ) \ + AESE_AESMC( c5, roundKey ) \ + AESE_AESMC( c6, roundKey ) \ + AESE_AESMC( c7, roundKey ) \ +}; +#define AES_ENCRYPT_FINAL_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaeseq_u8( c0, roundKey ); \ + c1 = vaeseq_u8( c1, roundKey ); \ + c2 = vaeseq_u8( c2, roundKey ); \ + c3 = vaeseq_u8( c3, roundKey ); \ + c4 = vaeseq_u8( c4, roundKey ); \ + c5 = vaeseq_u8( c5, roundKey ); \ + c6 = vaeseq_u8( c6, roundKey ); \ + c7 = vaeseq_u8( c7, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, roundKey ); \ + c1 = veorq_u8( c1, roundKey ); \ + c2 = veorq_u8( c2, roundKey ); \ + c3 = veorq_u8( c3, roundKey ); \ + c4 = veorq_u8( c4, roundKey ); \ + c5 = veorq_u8( c5, roundKey ); \ + c6 = veorq_u8( c6, roundKey ); \ + c7 = veorq_u8( c7, roundKey ); \ +}; + +#define AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_ENCRYPT_ROUND_8, \ + AES_ENCRYPT_FINAL_8, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_DECRYPT_ROUND_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESD_AESIMC( c0, 
roundKey ) \ +}; +#define AES_DECRYPT_FINAL_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaesdq_u8( c0, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, roundKey ); \ +}; + +#define AES_DECRYPT_1( pExpandedKey, c0 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)pExpandedKey->lastEncRoundKey; \ + keyLimit = (const __n128 *)pExpandedKey->lastDecRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_DECRYPT_ROUND_1, \ + AES_DECRYPT_FINAL_1, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_DECRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESD_AESIMC( c0, roundKey ) \ + AESD_AESIMC( c1, roundKey ) \ + AESD_AESIMC( c2, roundKey ) \ + AESD_AESIMC( c3, roundKey ) \ +}; +#define AES_DECRYPT_FINAL_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaesdq_u8( c0, roundKey ); \ + c1 = vaesdq_u8( c1, roundKey ); \ + c2 = vaesdq_u8( c2, roundKey ); \ + c3 = vaesdq_u8( c3, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, roundKey ); \ + c1 = veorq_u8( c1, roundKey ); \ + c2 = veorq_u8( c2, roundKey ); \ + c3 = veorq_u8( c3, roundKey ); \ +}; + +#define AES_DECRYPT_4( pExpandedKey, c0, c1, c2, c3 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)pExpandedKey->lastEncRoundKey; \ + keyLimit = (const __n128 *)pExpandedKey->lastDecRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_DECRYPT_ROUND_4, \ + AES_DECRYPT_FINAL_4, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_DECRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESD_AESIMC( c0, roundKey ) \ + AESD_AESIMC( c1, roundKey ) \ + AESD_AESIMC( c2, roundKey ) \ + AESD_AESIMC( c3, roundKey ) \ + AESD_AESIMC( c4, roundKey ) \ + AESD_AESIMC( c5, roundKey ) \ + AESD_AESIMC( c6, roundKey ) \ + AESD_AESIMC( c7, roundKey ) \ +}; +#define AES_DECRYPT_FINAL_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaesdq_u8( c0, roundKey ); \ + c1 = vaesdq_u8( c1, roundKey 
); \ + c2 = vaesdq_u8( c2, roundKey ); \ + c3 = vaesdq_u8( c3, roundKey ); \ + c4 = vaesdq_u8( c4, roundKey ); \ + c5 = vaesdq_u8( c5, roundKey ); \ + c6 = vaesdq_u8( c6, roundKey ); \ + c7 = vaesdq_u8( c7, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, roundKey ); \ + c1 = veorq_u8( c1, roundKey ); \ + c2 = veorq_u8( c2, roundKey ); \ + c3 = veorq_u8( c3, roundKey ); \ + c4 = veorq_u8( c4, roundKey ); \ + c5 = veorq_u8( c5, roundKey ); \ + c6 = veorq_u8( c6, roundKey ); \ + c7 = veorq_u8( c7, roundKey ); \ +}; + +#define AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)pExpandedKey->lastEncRoundKey; \ + keyLimit = (const __n128 *)pExpandedKey->lastDecRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_DECRYPT_ROUND_8, \ + AES_DECRYPT_FINAL_8, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + + + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ) +{ + __n128 c; + + c = *( __n128 * ) pbSrc; + + AES_ENCRYPT_1( pExpandedKey, c ); + + *(__n128 *) pbDst = c; +} + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ) +{ + __n128 c; + + c = *( __n128 * ) pbSrc; + + AES_DECRYPT_1( pExpandedKey, c ); + + *(__n128 *) pbDst = c; +} + + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 c = *(__n128 *)pbChainingValue; + __n128 rk0 = *(__n128 *) &pExpandedKey->RoundKey[0]; + __n128 rkLast = *(__n128 *) pExpandedKey->lastEncRoundKey; + 
__n128 d, rk0AndLast; + + // This algorithm is dominated by chain of dependent AES rounds, so we want to avoid EOR + // instructions on the critical path where possible + // We can compute (last round key ^ this plaintext block ^ first round key) off the critical + // path and use this with AES_ENCRYPT_1_CHAIN so that only AES instructions write to c in + // the main loop + rk0AndLast = veorq_u8( rk0, rkLast ); + + c = veorq_u8( c, rkLast ); + + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + d = veorq_u8( *(__n128 *)pbSrc, rk0AndLast); + AES_ENCRYPT_1_CHAIN( pExpandedKey, c, d ); + *(__n128 *)pbDst = veorq_u8( c, rkLast ); + + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } + *(__n128 *)pbChainingValue = veorq_u8( c, rkLast ); +} + +// Disable warnings and VC++ runtime checks for use of uninitialized values (by design) +#pragma warning(push) +#pragma warning( disable: 6001 4701 ) +#pragma runtime_checks( "u", off ) +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 chain; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + __n128 d0, d1, d2, d3, d4, d5, d6, d7; + const __n128 * pSrc = (const __n128 *) pbSrc; + __n128 * pDst = (__n128 *) pbDst; + SIZE_T cData = cbData / SYMCRYPT_AES_BLOCK_SIZE; + + if( cData < 1 ) + { + return; + } + + chain = *(__n128 *) pbChainingValue; + + // + // First we do all multiples of 8 blocks + // + + while( cData >= 8 ) + { + d0 = c0 = pSrc[0]; + d1 = c1 = pSrc[1]; + d2 = c2 = pSrc[2]; + d3 = c3 = pSrc[3]; + d4 = c4 = pSrc[4]; + d5 = c5 = pSrc[5]; + d6 = c6 = pSrc[6]; + d7 = c7 = pSrc[7]; + + AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + c0 = veorq_u8( c0, chain ); + c1 = veorq_u8( c1, d0 ); + c2 = veorq_u8( c2, d1 ); + c3 = veorq_u8( c3, d2 ); 
+ c4 = veorq_u8( c4, d3 ); + c5 = veorq_u8( c5, d4 ); + c6 = veorq_u8( c6, d5 ); + c7 = veorq_u8( c7, d6 ); + chain = d7; + + pDst[0] = c0; + pDst[1] = c1; + pDst[2] = c2; + pDst[3] = c3; + pDst[4] = c4; + pDst[5] = c5; + pDst[6] = c6; + pDst[7] = c7; + + pSrc += 8; + pDst += 8; + cData -= 8; + } + + if( cData >= 1 ) + { + // + // There is remaining work to be done + // + d0 = c0 = pSrc[0]; + if( cData >= 2 ) + { + d1 = c1 = pSrc[1]; + if( cData >= 3 ) + { + d2 = c2 = pSrc[2]; + if( cData >= 4 ) + { + d3 = c3 = pSrc[3]; + if( cData >= 5 ) + { + d4 = c4 = pSrc[4]; + if( cData >= 6 ) + { + d5 = c5 = pSrc[5]; + if( cData >= 7 ) + { + d6 = c6 = pSrc[6]; + } + } + } + } + } + } + + // + // Decrypt 1, 4, or 8 blocks in AES-CBC mode. This might decrypt uninitialized registers, + // but those will not be used when we store the results. + // + if( cData > 4 ) + { + AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + c0 = veorq_u8( c0, chain ); + c1 = veorq_u8( c1, d0 ); + c2 = veorq_u8( c2, d1 ); + c3 = veorq_u8( c3, d2 ); + c4 = veorq_u8( c4, d3 ); + c5 = veorq_u8( c5, d4 ); + c6 = veorq_u8( c6, d5 ); + } + else if( cData > 1 ) + { + AES_DECRYPT_4( pExpandedKey, c0, c1, c2, c3 ); + c0 = veorq_u8( c0, chain ); + c1 = veorq_u8( c1, d0 ); + c2 = veorq_u8( c2, d1 ); + c3 = veorq_u8( c3, d2 ); + } else + { + AES_DECRYPT_1( pExpandedKey, c0 ); + c0 = veorq_u8( c0, chain ); + } + + chain = pSrc[ cData - 1]; + pDst[0] = c0; + if( cData >= 2 ) + { + pDst[1] = c1; + if( cData >= 3 ) + { + pDst[2] = c2; + if( cData >= 4 ) + { + pDst[3] = c3; + if( cData >= 5 ) + { + pDst[4] = c4; + if( cData >= 6 ) + { + pDst[5] = c5; + if( cData >= 7 ) + { + pDst[6] = c6; + } + } + } + } + } + } + } + + *(__n128 *)pbChainingValue = chain; + + return; +} +#pragma runtime_checks( "u", restore ) +#pragma warning( pop ) + + + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMacNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + 
_In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + __n128 c = *(__n128 *)pbChainingValue; + __n128 rk0 = *(__n128 *) &pExpandedKey->RoundKey[0]; + __n128 rkLast = *(__n128 *) pExpandedKey->lastEncRoundKey; + __n128 d, rk0AndLast; + + // This algorithm is dominated by chain of dependent AES rounds, so we want to avoid EOR + // instructions on the critical path where possible + // We can compute (last round key ^ this plaintext block ^ first round key) off the critical + // path and use this with AES_ENCRYPT_1_CHAIN so that only AES instructions write to c in + // the main loop + rk0AndLast = veorq_u8( rk0, rkLast ); + + c = veorq_u8( c, rkLast ); + + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + d = veorq_u8( *(__n128 *)pbData, rk0AndLast); + AES_ENCRYPT_1_CHAIN( pExpandedKey, c, d ); + + pbData += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } + *(__n128 *)pbChainingValue = veorq_u8( c, rkLast ); +} + +// Disable warnings and VC++ runtime checks for use of uninitialized values (by design) +#pragma warning(push) +#pragma warning( disable: 6001 4701 ) +#pragma runtime_checks( "u", off ) +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + const __n128 * pSrc = (const __n128 *) pbSrc; + __n128 * pDst = (__n128 *) pbDst; + + while( cbData >= 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + c0 = pSrc[0]; + c1 = pSrc[1]; + c2 = pSrc[2]; + c3 = pSrc[3]; + c4 = pSrc[4]; + c5 = pSrc[5]; + c6 = pSrc[6]; + c7 = pSrc[7]; + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + pDst[0] = c0; + pDst[1] = c1; + pDst[2] = c2; + pDst[3] = c3; + pDst[4] = c4; + pDst[5] = c5; + pDst[6] = c6; + pDst[7] = c7; + + pSrc += 8; + pDst += 8; + cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbData < 16 ) + { + return; + } + + c0 = pSrc[0]; + if( cbData >= 32 ) + { + c1 = 
pSrc[1]; + if( cbData >= 48 ) + { + c2 = pSrc[2]; + if( cbData >= 64 ) + { + c3 = pSrc[3]; + if( cbData >= 80 ) + { + c4 = pSrc[4]; + if( cbData >= 96 ) + { + c5 = pSrc[5]; + if( cbData >= 112 ) + { + c6 = pSrc[6]; + } + } + } + } + } + } + + if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE ) + { + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + } + else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE ) + { + AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 ); + } + else + { + AES_ENCRYPT_1( pExpandedKey, c0 ); + } + + pDst[0] = c0; + if( cbData >= 32 ) + { + pDst[1] = c1; + if( cbData >= 48 ) + { + pDst[2] = c2; + if( cbData >= 64 ) + { + pDst[3] = c3; + if( cbData >= 80 ) + { + pDst[4] = c4; + if( cbData >= 96 ) + { + pDst[5] = c5; + if( cbData >= 112 ) + { + pDst[6] = c6; + } + } + } + } + } + } +} +#pragma runtime_checks( "u", restore) +#pragma warning( pop ) + +#pragma warning(push) +#pragma warning( disable:4701 ) // "Use of uninitialized variable" +#pragma runtime_checks( "u", off ) + +#define SYMCRYPT_AesCtrMsbXxNeon SymCryptAesCtrMsb64Neon +#define VADDQ_UXX vaddq_u64 +#define VSUBQ_UXX vsubq_u64 + +#include "aes-pattern.c" + +#undef VSUBQ_UXX +#undef VADDQ_UXX +#undef SYMCRYPT_AesCtrMsbXxNeon + +#define SYMCRYPT_AesCtrMsbXxNeon SymCryptAesCtrMsb32Neon +#define VADDQ_UXX vaddq_u32 +#define VSUBQ_UXX vsubq_u32 + +#include "aes-pattern.c" + +#undef VSUBQ_UXX +#undef VADDQ_UXX +#undef SYMCRYPT_AesCtrMsbXxNeon + +#pragma runtime_checks( "u", restore ) +#pragma warning(pop) + + +// +// Multiply by alpha +// +// <</>> indicate shifts on 128-bit values +// <<<</>>>> indicate shifts on 32-bit values +// + +// Multiply by ALPHA +// t1 = Input <<<< 1 words shifted left by 1 +// t2 = Input >>>> 31 words shifted right by 31 +// t1 = t1 ^ (t2 << 32) t1 = S << 1 +// t2 = t2 >> 96 t2 = highest bit of S +// t2 = (t2 <<<< 7) + (t2 <<<<3) - (t2) multiply polynomially by 0x87 , we can use - because we only have one bit input +// res = t1 ^ t2 +// +#define XTS_MUL_ALPHA_old( 
_in, _res ) \ +{\ + __n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u32( _in, 1 ); \ + _t2 = vshrq_n_u32( _in, 31); \ + _t1 = veorq_u32( _t1, vextq_u32( vZero, _t2, 3 )); \ + _t2 = vextq_u32( _t2, vZero, 3); \ + _t2 = vsubq_u32( vaddq_u32( vshlq_n_u32( _t2, 7 ), vshlq_n_u32( _t2, 3 ) ), _t2 ); \ + _res = veorq_u32( _t1, _t2 ); \ +} + +// +// Another approach, use signed shift right to duplicate the bits of the leftmost byte +// and an AND to mask the modulo reduction and the extraneous bits in the other bytes at the same time. +// vAlphaMask = (1, 1, ..., 1, 0x87 ) +// +#define XTS_MUL_ALPHA( _in, _res ) \ +{\ + __n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u8( _in, 1 ); \ + _t2 = vshrq_n_s8( _in, 7 ); \ + _t2 = vextq_u8( _t2, _t2, 15 ); \ + _t2 = vandq_u8( _t2, vAlphaMask ); \ + _res = veorq_u8( _t2, _t1 ); \ +} + + +// Multiply by ALPHA^2 +// t1 = Input <<<< 2 +// t2 = Input >>>> 30 +// t1 = t1 ^ (t2 << 32) +// t2 = t2 >> 96 +// t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2 +// res = t1 ^ t2 +#define XTS_MUL_ALPHA2( _in, _res ) \ +{\ + __n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u32( _in, 2 ); \ + _t2 = vshrq_n_u32( _in, 30); \ + _t1 = veorq_u32( _t1, vextq_u32( vZero, _t2, 3 )); \ + _t2 = vextq_u32( _t2, vZero, 3 ); \ + _t2 = veorq_u32( veorq_u32( veorq_u32( _t2, vshlq_n_u32( _t2, 7 )), vshlq_n_u32( _t2, 2 ) ), vshlq_n_u32( _t2, 1 ) ); \ + _res = veorq_u32( _t1, _t2 ); \ +} + +// Multiply by ALPHA^4 +// t1 = Input <<<< 4 +// t2 = Input >>>> 28 +// t1 = t1 ^ (t2 << 32) +// t2 = t2 >> 96 +// t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2 +// res = t1 ^ t2 +#define XTS_MUL_ALPHA4( _in, _res ) \ +{\ + __n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u32( _in, 4 ); \ + _t2 = vshrq_n_u32( _in, 28); \ + _t1 = veorq_u32( _t1, vextq_u32( vZero, _t2, 3 )); \ + _t2 = vextq_u32( _t2, vZero, 3 ); \ + _t2 = veorq_u32( veorq_u32( veorq_u32( _t2, vshlq_n_u32( _t2, 7 )), vshlq_n_u32( _t2, 2 ) ), vshlq_n_u32( _t2, 1 ) ); \ + _res = veorq_u32( _t1, _t2 ); \ +} + +#define XTS_MUL_ALPHA5( _in, _res ) \ +{\ + 
__n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u32( _in, 5 ); \ + _t2 = vshrq_n_u32( _in, 27); \ + _t1 = veorq_u32( _t1, vextq_u32( vZero, _t2, 3 )); \ + _t2 = vextq_u32( _t2, vZero, 3 ); \ + _t2 = veorq_u32( veorq_u32( veorq_u32( _t2, vshlq_n_u32( _t2, 7 )), vshlq_n_u32( _t2, 2 ) ), vshlq_n_u32( _t2, 1 ) ); \ + _res = veorq_u32( _t1, _t2 ); \ +} + +// Multiply by ALPHA^8 +// res = (Input << 8) | (Input >> 120) +// t2 = (Input >> 120) * 0x86 +// i.e. ((Input >> 120) <<<< 7) ^ ((Input >> 120) <<<< 2) ^ ((Input >> 120) <<<< 1) +// the 0x01 component is already in res where we want it +// res = res ^ t2 +// +// vAlphaMultiplier = (0, 0, ..., 0, 0x86 ) + +#define XTS_MUL_ALPHA8( _in, _res ) \ +{\ + __n128 _t2;\ +\ + _res = vextq_u8( _in, _in, 15 ); \ + _t2 = vmull_p8( vget_low_p8(_res), vAlphaMultiplier ); \ + _res = veorq_u32( _res, _t2 ); \ +} + + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_(SYMCRYPT_AES_BLOCK_SIZE)PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 t0, t1, t2, t3, t4, t5, t6, t7; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + const __n128 vZero = vmovq_n_u8(0); + const __n128 vAlphaMask = SYMCRYPT_SET_N128_U8(0x87, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + const __n64 vAlphaMultiplier = SYMCRYPT_SET_N64_U64(0x0000000000000086); + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + BYTE tailBuf[2*SYMCRYPT_AES_BLOCK_SIZE]; + + SYMCRYPT_ASSERT(cbData >= SYMCRYPT_AES_BLOCK_SIZE); + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 8 blocks in the tail loop + cbDataTail = cbData & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle 
at least 1 whole block in the tail + // + // Note that our caller has ensured we have at least 1 whole block + // to process, this is checked in debug build + // This means that cbDataTail is in [1,15] at this point iff there are + // at least 8 whole blocks to process; so the below does not cause + // cbDataTail or cbDataMain to exceed cbData + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? (8*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + t0 = *(__n128 *)pbTweakBlock; + + if( cbDataMain > 0 ) + { + // Set up for main loop entry + // NOTE: We load the first 8 blocks and store the last 8 blocks out of the loop to allow + // greater instruction interleaving in the main loop. + // This appears to give about 5-8% performance uplift on little (in-order) cores and has + // no effect on big cores. + XTS_MUL_ALPHA4( t0, t4 ); + XTS_MUL_ALPHA ( t0, t1 ); + XTS_MUL_ALPHA ( t4, t5 ); + XTS_MUL_ALPHA ( t1, t2 ); + XTS_MUL_ALPHA ( t5, t6 ); + XTS_MUL_ALPHA ( t2, t3 ); + XTS_MUL_ALPHA ( t6, t7 ); + + c0 = veorq_u32( vld1q_u8( pbSrc + (0*16) ), t0 ); + c1 = veorq_u32( vld1q_u8( pbSrc + (1*16) ), t1 ); + c2 = veorq_u32( vld1q_u8( pbSrc + (2*16) ), t2 ); + c3 = veorq_u32( vld1q_u8( pbSrc + (3*16) ), t3 ); + c4 = veorq_u32( vld1q_u8( pbSrc + (4*16) ), t4 ); + c5 = veorq_u32( vld1q_u8( pbSrc + (5*16) ), t5 ); + c6 = veorq_u32( vld1q_u8( pbSrc + (6*16) ), t6 ); + c7 = veorq_u32( vld1q_u8( pbSrc + (7*16) ), t7 ); + + for(;;) + { + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + cbDataMain -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + if( cbDataMain < 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + // Interleave the final xor, write, and compute next tweak block, and load, and first xor. 
+ // This reduces register pressure and is more efficient. + vst1q_u8( pbDst + (0*16), veorq_u32( c0, t0 ) ); + vst1q_u8( pbDst + (1*16), veorq_u32( c1, t1 ) ); + vst1q_u8( pbDst + (2*16), veorq_u32( c2, t2 ) ); + vst1q_u8( pbDst + (3*16), veorq_u32( c3, t3 ) ); + vst1q_u8( pbDst + (4*16), veorq_u32( c4, t4 ) ); + vst1q_u8( pbDst + (5*16), veorq_u32( c5, t5 ) ); + vst1q_u8( pbDst + (6*16), veorq_u32( c6, t6 ) ); + vst1q_u8( pbDst + (7*16), veorq_u32( c7, t7 ) ); + + XTS_MUL_ALPHA8( t0, t0 ); + XTS_MUL_ALPHA8( t1, t1 ); + XTS_MUL_ALPHA8( t2, t2 ); + XTS_MUL_ALPHA8( t3, t3 ); + XTS_MUL_ALPHA8( t4, t4 ); + XTS_MUL_ALPHA8( t5, t5 ); + XTS_MUL_ALPHA8( t6, t6 ); + XTS_MUL_ALPHA8( t7, t7 ); + + c0 = veorq_u32( vld1q_u8( pbSrc + (0*16) ), t0 ); + c1 = veorq_u32( vld1q_u8( pbSrc + (1*16) ), t1 ); + c2 = veorq_u32( vld1q_u8( pbSrc + (2*16) ), t2 ); + c3 = veorq_u32( vld1q_u8( pbSrc + (3*16) ), t3 ); + c4 = veorq_u32( vld1q_u8( pbSrc + (4*16) ), t4 ); + c5 = veorq_u32( vld1q_u8( pbSrc + (5*16) ), t5 ); + c6 = veorq_u32( vld1q_u8( pbSrc + (6*16) ), t6 ); + c7 = veorq_u32( vld1q_u8( pbSrc + (7*16) ), t7 ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + vst1q_u8( pbDst + (0*16), veorq_u32( c0, t0 ) ); + vst1q_u8( pbDst + (1*16), veorq_u32( c1, t1 ) ); + vst1q_u8( pbDst + (2*16), veorq_u32( c2, t2 ) ); + vst1q_u8( pbDst + (3*16), veorq_u32( c3, t3 ) ); + vst1q_u8( pbDst + (4*16), veorq_u32( c4, t4 ) ); + vst1q_u8( pbDst + (5*16), veorq_u32( c5, t5 ) ); + vst1q_u8( pbDst + (6*16), veorq_u32( c6, t6 ) ); + vst1q_u8( pbDst + (7*16), veorq_u32( c7, t7 ) ); + + // We won't do another 8-block set + // Update only the first tweak block in case it is needed for tail + XTS_MUL_ALPHA8( t0, t0 ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail == 0 ) + { + return; // <-- expected case; early return here + } + + // Rare case, with data unit length not being multiple of 128 bytes, handle the tail one block at a time + while( cbDataTail >= 2*SYMCRYPT_AES_BLOCK_SIZE ) + { + 
c0 = veorq_u32( vld1q_u8(pbSrc), t0 ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + AES_ENCRYPT_1( pExpandedKey, c0 ); + vst1q_u8( pbDst, veorq_u32( c0, t0 ) ); + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + XTS_MUL_ALPHA( t0, t0 ); + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail > SYMCRYPT_AES_BLOCK_SIZE ) + { + // Ciphertext stealing encryption + // + // +--------------+ + // | | + // | V + // +-----------------+ | +-----+-----------+ + // | P_m-1 | | | P_m |++++CP+++++| + // +-----------------+ | +-----+-----------+ + // | | | + // enc_m-1 | enc_m + // | | | + // V | V + // +-----+-----------+ | +-----------------+ + // | C_m |++++CP+++++|--+ | C_m-1 | + // +-----+-----------+ +-----------------+ + // | / + // +---------------- / --+ + // / | + // | V + // +-----------------+ | +-----+ + // | C_m-1 |<-+ | C_m | + // +-----------------+ +-----+ + + // Encrypt penultimate plaintext block into tailBuf + c0 = veorq_u32( vld1q_u8(pbSrc), t0 ); + AES_ENCRYPT_1( pExpandedKey, c0 ); + c0 = veorq_u32( c0, t0 ); + vst1q_u8( &tailBuf[0], c0 ); + vst1q_u8( &tailBuf[SYMCRYPT_AES_BLOCK_SIZE], c0 ); + + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + + // Copy final plaintext bytes to prefix of tailBuf - we must read before writing to support in-place encryption + memcpy( &tailBuf[0], pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbDataTail ); + // Copy prefix of tailBuf[SYMCRYPT_AES_BLOCK_SIZE] to the right place in the destination buffer + memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &tailBuf[SYMCRYPT_AES_BLOCK_SIZE], cbDataTail ); + + // Do final tweak update + XTS_MUL_ALPHA( t0, t0 ); + + // Load updated tailBuf into c0 + c0 = vld1q_u8( &tailBuf[0] ); + } else { + // Just load final plaintext block into c0 + c0 = vld1q_u8( pbSrc ); + } + + // Final full block encryption + c0 = veorq_u32( c0, t0 ); + AES_ENCRYPT_1( pExpandedKey, c0 ); + vst1q_u8( pbDst, veorq_u32( c0, t0 ) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + 
_Inout_updates_(SYMCRYPT_AES_BLOCK_SIZE)PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 t0, t1, t2, t3, t4, t5, t6, t7; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + const __n128 vZero = vmovq_n_u8(0); + const __n128 vAlphaMask = SYMCRYPT_SET_N128_U8(0x87, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + const __n64 vAlphaMultiplier = SYMCRYPT_SET_N64_U64(0x0000000000000086); + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + BYTE tailBuf[2*SYMCRYPT_AES_BLOCK_SIZE]; + + SYMCRYPT_ASSERT(cbData >= SYMCRYPT_AES_BLOCK_SIZE); + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 8 blocks in the tail loop + cbDataTail = cbData & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle at least 1 whole block in the tail + // + // Note that our caller has ensured we have at least 1 whole block + // to process, this is checked in debug build + // This means that cbDataTail is in [1,15] at this point iff there are + // at least 8 whole blocks to process; so the below does not cause + // cbDataTail or cbDataMain to exceed cbData + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? (8*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + t0 = *(__n128 *)pbTweakBlock; + t7 = t0; + + if( cbDataMain > 0 ) + { + // Set up for main loop entry + // NOTE: We load the first 8 blocks and store the last 8 blocks out of the loop to allow + // greater instruction interleaving in the main loop. 
+ // This appears to give about 5-8% performance uplift on little (in-order) cores and has + // no effect on big cores. + XTS_MUL_ALPHA4( t0, t4 ); + XTS_MUL_ALPHA ( t0, t1 ); + XTS_MUL_ALPHA ( t4, t5 ); + XTS_MUL_ALPHA ( t1, t2 ); + XTS_MUL_ALPHA ( t5, t6 ); + XTS_MUL_ALPHA ( t2, t3 ); + XTS_MUL_ALPHA ( t6, t7 ); + + c0 = veorq_u32( vld1q_u8( pbSrc + (0*16) ), t0 ); + c1 = veorq_u32( vld1q_u8( pbSrc + (1*16) ), t1 ); + c2 = veorq_u32( vld1q_u8( pbSrc + (2*16) ), t2 ); + c3 = veorq_u32( vld1q_u8( pbSrc + (3*16) ), t3 ); + c4 = veorq_u32( vld1q_u8( pbSrc + (4*16) ), t4 ); + c5 = veorq_u32( vld1q_u8( pbSrc + (5*16) ), t5 ); + c6 = veorq_u32( vld1q_u8( pbSrc + (6*16) ), t6 ); + c7 = veorq_u32( vld1q_u8( pbSrc + (7*16) ), t7 ); + + for(;;) + { + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + + AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + cbDataMain -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + if( cbDataMain < 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + // Interleave the final xor, write, and compute next tweak block, and load, and first xor. + // This reduces register pressure and is more efficient. 
+ vst1q_u8( pbDst + (0*16), veorq_u32( c0, t0 ) ); + vst1q_u8( pbDst + (1*16), veorq_u32( c1, t1 ) ); + vst1q_u8( pbDst + (2*16), veorq_u32( c2, t2 ) ); + vst1q_u8( pbDst + (3*16), veorq_u32( c3, t3 ) ); + vst1q_u8( pbDst + (4*16), veorq_u32( c4, t4 ) ); + vst1q_u8( pbDst + (5*16), veorq_u32( c5, t5 ) ); + vst1q_u8( pbDst + (6*16), veorq_u32( c6, t6 ) ); + vst1q_u8( pbDst + (7*16), veorq_u32( c7, t7 ) ); + + XTS_MUL_ALPHA8( t0, t0 ); + XTS_MUL_ALPHA8( t1, t1 ); + XTS_MUL_ALPHA8( t2, t2 ); + XTS_MUL_ALPHA8( t3, t3 ); + XTS_MUL_ALPHA8( t4, t4 ); + XTS_MUL_ALPHA8( t5, t5 ); + XTS_MUL_ALPHA8( t6, t6 ); + XTS_MUL_ALPHA8( t7, t7 ); + + c0 = veorq_u32( vld1q_u8( pbSrc + (0*16) ), t0 ); + c1 = veorq_u32( vld1q_u8( pbSrc + (1*16) ), t1 ); + c2 = veorq_u32( vld1q_u8( pbSrc + (2*16) ), t2 ); + c3 = veorq_u32( vld1q_u8( pbSrc + (3*16) ), t3 ); + c4 = veorq_u32( vld1q_u8( pbSrc + (4*16) ), t4 ); + c5 = veorq_u32( vld1q_u8( pbSrc + (5*16) ), t5 ); + c6 = veorq_u32( vld1q_u8( pbSrc + (6*16) ), t6 ); + c7 = veorq_u32( vld1q_u8( pbSrc + (7*16) ), t7 ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + vst1q_u8( pbDst + (0*16), veorq_u32( c0, t0 ) ); + vst1q_u8( pbDst + (1*16), veorq_u32( c1, t1 ) ); + vst1q_u8( pbDst + (2*16), veorq_u32( c2, t2 ) ); + vst1q_u8( pbDst + (3*16), veorq_u32( c3, t3 ) ); + vst1q_u8( pbDst + (4*16), veorq_u32( c4, t4 ) ); + vst1q_u8( pbDst + (5*16), veorq_u32( c5, t5 ) ); + vst1q_u8( pbDst + (6*16), veorq_u32( c6, t6 ) ); + vst1q_u8( pbDst + (7*16), veorq_u32( c7, t7 ) ); + + // We won't do another 8-block set + // Update only the first tweak block in case it is needed for tail + XTS_MUL_ALPHA8( t0, t0 ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail == 0 ) + { + return; // <-- expected case; early return here + } + + // Rare case, with data unit length not being multiple of 128 bytes, handle the tail one block at a time + while( cbDataTail >= 2*SYMCRYPT_AES_BLOCK_SIZE ) + { + c0 = veorq_u32( vld1q_u8( pbSrc ), t0 ); + pbSrc += 
SYMCRYPT_AES_BLOCK_SIZE; + AES_DECRYPT_1( pExpandedKey, c0 ); + vst1q_u8( pbDst, veorq_u32( c0, t0 ) ); + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + XTS_MUL_ALPHA( t0, t0 ); + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail > SYMCRYPT_AES_BLOCK_SIZE ) + { + // Ciphertext stealing decryption + // + // +--------------+ + // | | + // | V + // +-----------------+ | +-----+-----------+ + // | C_m-1 | | | C_m |++++CP+++++| + // +-----------------+ | +-----+-----------+ + // | | | + // dec_m | dec_m-1 + // | | | + // V | V + // +-----+-----------+ | +-----------------+ + // | P_m |++++CP+++++|--+ | P_m-1 | + // +-----+-----------+ +-----------------+ + // | / + // +---------------- / --+ + // / | + // | V + // +-----------------+ | +-----+ + // | P_m-1 |<-+ | P_m | + // +-----------------+ +-----+ + + // Do final tweak update into t1 + // Penultimate tweak is in t0, ready for final decryption + XTS_MUL_ALPHA( t0, t1 ); + + // Decrypt penultimate ciphertext block into tailBuf + c0 = veorq_u32( vld1q_u8( pbSrc ), t1 ); + AES_DECRYPT_1( pExpandedKey, c0 ); + c0 = veorq_u32( c0, t1 ); + vst1q_u8( &tailBuf[0], c0 ); + vst1q_u8( &tailBuf[SYMCRYPT_AES_BLOCK_SIZE], c0 ); + + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + + // Copy final ciphertext bytes to prefix of tailBuf - we must read before writing to support in-place decryption + memcpy( &tailBuf[0], pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbDataTail ); + // Copy prefix of tailBuf[SYMCRYPT_AES_BLOCK_SIZE] to the right place in the destination buffer + memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &tailBuf[SYMCRYPT_AES_BLOCK_SIZE], cbDataTail ); + + // Load updated tailBuf into c0 + c0 = vld1q_u8( &tailBuf[0] ); + } else { + // Just load final ciphertext block into c0 + c0 = vld1q_u8( pbSrc ); + } + + // Final full block decryption + c0 = veorq_u32( c0, t0 ); + AES_DECRYPT_1( pExpandedKey, c0 ); + vst1q_u8( pbDst, veorq_u32( c0, t0 ) ); +} + +#include "ghash_definitions.h" + +#define AES_ENCRYPT_ROUND_4_GHASH_1( c0, c1, c2, c3, r0, 
r0x, t0, t1, gHashPointer, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + AESE_AESMC( c0, roundKey ) \ + AESE_AESMC( c1, roundKey ) \ + AESE_AESMC( c2, roundKey ) \ + AESE_AESMC( c3, roundKey ) \ +\ + r0x = *gHashPointer; \ + r0x = vrev64q_u8( r0x ); \ + r0 = vextq_u8( r0x, r0x, 8 ); \ + r0x = veorq_u8( r0, r0x ); \ + gHashPointer++; \ +\ + t1 = GHASH_H_POWER(gHashExpandedKeyTable, todo); \ + t0 = vmullq_p64( r0, t1 ); \ + t1 = vmull_high_p64( r0, t1 ); \ +\ + resl = veorq_u8( resl, t0 ); \ + resh = veorq_u8( resh, t1 ); \ +\ + t1 = GHASH_Hx_POWER(gHashExpandedKeyTable, todo); \ + t1 = vmullq_p64( r0x, t1 ); \ +\ + resm = veorq_u8( resm, t1 ); \ + todo--; \ +}; + +// +// Using a loop with AESE_AESMC and AESD_AESIMC, the compiler can still prematurely rearrange the loop and +// lose opportunity for scheduling adjacent pairs. +// Instead, explicitly unroll the AES rounds with this macro. +// +#define AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, gHashPointer, gHashRounds, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ + __n128 t0, t1, r0, r0x; \ + SIZE_T aesEncryptGhashLoop; \ +\ + /* Do gHashRounds full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \ + roundKey = *keyPtr++; \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < gHashRounds; aesEncryptGhashLoop++) \ + { \ + AES_ENCRYPT_ROUND_4_GHASH_1( c0, c1, c2, c3, r0, r0x, t0, t1, gHashPointer, gHashExpandedKeyTable, todo, resl, resm, resh ) \ + roundKey = *keyPtr++; \ + } \ +\ + /* Do 9-gHashRounds full rounds (AES-128|AES-192|AES-256) */ \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < (9-gHashRounds); aesEncryptGhashLoop++) \ + { \ + AES_ENCRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + } \ +\ + if ( keyPtr < keyLimit ) \ + { \ + /* Do 2 more full 
//
// AES_ENCRYPT_ROUND_8_GHASH_1
//
// Eight-block variant of the stitched round: one AES round over c0..c7 plus the accumulation of
// one GHASH input block.
//
//  - AESE_AESMC is applied to c0..c7 with `roundKey`, which is taken from the expanding scope
//    (it is not a macro parameter).
//  - One block is read from *gHashPointer, byte-reversed per 64-bit half and half-swapped into r0;
//    r0x holds the XOR of the swapped and unswapped values. gHashPointer advances by one block.
//  - The low and high products of r0 with GHASH_H_POWER(table, todo) are XORed into resl / resh,
//    and the product of r0x with GHASH_Hx_POWER(table, todo) is XORed into resm.
//  - todo is decremented.
//
// r0, r0x, t0, t1 are caller-provided scratch; gHashPointer, todo, resl, resm, resh are updated.
//
#define AES_ENCRYPT_ROUND_8_GHASH_1( c0, c1, c2, c3, c4, c5, c6, c7, r0, r0x, t0, t1, gHashPointer, gHashExpandedKeyTable, todo, resl, resm, resh ) \
{ \
    AESE_AESMC( c0, roundKey ) \
    AESE_AESMC( c1, roundKey ) \
    AESE_AESMC( c2, roundKey ) \
    AESE_AESMC( c3, roundKey ) \
    AESE_AESMC( c4, roundKey ) \
    AESE_AESMC( c5, roundKey ) \
    AESE_AESMC( c6, roundKey ) \
    AESE_AESMC( c7, roundKey ) \
\
    r0x = *gHashPointer; \
    r0x = vrev64q_u8( r0x ); \
    r0 = vextq_u8( r0x, r0x, 8 ); \
    r0x = veorq_u8( r0, r0x ); \
    gHashPointer++; \
\
    t1 = GHASH_H_POWER(gHashExpandedKeyTable, todo); \
    t0 = vmullq_p64( r0, t1 ); \
    t1 = vmull_high_p64( r0, t1 ); \
\
    resl = veorq_u8( resl, t0 ); \
    resh = veorq_u8( resh, t1 ); \
\
    t1 = GHASH_Hx_POWER(gHashExpandedKeyTable, todo); \
    t1 = vmullq_p64( r0x, t1 ); \
\
    resm = veorq_u8( resm, t1 ); \
    todo--; \
};
//
// AES_GCM_ENCRYPT_8
//
// Encrypts eight blocks (c0..c7, in place) with the expanded key while folding gHashRounds blocks
// read from gHashPointer into the GHASH accumulators resl/resm/resh.
//
//  - The first gHashRounds AES rounds are the stitched AES_ENCRYPT_ROUND_8_GHASH_1 rounds; the
//    remaining (9 - gHashRounds) of the first nine rounds are plain AES_ENCRYPT_ROUND_8 rounds.
//    gHashRounds is therefore expected to be in the range 0..9 (the callers visible in this file
//    pass 8, or a tail block count of 5..7).
//  - After nine rounds, keyPtr is compared with lastEncRoundKey to run 2 (AES-192) or 4 (AES-256)
//    additional rounds before the final round.
//  - roundKey is reloaded after every round, so on reaching AES_ENCRYPT_FINAL_8 it holds the next
//    key in sequence (presumably the last full-round key; the final macro is defined elsewhere).
//
// gHashPointer and todo are advanced / decremented once per stitched round. The locals t0, t1, r0
// and r0x declared here shadow any same-named variables of the expanding function.
//
#define AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, gHashPointer, gHashRounds, gHashExpandedKeyTable, todo, resl, resm, resh ) \
{ \
    const __n128 *keyPtr; \
    const __n128 *keyLimit; \
    __n128 roundKey; \
\
    keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \
    keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \
    __n128 t0, t1, r0, r0x; \
    SIZE_T aesEncryptGhashLoop; \
\
    /* Do gHashRounds full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \
    roundKey = *keyPtr++; \
    for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < gHashRounds; aesEncryptGhashLoop++) \
    { \
        AES_ENCRYPT_ROUND_8_GHASH_1( c0, c1, c2, c3, c4, c5, c6, c7, r0, r0x, t0, t1, gHashPointer, gHashExpandedKeyTable, todo, resl, resm, resh ) \
        roundKey = *keyPtr++; \
    } \
\
    /* Do 9-gHashRounds full rounds (AES-128|AES-192|AES-256) */ \
    for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < (9-gHashRounds); aesEncryptGhashLoop++) \
    { \
        AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \
        roundKey = *keyPtr++; \
    } \
\
    if ( keyPtr < keyLimit ) \
    { \
        /* Do 2 more full rounds (AES-192|AES-256) */ \
        AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \
        roundKey = *keyPtr++; \
        AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \
        roundKey = *keyPtr++; \
\
        if ( keyPtr < keyLimit ) \
        { \
            /* Do 2 more full rounds (AES-256) */ \
            AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \
            roundKey = *keyPtr++; \
            AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \
            roundKey = *keyPtr++; \
        } \
    } \
\
    /* Do final round (AES-128|AES-192|AES-256) */ \
    AES_ENCRYPT_FINAL_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \
};
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 chain = *(__n128 *)pbChainingValue; + const __n128 * pSrc = (const __n128 *) pbSrc; + const __n128 * pGhashSrc = (const __n128 *) pbDst; + __n128 * pDst = (__n128 *) pbDst; + + const __n128 chainIncrement1 = SYMCRYPT_SET_N128_U64( 0, 1 ); + const __n128 chainIncrement2 = SYMCRYPT_SET_N128_U64( 0, 2 ); + const __n128 chainIncrement8 = SYMCRYPT_SET_N128_U64( 0, 8 ); + + __n128 ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + __n128 r0, r1; + __n128 r0x, r1x; + + __n128 state; + __n128 a0, a1, a2; + const __n64 vMultiplicationConstant = SYMCRYPT_SET_N64_U64(0xc200000000000000); + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + + // Our chain variable is in integer format, not the MSBfirst format loaded from memory. 
+ ctr0 = vrev64q_u8( chain ); + ctr1 = vaddq_u32( ctr0, chainIncrement1 ); + ctr2 = vaddq_u32( ctr0, chainIncrement2 ); + ctr3 = vaddq_u32( ctr1, chainIncrement2 ); + ctr4 = vaddq_u32( ctr2, chainIncrement2 ); + ctr5 = vaddq_u32( ctr3, chainIncrement2 ); + ctr6 = vaddq_u32( ctr4, chainIncrement2 ); + ctr7 = vaddq_u32( ctr5, chainIncrement2 ); + + state = *(__n128 *) pState; + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + // Do 8 blocks of CTR either for tail (if total blocks <8) or for encryption of first 8 blocks + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + c4 = vrev64q_u8( ctr4 ); + c5 = vrev64q_u8( ctr5 ); + c6 = vrev64q_u8( ctr6 ); + c7 = vrev64q_u8( ctr7 ); + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + if ( cbData >= 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + ctr0 = vaddq_u32( ctr0, chainIncrement8 ); + ctr1 = vaddq_u32( ctr1, chainIncrement8 ); + ctr2 = vaddq_u32( ctr2, chainIncrement8 ); + ctr3 = vaddq_u32( ctr3, chainIncrement8 ); + ctr4 = vaddq_u32( ctr4, chainIncrement8 ); + ctr5 = vaddq_u32( ctr5, chainIncrement8 ); + ctr6 = vaddq_u32( ctr6, chainIncrement8 ); + ctr7 = vaddq_u32( ctr7, chainIncrement8 ); + + // Encrypt first 8 blocks + pDst[0] = veorq_u64( pSrc[0], c0 ); + pDst[1] = veorq_u64( pSrc[1], c1 ); + pDst[2] = veorq_u64( pSrc[2], c2 ); + pDst[3] = veorq_u64( pSrc[3], c3 ); + pDst[4] = veorq_u64( pSrc[4], c4 ); + pDst[5] = veorq_u64( pSrc[5], c5 ); + pDst[6] = veorq_u64( pSrc[6], c6 ); + pDst[7] = veorq_u64( pSrc[7], c7 ); + + pDst += 8; + pSrc += 8; + + while( nBlocks >= 16 ) + { + // In this loop we always have 8 blocks to encrypt and we have already encrypted the previous 8 blocks ready for GHASH + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + c4 = vrev64q_u8( ctr4 ); + 
c5 = vrev64q_u8( ctr5 ); + c6 = vrev64q_u8( ctr6 ); + c7 = vrev64q_u8( ctr7 ); + + ctr0 = vaddq_u32( ctr0, chainIncrement8 ); + ctr1 = vaddq_u32( ctr1, chainIncrement8 ); + ctr2 = vaddq_u32( ctr2, chainIncrement8 ); + ctr3 = vaddq_u32( ctr3, chainIncrement8 ); + ctr4 = vaddq_u32( ctr4, chainIncrement8 ); + ctr5 = vaddq_u32( ctr5, chainIncrement8 ); + ctr6 = vaddq_u32( ctr6, chainIncrement8 ); + ctr7 = vaddq_u32( ctr7, chainIncrement8 ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 ); + + pDst[0] = veorq_u64( pSrc[0], c0 ); + pDst[1] = veorq_u64( pSrc[1], c1 ); + pDst[2] = veorq_u64( pSrc[2], c2 ); + pDst[3] = veorq_u64( pSrc[3], c3 ); + pDst[4] = veorq_u64( pSrc[4], c4 ); + pDst[5] = veorq_u64( pSrc[5], c5 ); + pDst[6] = veorq_u64( pSrc[6], c6 ); + pDst[7] = veorq_u64( pSrc[7], c7 ); + + pDst += 8; + pSrc += 8; + nBlocks -= 8; + + if (todo == 0) + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + } + + // We now have at least 8 blocks of encrypted data to GHASH and at most 7 blocks left to encrypt + // Do 8 blocks of GHASH in parallel with generating 0, 4, or 8 AES-CTR blocks for tail encryption + nBlocks -= 8; + if (nBlocks > 0) + { + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + + if (nBlocks > 4) + { + // Do 8 rounds of AES-CTR for tail in parallel with 8 rounds of GHASH + c4 = vrev64q_u8( ctr4 ); + c5 = vrev64q_u8( ctr5 ); + c6 = vrev64q_u8( ctr6 ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 ); + } + else + { + // Do 4 rounds of AES-CTR for tail in parallel with 8 rounds of GHASH + AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, 
pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 ); + } + + if( todo == 0) + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + } + else + { + // Just do the final 8 rounds of GHASH + for( todo=8; todo>0; todo-- ) + { + r0x = vrev64q_u8( pGhashSrc[0] ); + r0 = vextq_u8( r0x, r0x, 8 ); + r0x = veorq_u8( r0, r0x ); + pGhashSrc++; + + CLMUL_ACCX_3( r0, r0x, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + } + } + + if( nBlocks > 0 ) + { + // Encrypt 1-7 blocks with pre-generated AES-CTR blocks and GHASH the results + while( nBlocks >= 2 ) + { + ctr0 = vaddq_u32( ctr0, chainIncrement2 ); + + r0 = veorq_u64( pSrc[0], c0 ); + r1 = veorq_u64( pSrc[1], c1 ); + + pDst[0] = r0; + pDst[1] = r1; + + r0x = vrev64q_u8( r0 ); + r1x = vrev64q_u8( r1 ); + r0 = vextq_u8( r0x, r0x, 8 ); + r1 = vextq_u8( r1x, r1x, 8 ); + r0x = veorq_u8( r0, r0x ); + r1x = veorq_u8( r1, r1x ); + + CLMUL_ACCX_3( r0, r0x, GHASH_H_POWER(expandedKeyTable, todo - 0), GHASH_Hx_POWER(expandedKeyTable, todo - 0), a0, a1, a2 ); + CLMUL_ACCX_3( r1, r1x, GHASH_H_POWER(expandedKeyTable, todo - 1), GHASH_Hx_POWER(expandedKeyTable, todo - 1), a0, a1, a2 ); + + pDst += 2; + pSrc += 2; + todo -= 2; + nBlocks -= 2; + c0 = c2; + c1 = c3; + c2 = c4; + c3 = c5; + c4 = c6; + } + + if( nBlocks > 0 ) + { + ctr0 = vaddq_u32( ctr0, chainIncrement1 ); + + r0 = veorq_u64( pSrc[0], c0 ); + pDst[0] = r0; + r0x = vrev64q_u8( r0 ); + r0 = vextq_u8( r0x, r0x, 8 ); + r0x = veorq_u8( r0, r0x ); + + CLMUL_ACCX_3( r0, r0x, GHASH_H_POWER(expandedKeyTable, 1), GHASH_Hx_POWER(expandedKeyTable, 1), a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, 
#pragma warning(push)
#pragma warning( disable:4701 )     // "Use of uninitialized variable" - on the short-tail paths below some of
                                    // c4..c7 are copied or passed to the AES macros without having been set;
                                    // those values are never XORed into the output.
#pragma runtime_checks( "u", off )
// This call is functionally identical to:
// SymCryptGHashAppendDataPmull(expandedKeyTable,
//                              pState,
//                              pbSrc,
//                              cbData );
// SymCryptAesCtrMsb64Neon(     pExpandedKey,
//                              pbChainingValue,
//                              pbSrc,
//                              pbDst,
//                              cbData );
//
// GCM decryption with GHASH stitched into the AES rounds.
//
//  pExpandedKey     - AES expanded key used to generate the CTR key stream.
//  pbChainingValue  - counter block; read on entry and overwritten with the advanced counter on exit.
//                     NOTE(review): annotated _In_reads_ although the function writes it back.
//  expandedKeyTable - table indexed through GHASH_H_POWER / GHASH_Hx_POWER.
//  pState           - GHASH state; read on entry, written on exit.
//  pbSrc / pbDst    - ciphertext in, plaintext out; cbData bytes each.
//  cbData           - asserted to be a multiple of the block size.
//
// GHASH is computed over the ciphertext (pGhashSrc aliases pbSrc) and each group of blocks is
// hashed inside the AES macro before the corresponding pDst stores are issued.
//
VOID
SYMCRYPT_CALL
SymCryptAesGcmDecryptStitchedNeon(
    _In_                                    PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _In_reads_( SYMCRYPT_AES_BLOCK_SIZE )   PBYTE                       pbChainingValue,
    _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT    expandedKeyTable,
    _Inout_                                 PSYMCRYPT_GF128_ELEMENT     pState,
    _In_reads_( cbData )                    PCBYTE                      pbSrc,
    _Out_writes_( cbData )                  PBYTE                       pbDst,
                                            SIZE_T                      cbData )
{
    __n128 chain = *(__n128 *)pbChainingValue;
    const __n128 * pSrc = (const __n128 *) pbSrc;
    const __n128 * pGhashSrc = (const __n128 *) pbSrc;
    __n128 * pDst = (__n128 *) pbDst;

    const __n128 chainIncrement1 = SYMCRYPT_SET_N128_U64( 0, 1 );
    const __n128 chainIncrement2 = SYMCRYPT_SET_N128_U64( 0, 2 );
    const __n128 chainIncrement8 = SYMCRYPT_SET_N128_U64( 0, 8 );

    __n128 ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7;
    __n128 c0, c1, c2, c3, c4, c5, c6, c7;

    __n128 state;
    __n128 a0, a1, a2;
    const __n64 vMultiplicationConstant = SYMCRYPT_SET_N64_U64(0xc200000000000000);
    SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE;
    SIZE_T todo;

    SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size

    // Our chain variable is in integer format, not the MSBfirst format loaded from memory.
    // ctr0..ctr7 hold eight consecutive counter values.
    ctr0 = vrev64q_u8( chain );
    ctr1 = vaddq_u32( ctr0, chainIncrement1 );
    ctr2 = vaddq_u32( ctr0, chainIncrement2 );
    ctr3 = vaddq_u32( ctr1, chainIncrement2 );
    ctr4 = vaddq_u32( ctr2, chainIncrement2 );
    ctr5 = vaddq_u32( ctr3, chainIncrement2 );
    ctr6 = vaddq_u32( ctr4, chainIncrement2 );
    ctr7 = vaddq_u32( ctr5, chainIncrement2 );

    state = *(__n128 *) pState;

    // todo = number of blocks that will be accumulated before the next reduction
    todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS );

    // Seed the accumulators a0/a1/a2 with the current state times the highest power in use
    CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 );

    while( nBlocks >= 8 )
    {
        // In this loop we always have 8 blocks to decrypt and GHASH
        c0 = vrev64q_u8( ctr0 );
        c1 = vrev64q_u8( ctr1 );
        c2 = vrev64q_u8( ctr2 );
        c3 = vrev64q_u8( ctr3 );
        c4 = vrev64q_u8( ctr4 );
        c5 = vrev64q_u8( ctr5 );
        c6 = vrev64q_u8( ctr6 );
        c7 = vrev64q_u8( ctr7 );

        ctr0 = vaddq_u32( ctr0, chainIncrement8 );
        ctr1 = vaddq_u32( ctr1, chainIncrement8 );
        ctr2 = vaddq_u32( ctr2, chainIncrement8 );
        ctr3 = vaddq_u32( ctr3, chainIncrement8 );
        ctr4 = vaddq_u32( ctr4, chainIncrement8 );
        ctr5 = vaddq_u32( ctr5, chainIncrement8 );
        ctr6 = vaddq_u32( ctr6, chainIncrement8 );
        ctr7 = vaddq_u32( ctr7, chainIncrement8 );

        // Generates 8 key stream blocks and hashes the 8 ciphertext blocks at pGhashSrc
        AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 );

        pDst[0] = veorq_u64( pSrc[0], c0 );
        pDst[1] = veorq_u64( pSrc[1], c1 );
        pDst[2] = veorq_u64( pSrc[2], c2 );
        pDst[3] = veorq_u64( pSrc[3], c3 );
        pDst[4] = veorq_u64( pSrc[4], c4 );
        pDst[5] = veorq_u64( pSrc[5], c5 );
        pDst[6] = veorq_u64( pSrc[6], c6 );
        pDst[7] = veorq_u64( pSrc[7], c7 );

        pDst += 8;
        pSrc += 8;
        nBlocks -= 8;

        if (todo == 0)
        {
            // All scheduled blocks are accumulated: reduce into state and, if data remains,
            // start a new accumulation run.
            CLMUL_3_POST( a0, a1, a2 );
            MODREDUCE( vMultiplicationConstant, a0, a1, a2, state );

            if ( nBlocks > 0 )
            {
                todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS );
                CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 );
            }
        }
    }

    if( nBlocks > 0 )
    {
        // We have 1-7 blocks to GHASH and decrypt
        // Do the exact number of GHASH blocks we need in parallel with generating either 4 or 8 blocks of AES-CTR
        c0 = vrev64q_u8( ctr0 );
        c1 = vrev64q_u8( ctr1 );
        c2 = vrev64q_u8( ctr2 );
        c3 = vrev64q_u8( ctr3 );

        if( nBlocks > 4 )
        {
            // c7 is not refreshed here: at most 7 key stream blocks are consumed below
            c4 = vrev64q_u8( ctr4 );
            c5 = vrev64q_u8( ctr5 );
            c6 = vrev64q_u8( ctr6 );

            AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, nBlocks, expandedKeyTable, todo, a0, a1, a2 );
        } else {
            AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, pGhashSrc, nBlocks, expandedKeyTable, todo, a0, a1, a2 );
        }
        CLMUL_3_POST( a0, a1, a2 );
        MODREDUCE( vMultiplicationConstant, a0, a1, a2, state );

        // Decrypt 1-7 blocks with pre-generated AES-CTR blocks
        while( nBlocks >= 2 )
        {
            ctr0 = vaddq_u32( ctr0, chainIncrement2 );

            pDst[0] = veorq_u64( pSrc[0], c0 );
            pDst[1] = veorq_u64( pSrc[1], c1 );

            pDst += 2;
            pSrc += 2;
            nBlocks -= 2;
            // Shift the remaining key stream blocks down by two
            c0 = c2;
            c1 = c3;
            c2 = c4;
            c3 = c5;
            c4 = c6;
        }

        if( nBlocks > 0 )
        {
            ctr0 = vaddq_u32( ctr0, chainIncrement1 );

            pDst[0] = veorq_u64( pSrc[0], c0 );
        }
    }

    // ctr0 is the next unused counter value; store it back in memory byte order
    chain = vrev64q_u8( ctr0 );
    *(__n128 *)pbChainingValue = chain;
    *(__n128 *)pState = state;
}
#pragma runtime_checks( "u", restore )
#pragma warning(pop)
//
// SYMCRYPT_AesCtrMsbXxNeon
//
// AES-CTR key stream generation and XOR for ARM64 NEON. This is pattern code: the function name
// and the VADDQ_UXX / VSUBQ_UXX counter arithmetic are macros supplied by the including file
// (presumably selecting the counter width - confirm at the include sites).
//
//  pExpandedKey    - AES expanded key.
//  pbChainingValue - counter block; updated on return to the first counter value not consumed.
//  pbSrc / pbDst   - input / output buffers, cbData bytes each.
//  cbData          - rounded DOWN to a whole number of AES blocks; trailing bytes are ignored.
//
VOID
SYMCRYPT_CALL
SYMCRYPT_AesCtrMsbXxNeon(
    _In_                                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE )  PBYTE                       pbChainingValue,
    _In_reads_( cbData )                        PCBYTE                      pbSrc,
    _Out_writes_( cbData )                      PBYTE                       pbDst,
                                                SIZE_T                      cbData )
{
    __n128 chain = *(__n128 *)pbChainingValue;
    const __n128 * pSrc = (const __n128 *) pbSrc;
    __n128 * pDst = (__n128 *) pbDst;

    const __n128 chainIncrement1 = SYMCRYPT_SET_N128_U64( 0, 1 );
    const __n128 chainIncrement2 = SYMCRYPT_SET_N128_U64( 0, 2 );
    const __n128 chainIncrement8 = SYMCRYPT_SET_N128_U64( 0, 8 );

    __n128 ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7;
    __n128 c0, c1, c2, c3, c4, c5, c6, c7;

    // Drop any partial trailing block
    cbData &= ~(SYMCRYPT_AES_BLOCK_SIZE - 1);

    // Our chain variable is in integer format, not the MSBfirst format loaded from memory.
    ctr0 = vrev64q_u8( chain );
    ctr1 = VADDQ_UXX( ctr0, chainIncrement1 );
    ctr2 = VADDQ_UXX( ctr0, chainIncrement2 );
    ctr3 = VADDQ_UXX( ctr1, chainIncrement2 );
    ctr4 = VADDQ_UXX( ctr2, chainIncrement2 );
    ctr5 = VADDQ_UXX( ctr3, chainIncrement2 );
    ctr6 = VADDQ_UXX( ctr4, chainIncrement2 );
    ctr7 = VADDQ_UXX( ctr5, chainIncrement2 );

/*
    while cbData >= 5 * block
        generate 8 blocks of key stream
        if cbData < 8 * block
            break;
        process 8 blocks
    if cbData >= 5 * block
        process 5-7 blocks
        done
    if cbData >= 2 * block
        generate 4 blocks of key stream
        process 2-4 blocks
        done
    if cbData == 1 block
        generate 1 block of key stream
        process block
*/
    while( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        c0 = vrev64q_u8( ctr0 );
        c1 = vrev64q_u8( ctr1 );
        c2 = vrev64q_u8( ctr2 );
        c3 = vrev64q_u8( ctr3 );
        c4 = vrev64q_u8( ctr4 );
        c5 = vrev64q_u8( ctr5 );
        c6 = vrev64q_u8( ctr6 );
        c7 = vrev64q_u8( ctr7 );

        ctr0 = VADDQ_UXX( ctr0, chainIncrement8 );
        ctr1 = VADDQ_UXX( ctr1, chainIncrement8 );
        ctr2 = VADDQ_UXX( ctr2, chainIncrement8 );
        ctr3 = VADDQ_UXX( ctr3, chainIncrement8 );
        ctr4 = VADDQ_UXX( ctr4, chainIncrement8 );
        ctr5 = VADDQ_UXX( ctr5, chainIncrement8 );
        ctr6 = VADDQ_UXX( ctr6, chainIncrement8 );
        ctr7 = VADDQ_UXX( ctr7, chainIncrement8 );

        AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );

        if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            // 5-7 blocks left: keep the key stream in c0-c7 for the tail code below
            break;
        }

        pDst[0] = veorq_u64( pSrc[0], c0 );
        pDst[1] = veorq_u64( pSrc[1], c1 );
        pDst[2] = veorq_u64( pSrc[2], c2 );
        pDst[3] = veorq_u64( pSrc[3], c3 );
        pDst[4] = veorq_u64( pSrc[4], c4 );
        pDst[5] = veorq_u64( pSrc[5], c5 );
        pDst[6] = veorq_u64( pSrc[6], c6 );
        pDst[7] = veorq_u64( pSrc[7], c7 );

        pDst  += 8;
        pSrc  += 8;
        cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
    }

    //
    // At this point we have one of the two following cases:
    // - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7. ctr0-ctr7 is set to (c0+8)-(c7+8)
    // - cbData < 5 * 16 and we have no blocks of key stream, and ctr0-ctr7 set to the next 8 counters to use
    //

    if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks
    {
        if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            //
            // We already have the key stream
            //
            pDst[0] = veorq_u64( pSrc[0], c0 );
            pDst[1] = veorq_u64( pSrc[1], c1 );
            pDst[2] = veorq_u64( pSrc[2], c2 );
            pDst[3] = veorq_u64( pSrc[3], c3 );
            pDst[4] = veorq_u64( pSrc[4], c4 );
            // ctr0-ctr7 were already advanced by 8, so ctrN - 8 is the counter that follows
            // the N blocks consumed here.
            chain = VSUBQ_UXX( ctr5, chainIncrement8 );

            if( cbData >= 96 )
            {
                chain = VSUBQ_UXX( ctr6, chainIncrement8 );
                pDst[5] = veorq_u64( pSrc[5], c5 );
                if( cbData >= 112 )
                {
                    chain = VSUBQ_UXX( ctr7, chainIncrement8 );
                    pDst[6] = veorq_u64( pSrc[6], c6 );
                }
            }
        }
        else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            // Produce 4 blocks of key stream

            chain = ctr2;           // chain is only incremented by 2 for now

            c0 = vrev64q_u8( ctr0 );
            c1 = vrev64q_u8( ctr1 );
            c2 = vrev64q_u8( ctr2 );
            c3 = vrev64q_u8( ctr3 );

            AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 );

            pDst[0] = veorq_u64( pSrc[0], c0 );
            pDst[1] = veorq_u64( pSrc[1], c1 );
            if( cbData >= 48 )
            {
                chain = ctr3;
                pDst[2] = veorq_u64( pSrc[2], c2 );
                if( cbData >= 64 )
                {
                    chain = ctr4;
                    pDst[3] = veorq_u64( pSrc[3], c3 );
                }
            }
        }
        else
        {
            // Exactly 1 block to process
            chain = ctr1;

            c0 = vrev64q_u8( ctr0 );

            AES_ENCRYPT_1( pExpandedKey, c0 );
            pDst[0] = veorq_u64( pSrc[0], c0 );
        }
    }
    else
    {
        // Nothing left after the 8-block loop: ctr0 is the next counter to use
        chain = ctr0;
    }

    // Convert back to the in-memory byte order before storing
    chain = vrev64q_u8( chain );
    *(__n128 *)pbChainingValue = chain;
}
); + c5 = MM_ADD_EPIXX( c3, chainIncrement2 ); + c6 = MM_ADD_EPIXX( c4, chainIncrement2 ); + c7 = MM_ADD_EPIXX( c5, chainIncrement2 ); + chain = MM_ADD_EPIXX( c6, chainIncrement2 ); + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER ); + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112 ) ) ) ); + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + // + // At this point we have one of the two following cases: + // - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7. 
chain is set to c7 + 1 + // - cbData < 5 * 16 and we have no blocks of key stream, with chain the next value to use + // + + if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks + { + if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE ) + { + // + // We already have the key stream + // + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) ); + chain = MM_SUB_EPIXX( chain, chainIncrement3 ); + + if( cbData >= 96 ) + { + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) ); + if( cbData >= 112 ) + { + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) ); + } + } + } + else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE ) + { + // Produce 4 blocks of key stream + + c0 = chain; + c1 = MM_ADD_EPIXX( chain, chainIncrement1 ); + c2 = MM_ADD_EPIXX( chain, chainIncrement2 ); + c3 = MM_ADD_EPIXX( c1, chainIncrement2 ); + chain = c2; // chain is only incremented by 2 for now + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + + AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, 
_mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) ); + if( cbData >= 48 ) + { + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) ); + if( cbData >= 64 ) + { + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) ); + } + } + } + else + { + // Exactly 1 block to process + c0 = chain; + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + + AES_ENCRYPT_1( pExpandedKey, c0 ); + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) ); + } + } + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); + _mm_storeu_si128( (__m128i *) pbChainingValue, chain ); +} + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/aes-xmm.c b/libs/symcrypt/lib/aes-xmm.c new file mode 100644 index 00000000000..c300533281f --- /dev/null +++ b/libs/symcrypt/lib/aes-xmm.c @@ -0,0 +1,1792 @@ +// +// aes-xmm.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// All XMM code for AES operations +// Requires compiler support for ssse3, aesni and pclmulqdq +// + +#include "precomp.h" + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#include "xtsaes_definitions.h" +#include "ghash_definitions.h" + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3,aes,pclmul"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3,aes,pclmul") +#endif + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxXmm( _In_reads_(4) PCBYTE pIn, _Out_writes_(4) PBYTE pOut ) +{ + __m128i x; + x = _mm_set1_epi32( *(int *) pIn ); + + x = _mm_aeskeygenassist_si128( x, 0 ); + + // Could use _mm_storeu_si32( pOut, x ) but it is missing from some headers and _mm_store_ss will be as fast + _mm_store_ss( (float *) pOut, _mm_castsi128_ps(x) ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyXmm( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ) +{ + // + // On x86 our key structure is only 4-aligned (the best we can do) so we use unaligned load/stores. + // On Amd64 our round keys are aligned, but recent CPUs have fast unaligned load/store if the address is + // actually aligned properly. + // + _mm_storeu_si128( (__m128i *) pDecryptionRoundKey, _mm_aesimc_si128( _mm_loadu_si128( (__m128i *)pEncryptionRoundKey ) ) ); +} + +// +// The latency of AES instruction has increased up to 8 cycles in Ivy Bridge, +// and back to 7 in Haswell. +// We use 8-parallel code to expose the maximum parallelism to the CPU. +// On x86 it will introduce some register spilling, but the load/stores +// should be able to hide behind the AES instruction latencies. +// Silvermont x86 CPUs has AES-NI with latency = 8 and throughput = 5, so there +// the CPU parallelism is low. +// For things like BitLocker that is fine, but other uses, such as GCM & AES_CTR_DRBG +// use odd sizes. 
// We try to do 5-8 blocks in 8-parallel code, 2-4 blocks in 4-parallel code, and
// 1 block in 1-parallel code.
// This is a compromise; the big cores can do 8 parallel in about the time of a 4-parallel,
// but Silvermont cannot and would pay a big price on small requests if we only use 8-parallel.
// Doing only 8-parallel and then 1-parallel would penalize the big cores a lot.
//
// We used to have 7-parallel code, but common request sizes are not multiples of 7
// blocks so we end up doing a lot of extra work. This is especially expensive on
// Silvermont where the extra work isn't hidden in the latencies.
//

//
// AES_ENCRYPT_1
//
// Encrypts the single block held in c0 (in place) with the expanded key.
//
// Round key 0 is XORed in, round key 1 is applied with AESENC, and the remaining full rounds are
// applied two per loop iteration until keyPtr reaches lastEncRoundKey, whose key is then used
// for AESENCLAST. The two-at-a-time loop relies on the number of full rounds after the first
// being even.
//
// Round keys are loaded with unaligned loads. The macro body is a brace block followed by ';',
// and it declares keyPtr, keyLimit and roundkey locally.
//
#define AES_ENCRYPT_1( pExpandedKey, c0 ) \
{ \
    const BYTE (*keyPtr)[4][4]; \
    const BYTE (*keyLimit)[4][4]; \
    __m128i roundkey; \
\
    keyPtr = &pExpandedKey->RoundKey[0]; \
    keyLimit = pExpandedKey->lastEncRoundKey; \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
\
    c0 = _mm_xor_si128( c0, roundkey ); \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
    c0 = _mm_aesenc_si128( c0, roundkey ); \
\
    do \
    { \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        c0 = _mm_aesenc_si128( c0, roundkey ); \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        c0 = _mm_aesenc_si128( c0, roundkey ); \
    } while( keyPtr < keyLimit ); \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
\
    c0 = _mm_aesenclast_si128( c0, roundkey ); \
};


// Perform AES encryption without the first round key and with a specified last round key
//
// For algorithms where performance is dominated by a chain of dependent AES rounds (i.e. CBC encryption, CCM, CMAC)
// we can gain a reasonable performance uplift by computing (last round key ^ next plaintext block ^ first round key)
// off the critical path and using this computed value in place of last round key in AESENCLAST instructions.
//
// AES_ENCRYPT_1_CHAIN
//
// Runs the AES full rounds on cipherState starting at RoundKey[1] (the initial XOR with
// RoundKey[0] is NOT performed here) and finishes with AESENCLAST using the caller-supplied
// mergedLastRoundKey instead of the key stored at lastEncRoundKey.
// The caller is expected to have folded RoundKey[0] into cipherState already.
//
#define AES_ENCRYPT_1_CHAIN( pExpandedKey, cipherState, mergedLastRoundKey ) \
{ \
    const BYTE (*keyPtr)[4][4]; \
    const BYTE (*keyLimit)[4][4]; \
    __m128i roundkey; \
\
    keyPtr = &pExpandedKey->RoundKey[1]; \
    keyLimit = pExpandedKey->lastEncRoundKey; \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
\
    cipherState = _mm_aesenc_si128( cipherState, roundkey ); \
\
    do \
    { \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        cipherState = _mm_aesenc_si128( cipherState, roundkey ); \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        cipherState = _mm_aesenc_si128( cipherState, roundkey ); \
    } while( keyPtr < keyLimit ); \
\
    cipherState = _mm_aesenclast_si128( cipherState, mergedLastRoundKey ); \
};

//
// AES_ENCRYPT_4
//
// Encrypts four independent blocks c0..c3 (in place) with the expanded key.
// RoundKey[0] is XORed into every block, each following round key up to (but not including)
// lastEncRoundKey is applied with AESENC, and the key at lastEncRoundKey is applied with
// AESENCLAST. The four blocks share each loaded round key.
//
#define AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 ) \
{ \
    const BYTE (*keyPtr)[4][4]; \
    const BYTE (*keyLimit)[4][4]; \
    __m128i roundkey; \
\
    keyPtr = &pExpandedKey->RoundKey[0]; \
    keyLimit = pExpandedKey->lastEncRoundKey; \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
\
    c0 = _mm_xor_si128( c0, roundkey ); \
    c1 = _mm_xor_si128( c1, roundkey ); \
    c2 = _mm_xor_si128( c2, roundkey ); \
    c3 = _mm_xor_si128( c3, roundkey ); \
\
    do \
    { \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        c0 = _mm_aesenc_si128( c0, roundkey ); \
        c1 = _mm_aesenc_si128( c1, roundkey ); \
        c2 = _mm_aesenc_si128( c2, roundkey ); \
        c3 = _mm_aesenc_si128( c3, roundkey ); \
    } while( keyPtr < keyLimit ); \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
\
    c0 = _mm_aesenclast_si128( c0, roundkey ); \
    c1 = _mm_aesenclast_si128( c1, roundkey ); \
    c2 = _mm_aesenclast_si128( c2, roundkey ); \
    c3 = _mm_aesenclast_si128( c3, roundkey ); \
};
&pExpandedKey->RoundKey[0]; \ + keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ +\ + c0 = _mm_xor_si128( c0, roundkey ); \ + c1 = _mm_xor_si128( c1, roundkey ); \ + c2 = _mm_xor_si128( c2, roundkey ); \ + c3 = _mm_xor_si128( c3, roundkey ); \ + c4 = _mm_xor_si128( c4, roundkey ); \ + c5 = _mm_xor_si128( c5, roundkey ); \ + c6 = _mm_xor_si128( c6, roundkey ); \ + c7 = _mm_xor_si128( c7, roundkey ); \ +\ + do \ + { \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + c1 = _mm_aesenc_si128( c1, roundkey ); \ + c2 = _mm_aesenc_si128( c2, roundkey ); \ + c3 = _mm_aesenc_si128( c3, roundkey ); \ + c4 = _mm_aesenc_si128( c4, roundkey ); \ + c5 = _mm_aesenc_si128( c5, roundkey ); \ + c6 = _mm_aesenc_si128( c6, roundkey ); \ + c7 = _mm_aesenc_si128( c7, roundkey ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ +\ + c0 = _mm_aesenclast_si128( c0, roundkey ); \ + c1 = _mm_aesenclast_si128( c1, roundkey ); \ + c2 = _mm_aesenclast_si128( c2, roundkey ); \ + c3 = _mm_aesenclast_si128( c3, roundkey ); \ + c4 = _mm_aesenclast_si128( c4, roundkey ); \ + c5 = _mm_aesenclast_si128( c5, roundkey ); \ + c6 = _mm_aesenclast_si128( c6, roundkey ); \ + c7 = _mm_aesenclast_si128( c7, roundkey ); \ +}; + +#define AES_DECRYPT_1( pExpandedKey, c0 ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m128i roundkey; \ +\ + keyPtr = pExpandedKey->lastEncRoundKey; \ + keyLimit = pExpandedKey->lastDecRoundKey; \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ +\ + c0 = _mm_xor_si128( c0, roundkey ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesdec_si128( c0, roundkey ); \ +\ + do \ + { \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesdec_si128( c0, roundkey ); \ + roundkey = _mm_loadu_si128( 
(__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesdec_si128( c0, roundkey ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ +\ + c0 = _mm_aesdeclast_si128( c0, roundkey ); \ +}; + +#define AES_DECRYPT_4( pExpandedKey, c0, c1, c2, c3 ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m128i roundkey; \ +\ + keyPtr = pExpandedKey->lastEncRoundKey; \ + keyLimit = pExpandedKey->lastDecRoundKey; \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ +\ + c0 = _mm_xor_si128( c0, roundkey ); \ + c1 = _mm_xor_si128( c1, roundkey ); \ + c2 = _mm_xor_si128( c2, roundkey ); \ + c3 = _mm_xor_si128( c3, roundkey ); \ +\ + do \ + { \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesdec_si128( c0, roundkey ); \ + c1 = _mm_aesdec_si128( c1, roundkey ); \ + c2 = _mm_aesdec_si128( c2, roundkey ); \ + c3 = _mm_aesdec_si128( c3, roundkey ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ +\ + c0 = _mm_aesdeclast_si128( c0, roundkey ); \ + c1 = _mm_aesdeclast_si128( c1, roundkey ); \ + c2 = _mm_aesdeclast_si128( c2, roundkey ); \ + c3 = _mm_aesdeclast_si128( c3, roundkey ); \ +}; + +#define AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m128i roundkey; \ +\ + keyPtr = pExpandedKey->lastEncRoundKey; \ + keyLimit = pExpandedKey->lastDecRoundKey; \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ +\ + c0 = _mm_xor_si128( c0, roundkey ); \ + c1 = _mm_xor_si128( c1, roundkey ); \ + c2 = _mm_xor_si128( c2, roundkey ); \ + c3 = _mm_xor_si128( c3, roundkey ); \ + c4 = _mm_xor_si128( c4, roundkey ); \ + c5 = _mm_xor_si128( c5, roundkey ); \ + c6 = _mm_xor_si128( c6, roundkey ); \ + c7 = _mm_xor_si128( c7, roundkey ); \ +\ + do \ + { \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = 
_mm_aesdec_si128( c0, roundkey ); \ + c1 = _mm_aesdec_si128( c1, roundkey ); \ + c2 = _mm_aesdec_si128( c2, roundkey ); \ + c3 = _mm_aesdec_si128( c3, roundkey ); \ + c4 = _mm_aesdec_si128( c4, roundkey ); \ + c5 = _mm_aesdec_si128( c5, roundkey ); \ + c6 = _mm_aesdec_si128( c6, roundkey ); \ + c7 = _mm_aesdec_si128( c7, roundkey ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ +\ + c0 = _mm_aesdeclast_si128( c0, roundkey ); \ + c1 = _mm_aesdeclast_si128( c1, roundkey ); \ + c2 = _mm_aesdeclast_si128( c2, roundkey ); \ + c3 = _mm_aesdeclast_si128( c3, roundkey ); \ + c4 = _mm_aesdeclast_si128( c4, roundkey ); \ + c5 = _mm_aesdeclast_si128( c5, roundkey ); \ + c6 = _mm_aesdeclast_si128( c6, roundkey ); \ + c7 = _mm_aesdeclast_si128( c7, roundkey ); \ +}; + + +// +// The EncryptXmm code is tested through the CFB mode encryption which has no further optimizations. +// +VOID +SYMCRYPT_CALL +SymCryptAesEncryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ) +{ + __m128i c; + + c = _mm_loadu_si128( ( __m128i * ) pbSrc); + + AES_ENCRYPT_1( pExpandedKey, c ); + + _mm_storeu_si128( (__m128i *) pbDst, c ); +} + +// +// The DecryptXmm code is tested through the EcbDecrypt calls which has no further optimizations. 
//
// SymCryptAesDecryptXmm: decrypt exactly one AES block.
//   pExpandedKey - expanded key whose round keys are consumed by AES_DECRYPT_1
//   pbSrc        - SYMCRYPT_AES_BLOCK_SIZE input bytes; read with an unaligned load
//   pbDst        - SYMCRYPT_AES_BLOCK_SIZE output bytes; written with an unaligned store
// The source block is fully loaded before the store, so pbSrc == pbDst is fine.
//
VOID
SYMCRYPT_CALL
SymCryptAesDecryptXmm(
    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc,
    _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst )
{
    __m128i c;

    c = _mm_loadu_si128( ( __m128i * ) pbSrc);

    AES_DECRYPT_1( pExpandedKey, c );

    _mm_storeu_si128( (__m128i *) pbDst, c );
}

// Disable warnings and VC++ runtime checks for use of uninitialized values (by design)
#pragma warning(push)
#pragma warning( disable: 6001 4701 )
#pragma runtime_checks( "u", off )
//
// SymCryptAesEcbEncryptXmm: AES-ECB encrypt cbData bytes from pbSrc into pbDst.
//
// Whole groups of 8 blocks go through AES_ENCRYPT_8. The 1..7 remaining whole blocks are
// loaded conditionally and then encrypted with the 8-wide macro (5-7 blocks), the 4-wide
// macro (2-4 blocks) or the 1-wide macro (1 block). Registers that were not loaded are
// still passed to the wide macros and hold unspecified values; their results are never
// stored. That is why the uninitialized-use diagnostics are disabled around this function.
//
// Any trailing bytes that do not make up a whole 16-byte block are neither read nor written.
// Within each group all loads precede all stores.
//
VOID
SYMCRYPT_CALL
SymCryptAesEcbEncryptXmm(
    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _In_reads_( cbData ) PCBYTE pbSrc,
    _Out_writes_( cbData ) PBYTE pbDst,
    SIZE_T cbData )
{
    __m128i c0, c1, c2, c3, c4, c5, c6, c7;

    // Bulk path: 8 blocks (128 bytes) per iteration
    while( cbData >= 8 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        c0 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ));
        c1 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ));
        c2 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ));
        c3 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ));
        c4 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ));
        c5 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ));
        c6 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ));
        c7 = _mm_loadu_si128( ( __m128i * ) (pbSrc +112 ));

        AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );

        _mm_storeu_si128( (__m128i *) (pbDst + 0 ), c0 );
        _mm_storeu_si128( (__m128i *) (pbDst + 16 ), c1 );
        _mm_storeu_si128( (__m128i *) (pbDst + 32 ), c2 );
        _mm_storeu_si128( (__m128i *) (pbDst + 48 ), c3 );
        _mm_storeu_si128( (__m128i *) (pbDst + 64 ), c4 );
        _mm_storeu_si128( (__m128i *) (pbDst + 80 ), c5 );
        _mm_storeu_si128( (__m128i *) (pbDst + 96 ), c6 );
        _mm_storeu_si128( (__m128i *) (pbDst +112 ), c7 );

        pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
    }

    // Fewer than one whole block left: nothing more to do
    if( cbData < 16 )
    {
        return;
    }

    // Load only the blocks that actually exist (at most 7; c7 is never loaded here)
    c0 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ));
    if( cbData >= 32 )
    {
        c1 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ));
        if( cbData >= 48 )
        {
            c2 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ));
            if( cbData >= 64 )
            {
                c3 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ));
                if( cbData >= 80 )
                {
                    c4 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ));
                    if( cbData >= 96 )
                    {
                        c5 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ));
                        if( cbData >= 112 )
                        {
                            c6 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ));
                        }
                    }
                }
            }
        }
    }

    // Pick the narrowest macro that covers the remaining block count
    if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );
    }
    else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 );
    }
    else
    {
        AES_ENCRYPT_1( pExpandedKey, c0 );
    }

    // Store only the blocks that were loaded above
    _mm_storeu_si128( (__m128i *) (pbDst + 0 ), c0 );
    if( cbData >= 32 )
    {
        _mm_storeu_si128( (__m128i *) (pbDst + 16 ), c1 );
        if( cbData >= 48 )
        {
            _mm_storeu_si128( (__m128i *) (pbDst + 32 ), c2 );
            if( cbData >= 64 )
            {
                _mm_storeu_si128( (__m128i *) (pbDst + 48 ), c3 );
                if( cbData >= 80 )
                {
                    _mm_storeu_si128( (__m128i *) (pbDst + 64 ), c4 );
                    if( cbData >= 96 )
                    {
                        _mm_storeu_si128( (__m128i *) (pbDst + 80 ), c5 );
                        if( cbData >= 112 )
                        {
                            _mm_storeu_si128( (__m128i *) (pbDst + 96 ), c6 );
                        }
                    }
                }
            }
        }
    }
}
#pragma runtime_checks( "u", restore )
#pragma warning( pop )



//
// SymCryptAesCbcEncryptXmm: AES-CBC encrypt the whole blocks of pbSrc into pbDst.
//
//   pbChainingValue - on entry the chaining value for the first block; on exit the last
//                     ciphertext block produced. Left untouched when cbData is less
//                     than one block.
//   cbData          - byte count; trailing bytes short of a whole block are ignored.
//
// Throughout the loop c holds (previous ciphertext ^ next plaintext ^ rk0), i.e. the AES
// state after the first AddRoundKey, so AES_ENCRYPT_1_CHAIN can start at RoundKey[1].
//
VOID
SYMCRYPT_CALL
SymCryptAesCbcEncryptXmm(
    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
    _In_reads_( cbData ) PCBYTE pbSrc,
    _Out_writes_( cbData ) PBYTE pbDst,
    SIZE_T cbData )
{
    __m128i c = _mm_loadu_si128( (__m128i *) pbChainingValue );
    __m128i rk0 = _mm_loadu_si128( (__m128i *) &pExpandedKey->RoundKey[0] );
    __m128i rkLast = _mm_loadu_si128( (__m128i *) pExpandedKey->lastEncRoundKey );
    __m128i d;

    if (cbData < SYMCRYPT_AES_BLOCK_SIZE)
        return;

    // This algorithm is dominated by chain of dependent AES rounds, so we want to avoid XOR
    // instructions on the critical path where possible
    // We can compute (last round key ^ next plaintext block ^ first round key) off the critical
    // path and use this with AES_ENCRYPT_1_CHAIN so that only AES instructions write to c in
    // the main loop
    d = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbSrc ), rk0 );
    c = _mm_xor_si128( c, d );
    pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
    cbData -= SYMCRYPT_AES_BLOCK_SIZE;

    // Each iteration finishes the encryption of block i while already folding plaintext
    // block i+1 (pre-XORed with rk0) into the state via the merged last round key.
    while( cbData >= SYMCRYPT_AES_BLOCK_SIZE )
    {
        d = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbSrc ), rk0 );
        AES_ENCRYPT_1_CHAIN( pExpandedKey, c, _mm_xor_si128(d, rkLast ) );
        // c now equals (ciphertext_i ^ d); XOR d back out to recover ciphertext_i for the store
        _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128(c, d) );

        pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
        pbDst += SYMCRYPT_AES_BLOCK_SIZE;
        cbData -= SYMCRYPT_AES_BLOCK_SIZE;
    }
    // Last block: no following plaintext, so use the plain last round key
    AES_ENCRYPT_1_CHAIN( pExpandedKey, c, rkLast );
    _mm_storeu_si128( (__m128i *) pbDst, c );
    _mm_storeu_si128( (__m128i *) pbChainingValue, c );
}

// Disable warnings and VC++ runtime checks for use of uninitialized values (by design)
#pragma warning(push)
#pragma warning( disable: 6001 4701 )
#pragma runtime_checks( "u", off )
//
// SymCryptAesCbcDecryptXmm: AES-CBC decrypt the whole blocks of pbSrc into pbDst.
//
//   pbChainingValue - on entry the chaining value for the first block; on exit the last
//                     ciphertext block consumed. Left untouched when cbData is less
//                     than one block.
//
// c0..c7 are the blocks being decrypted; d0..d7 keep copies of the ciphertext so that each
// decrypted block can be XORed with the ciphertext block that preceded it. In every group
// the ciphertext (and the next chaining value) is read before any output is stored.
// NOTE(review): the tail computes the next chaining value from pbSrc + cbData - 16, which
// is the last ciphertext block only if cbData is a multiple of the block size - confirm
// that callers guarantee this.
//
VOID
SYMCRYPT_CALL
SymCryptAesCbcDecryptXmm(
    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
    _In_reads_( cbData ) PCBYTE pbSrc,
    _Out_writes_( cbData ) PBYTE pbDst,
    SIZE_T cbData )
{
    __m128i chain;
    __m128i c0, c1, c2, c3, c4, c5, c6, c7;
    __m128i d0, d1, d2, d3, d4, d5, d6, d7;

    if( cbData < SYMCRYPT_AES_BLOCK_SIZE )
    {
        return;
    }

    chain = _mm_loadu_si128( (__m128i *) pbChainingValue );

    //
    // First we do all multiples of 8 blocks
    //

    while( cbData >= 8 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        d0 = c0 = _mm_loadu_si128( (__m128i *) (pbSrc + 0 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d1 = c1 = _mm_loadu_si128( (__m128i *) (pbSrc + 1 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d2 = c2 = _mm_loadu_si128( (__m128i *) (pbSrc + 2 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d3 = c3 = _mm_loadu_si128( (__m128i *) (pbSrc + 3 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d4 = c4 = _mm_loadu_si128( (__m128i *) (pbSrc + 4 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d5 = c5 = _mm_loadu_si128( (__m128i *) (pbSrc + 5 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d6 = c6 = _mm_loadu_si128( (__m128i *) (pbSrc + 6 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d7 = c7 = _mm_loadu_si128( (__m128i *) (pbSrc + 7 * SYMCRYPT_AES_BLOCK_SIZE ) );

        AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );

        // Undo the CBC chaining: block i is XORed with ciphertext block i-1
        c0 = _mm_xor_si128( c0, chain );
        c1 = _mm_xor_si128( c1, d0 );
        c2 = _mm_xor_si128( c2, d1 );
        c3 = _mm_xor_si128( c3, d2 );
        c4 = _mm_xor_si128( c4, d3 );
        c5 = _mm_xor_si128( c5, d4 );
        c6 = _mm_xor_si128( c6, d5 );
        c7 = _mm_xor_si128( c7, d6 );
        chain = d7;

        _mm_storeu_si128( (__m128i *) (pbDst + 0 * SYMCRYPT_AES_BLOCK_SIZE ), c0 );
        _mm_storeu_si128( (__m128i *) (pbDst + 1 * SYMCRYPT_AES_BLOCK_SIZE ), c1 );
        _mm_storeu_si128( (__m128i *) (pbDst + 2 * SYMCRYPT_AES_BLOCK_SIZE ), c2 );
        _mm_storeu_si128( (__m128i *) (pbDst + 3 * SYMCRYPT_AES_BLOCK_SIZE ), c3 );
        _mm_storeu_si128( (__m128i *) (pbDst + 4 * SYMCRYPT_AES_BLOCK_SIZE ), c4 );
        _mm_storeu_si128( (__m128i *) (pbDst + 5 * SYMCRYPT_AES_BLOCK_SIZE ), c5 );
        _mm_storeu_si128( (__m128i *) (pbDst + 6 * SYMCRYPT_AES_BLOCK_SIZE ), c6 );
        _mm_storeu_si128( (__m128i *) (pbDst + 7 * SYMCRYPT_AES_BLOCK_SIZE ), c7 );

        pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
    }

    if( cbData >= 16 )
    {
        //
        // There is remaining work to be done
        //
        d0 = c0 = _mm_loadu_si128( (__m128i *) (pbSrc + 0 * SYMCRYPT_AES_BLOCK_SIZE ) );
        if( cbData >= 32 )
        {
            d1 = c1 = _mm_loadu_si128( (__m128i *) (pbSrc + 1 * SYMCRYPT_AES_BLOCK_SIZE ) );
            if( cbData >= 48 )
            {
                d2 = c2 = _mm_loadu_si128( (__m128i *) (pbSrc + 2 * SYMCRYPT_AES_BLOCK_SIZE ) );
                if( cbData >= 64 )
                {
                    d3 = c3 = _mm_loadu_si128( (__m128i *) (pbSrc + 3 * SYMCRYPT_AES_BLOCK_SIZE ) );
                    if( cbData >= 80 )
                    {
                        d4 = c4 = _mm_loadu_si128( (__m128i *) (pbSrc + 4 * SYMCRYPT_AES_BLOCK_SIZE ) );
                        if( cbData >= 96 )
                        {
                            d5 = c5 = _mm_loadu_si128( (__m128i *) (pbSrc + 5 * SYMCRYPT_AES_BLOCK_SIZE ) );
                            if( cbData >= 112 )
                            {
                                d6 = c6 = _mm_loadu_si128( (__m128i *) (pbSrc + 6 * SYMCRYPT_AES_BLOCK_SIZE ) );
                            }
                        }
                    }
                }
            }
        }

        //
        // Decrypt 1, 4, or 8 blocks in AES-CBC mode. This might decrypt uninitialized registers,
        // but those will not be used when we store the results.
        //
        if( cbData > 4 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );
            c0 = _mm_xor_si128( c0, chain );
            c1 = _mm_xor_si128( c1, d0 );
            c2 = _mm_xor_si128( c2, d1 );
            c3 = _mm_xor_si128( c3, d2 );
            c4 = _mm_xor_si128( c4, d3 );
            c5 = _mm_xor_si128( c5, d4 );
            c6 = _mm_xor_si128( c6, d5 );
        }
        else if( cbData > SYMCRYPT_AES_BLOCK_SIZE )
        {
            AES_DECRYPT_4( pExpandedKey, c0, c1, c2, c3 );
            c0 = _mm_xor_si128( c0, chain );
            c1 = _mm_xor_si128( c1, d0 );
            c2 = _mm_xor_si128( c2, d1 );
            c3 = _mm_xor_si128( c3, d2 );
        } else
        {
            AES_DECRYPT_1( pExpandedKey, c0 );
            c0 = _mm_xor_si128( c0, chain );
        }

        // Capture the next chaining value from the source before any store, so that
        // in-place operation (pbSrc == pbDst) does not overwrite it first.
        chain = _mm_loadu_si128( (__m128i *) (pbSrc + cbData - SYMCRYPT_AES_BLOCK_SIZE ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 0 * SYMCRYPT_AES_BLOCK_SIZE ), c0 );
        if( cbData >= 32 )
        {
            _mm_storeu_si128( (__m128i *) (pbDst + 1 * SYMCRYPT_AES_BLOCK_SIZE ), c1 );
            if( cbData >= 48 )
            {
                _mm_storeu_si128( (__m128i *) (pbDst + 2 * SYMCRYPT_AES_BLOCK_SIZE ), c2 );
                if( cbData >= 64 )
                {
                    _mm_storeu_si128( (__m128i *) (pbDst + 3 * SYMCRYPT_AES_BLOCK_SIZE ), c3 );
                    if( cbData >= 80 )
                    {
                        _mm_storeu_si128( (__m128i *) (pbDst + 4 * SYMCRYPT_AES_BLOCK_SIZE ), c4 );
                        if( cbData >= 96 )
                        {
                            _mm_storeu_si128( (__m128i *) (pbDst + 5 * SYMCRYPT_AES_BLOCK_SIZE ), c5 );
                            if( cbData >= 112 )
                            {
                                _mm_storeu_si128( (__m128i *) (pbDst + 6 * SYMCRYPT_AES_BLOCK_SIZE ), c6 );
                            }
                        }
                    }
                }
            }
        }
    }

    _mm_storeu_si128( (__m128i *) pbChainingValue, chain );

    return;
}
#pragma runtime_checks( "u", restore )
#pragma warning( pop )

//
// SymCryptAesCbcMacXmm: fold the whole blocks of pbData into a CBC-MAC chaining value.
//
//   pbChainingValue - on entry the current chaining value; on exit the chaining value after
//                     absorbing every whole block of pbData. Left untouched when cbData is
//                     less than one block.
//
// Same critical-path trick as CBC encryption, but since no ciphertext is written the
// (rk0 ^ rkLast) term is computed once outside the loop.
//
VOID
SYMCRYPT_CALL
SymCryptAesCbcMacXmm(
    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
    _In_reads_( cbData ) PCBYTE pbData,
    SIZE_T cbData )
{
    __m128i c = _mm_loadu_si128( (__m128i *) pbChainingValue );
    __m128i rk0 = _mm_loadu_si128( (__m128i *) &pExpandedKey->RoundKey[0] );
    __m128i rkLast = _mm_loadu_si128( (__m128i *) pExpandedKey->lastEncRoundKey );
    __m128i d, rk0AndLast;

    if (cbData < SYMCRYPT_AES_BLOCK_SIZE)
        return;

    // This algorithm is dominated by chain of dependent AES rounds, so we want to avoid XOR
    // instructions on the critical path where possible
    // We can compute (last round key ^ next plaintext block ^ first round key) off the critical
    // path and use this with AES_ENCRYPT_1_CHAIN so that only AES instructions write to c in
    // the main loop
    d = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbData ), rk0 );
    c = _mm_xor_si128( c, d );
    pbData += SYMCRYPT_AES_BLOCK_SIZE;
    cbData -= SYMCRYPT_AES_BLOCK_SIZE;

    // As we don't compute ciphertext here, we only need to XOR rk0 and rkLast once
    rk0AndLast = _mm_xor_si128( rk0, rkLast );

    while( cbData >= SYMCRYPT_AES_BLOCK_SIZE )
    {
        // d = next data block ^ rk0 ^ rkLast, used directly as the merged last round key
        d = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbData ), rk0AndLast );
        AES_ENCRYPT_1_CHAIN( pExpandedKey, c, d );

        pbData += SYMCRYPT_AES_BLOCK_SIZE;
        cbData -= SYMCRYPT_AES_BLOCK_SIZE;
    }
    AES_ENCRYPT_1_CHAIN( pExpandedKey, c, rkLast );
    _mm_storeu_si128( (__m128i *) pbChainingValue, c );
}


#pragma warning(push)
#pragma warning( disable:4701 ) // "Use of uninitialized variable"
#pragma runtime_checks( "u", off )

// aes-pattern.c is included as a template, parameterized by the three macros below.
// Presumably it defines the function named by SYMCRYPT_AesCtrMsbXxXmm using the given
// add/sub intrinsics for the counter - confirm against aes-pattern.c.
// This instantiation uses 64-bit lane arithmetic.
#define SYMCRYPT_AesCtrMsbXxXmm SymCryptAesCtrMsb64Xmm
#define MM_ADD_EPIXX _mm_add_epi64
#define MM_SUB_EPIXX _mm_sub_epi64

#include "aes-pattern.c"

#undef MM_SUB_EPIXX
#undef MM_ADD_EPIXX
#undef SYMCRYPT_AesCtrMsbXxXmm

// Template instantiation of aes-pattern.c with 32-bit lane arithmetic, under the name
// SymCryptAesCtrMsb32Xmm (presumably the function the template defines - confirm against
// aes-pattern.c).
#define SYMCRYPT_AesCtrMsbXxXmm SymCryptAesCtrMsb32Xmm
#define MM_ADD_EPIXX _mm_add_epi32
#define MM_SUB_EPIXX _mm_sub_epi32

#include "aes-pattern.c"

#undef MM_SUB_EPIXX
#undef MM_ADD_EPIXX
#undef SYMCRYPT_AesCtrMsbXxXmm

#pragma runtime_checks( "u", restore )
#pragma warning(pop)

// Copy/paste skeleton of the nested "how many whole blocks remain" ladder used by the
// tail-handling code in this file. Intentionally commented out.
/*
    if( cbData >= 16 )
    {
        if( cbData >= 32 )
        {
            if( cbData >= 48 )
            {
                if( cbData >= 64 )
                {
                    if( cbData >= 80 )
                    {
                        if( cbData >= 96 )
                        {
                            if( cbData >= 112 )
                            {
                            }
                        }
                    }
                }
            }
        }
    }
*/

//
// SymCryptXtsAesEncryptDataUnitXmm: XTS-AES encrypt one data unit.
//
//   pExpandedKey - expanded AES key used for the block encryptions in this function
//   pbTweakBlock - 16-byte tweak for the first block of the data unit. This function only
//                  reads it; whatever derives it is outside this function.
//   pbScratch    - 16 * SYMCRYPT_AES_BLOCK_SIZE bytes of scratch, accessed as an array of
//                  16 SYMCRYPT_GF128_ELEMENT. Entries [0..7] hold the tweaks for the next 8
//                  blocks, entries [8..15] hold (tweak ^ last round key) for the 8 blocks
//                  currently in flight.
//   pbSrc/pbDst  - input/output, cbData bytes each
//   cbData       - must be at least one block (asserted). Need not be a multiple of the
//                  block size; a partial final block is handled by ciphertext stealing.
//
// Structure: a main loop handling 8 blocks per iteration with the tweak update stitched
// between the AES rounds, followed by a one-block-at-a-time tail.
// NOTE(review): XTS_ALPHA_MASK is not referenced by name in this function; presumably the
// XTS_MUL_ALPHA* macros (defined elsewhere) pick it up from scope - confirm.
//
VOID
SYMCRYPT_CALL
SymCryptXtsAesEncryptDataUnitXmm(
    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock,
    _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch,
    _In_reads_( cbData ) PCBYTE pbSrc,
    _Out_writes_( cbData ) PBYTE pbDst,
    SIZE_T cbData )
{
    __m128i t0;
    __m128i c0, c1, c2, c3, c4, c5, c6, c7;
    __m128i roundkey, firstRoundKey, lastRoundKey;
    __m128i XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 );
    SYMCRYPT_GF128_ELEMENT* tweakBuffer = (SYMCRYPT_GF128_ELEMENT*) pbScratch;

    const BYTE (*keyPtr)[4][4];
    const BYTE (*keyLimit)[4][4] = pExpandedKey->lastEncRoundKey;
    UINT64 lastTweakLow, lastTweakHigh;
    int aesEncryptXtsLoop;

    SIZE_T cbDataMain; // number of bytes to handle in the main loop
    SIZE_T cbDataTail; // number of bytes to handle in the tail loop

    SYMCRYPT_ASSERT(cbData >= SYMCRYPT_AES_BLOCK_SIZE);

    // To simplify logic and unusual size processing, we handle all
    // data not a multiple of 8 blocks in the tail loop
    cbDataTail = cbData & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1);
    // Additionally, so that ciphertext stealing logic does not rely on
    // reading back from the destination buffer, when we have a non-zero
    // tail, we ensure that we handle at least 1 whole block in the tail
    //
    // Note that our caller has ensured we have at least 1 whole block
    // to process, this is checked in debug build
    // This means that cbDataTail is in [1,15] at this point iff there are
    // at least 8 whole blocks to process; so the below does not cause
    // cbDataTail or cbDataMain to exceed cbData
    cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? (8*SYMCRYPT_AES_BLOCK_SIZE) : 0;
    cbDataMain = cbData - cbDataTail;

    SYMCRYPT_ASSERT(cbDataMain <= cbData);
    SYMCRYPT_ASSERT(cbDataTail <= cbData);
    SYMCRYPT_ASSERT((cbDataMain & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0);

    // Derive the tweaks for the first 8 blocks from the initial tweak.
    // c1..c3 are chained from c0; c4 is derived directly from c0 via XTS_MUL_ALPHA4,
    // and c5..c7 are chained from c4.
    c0 = _mm_loadu_si128( (__m128i *) pbTweakBlock );
    XTS_MUL_ALPHA( c0, c1 );
    XTS_MUL_ALPHA( c1, c2 );
    XTS_MUL_ALPHA( c2, c3 );

    XTS_MUL_ALPHA4( c0, c4 );
    XTS_MUL_ALPHA ( c4, c5 );
    XTS_MUL_ALPHA ( c5, c6 );
    XTS_MUL_ALPHA ( c6, c7 );

    tweakBuffer[0].m128i = c0;
    tweakBuffer[1].m128i = c1;
    tweakBuffer[2].m128i = c2;
    tweakBuffer[3].m128i = c3;
    tweakBuffer[4].m128i = c4;
    tweakBuffer[5].m128i = c5;
    tweakBuffer[6].m128i = c6;
    tweakBuffer[7].m128i = c7;
    // Keep the most recent tweak in scalar registers for the stitched updates below
    lastTweakLow = tweakBuffer[7].ull[0];
    lastTweakHigh = tweakBuffer[7].ull[1];

    firstRoundKey = _mm_loadu_si128( (__m128i *) &pExpandedKey->RoundKey[0] );
    lastRoundKey = _mm_loadu_si128( (__m128i *) pExpandedKey->lastEncRoundKey );

    while( cbDataMain > 0 )
    {
        // At loop entry, tweakBuffer[0-7] are tweakValues for the next 8 blocks
        c0 = _mm_xor_si128( tweakBuffer[0].m128i, firstRoundKey );
        c1 = _mm_xor_si128( tweakBuffer[1].m128i, firstRoundKey );
        c2 = _mm_xor_si128( tweakBuffer[2].m128i, firstRoundKey );
        c3 = _mm_xor_si128( tweakBuffer[3].m128i, firstRoundKey );
        c4 = _mm_xor_si128( tweakBuffer[4].m128i, firstRoundKey );
        c5 = _mm_xor_si128( tweakBuffer[5].m128i, firstRoundKey );
        c6 = _mm_xor_si128( tweakBuffer[6].m128i, firstRoundKey );
        c7 = _mm_xor_si128( tweakBuffer[7].m128i, firstRoundKey );

        // state = plaintext ^ tweak ^ first round key
        c0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) );
        c1 = _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) );
        c2 = _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32) ) );
        c3 = _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) );
        c4 = _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) );
        c5 = _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) );
        c6 = _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) );
        c7 = _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc + 112) ) );

        keyPtr = &pExpandedKey->RoundKey[1];

        // Do 8 full rounds (AES-128|AES-192|AES-256) with stitched XTS (performed in scalar registers)
        for( aesEncryptXtsLoop = 0; aesEncryptXtsLoop < 8; aesEncryptXtsLoop++ )
        {
            roundkey = _mm_loadu_si128( (__m128i *) keyPtr );
            keyPtr ++;
            c0 = _mm_aesenc_si128( c0, roundkey );
            c1 = _mm_aesenc_si128( c1, roundkey );
            c2 = _mm_aesenc_si128( c2, roundkey );
            c3 = _mm_aesenc_si128( c3, roundkey );
            c4 = _mm_aesenc_si128( c4, roundkey );
            c5 = _mm_aesenc_si128( c5, roundkey );
            c6 = _mm_aesenc_si128( c6, roundkey );
            c7 = _mm_aesenc_si128( c7, roundkey );

            // Prepare tweakBuffer[8-15] with tweak^lastRoundKey
            tweakBuffer[ 8+aesEncryptXtsLoop ].m128i = _mm_xor_si128( tweakBuffer[ aesEncryptXtsLoop ].m128i, lastRoundKey );
            // Prepare tweakBuffer[0-7] with tweaks for next 8 blocks
            XTS_MUL_ALPHA_Scalar( lastTweakLow, lastTweakHigh );
            tweakBuffer[ aesEncryptXtsLoop ].ull[0] = lastTweakLow;
            tweakBuffer[ aesEncryptXtsLoop ].ull[1] = lastTweakHigh;
        }

        // Remaining AESENC rounds (at least one) up to, but not including, the last round key
        do
        {
            roundkey = _mm_loadu_si128( (__m128i *) keyPtr );
            keyPtr ++;
            c0 = _mm_aesenc_si128( c0, roundkey );
            c1 = _mm_aesenc_si128( c1, roundkey );
            c2 = _mm_aesenc_si128( c2, roundkey );
            c3 = _mm_aesenc_si128( c3, roundkey );
            c4 = _mm_aesenc_si128( c4, roundkey );
            c5 = _mm_aesenc_si128( c5, roundkey );
            c6 = _mm_aesenc_si128( c6, roundkey );
            c7 = _mm_aesenc_si128( c7, roundkey );
        } while( keyPtr < keyLimit );

        // AESENCLAST with (tweak ^ last round key) applies the final tweak XOR for free
        _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_aesenclast_si128( c0, tweakBuffer[ 8].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_aesenclast_si128( c1, tweakBuffer[ 9].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_aesenclast_si128( c2, tweakBuffer[10].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_aesenclast_si128( c3, tweakBuffer[11].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_aesenclast_si128( c4, tweakBuffer[12].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_aesenclast_si128( c5, tweakBuffer[13].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_aesenclast_si128( c6, tweakBuffer[14].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 112), _mm_aesenclast_si128( c7, tweakBuffer[15].m128i ) );

        pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        cbDataMain -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
    }

    if( cbDataTail == 0 )
    {
        return; // <-- expected case; early return here
    }

    // Rare case, with data unit length not being multiple of 128 bytes, handle the tail one block at a time
    t0 = tweakBuffer[0].m128i;

    // Process whole blocks while at least two blocks' worth of bytes remain; the last
    // whole block (plus any partial block) is left for the code below.
    while( cbDataTail >= 2*SYMCRYPT_AES_BLOCK_SIZE )
    {
        c0 = _mm_xor_si128( _mm_loadu_si128( ( __m128i * ) pbSrc ), t0 );
        pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
        AES_ENCRYPT_1( pExpandedKey, c0 );
        _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128( c0, t0 ) );
        pbDst += SYMCRYPT_AES_BLOCK_SIZE;
        XTS_MUL_ALPHA( t0, t0 );
        cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE;
    }

    if( cbDataTail > SYMCRYPT_AES_BLOCK_SIZE )
    {
        // Ciphertext stealing encryption
        //
        //                       +--------------+
        //                       |              |
        //                       |              V
        //   +-----------------+ |  +-----+-----------+
        //   |      P_m-1      | |  | P_m |++++CP+++++|
        //   +-----------------+ |  +-----+-----------+
        //            |          |           |
        //         enc_m-1       |         enc_m
        //            |          |           |
        //            V          |           V
        //   +-----+-----------+ |  +-----------------+
        //   | C_m |++++CP+++++|--+ |      C_m-1      |
        //   +-----+-----------+    +-----------------+
        //      |                       /
        //      +------------------- / --+
        //                         /     |
        //                        |      V
        //   +-----------------+  |   +-----+
        //   |      C_m-1      |<-+   | C_m |
        //   +-----------------+      +-----+

        // Encrypt penultimate plaintext block into tweakBuffer[0]
        c0 = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbSrc ), t0 );
        AES_ENCRYPT_1( pExpandedKey, c0 );
        tweakBuffer[0].m128i = _mm_xor_si128( c0, t0 );

        // cbDataTail is now the size of the partial final block (1..15 bytes)
        cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE;

        // Copy tweakBuffer[0] to tweakBuffer[1]
        tweakBuffer[1].m128i = tweakBuffer[0].m128i;
        // Copy final plaintext bytes to prefix of tweakBuffer[0] - we must read before writing to support in-place encryption
        memcpy( &tweakBuffer[0].ul[0], pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbDataTail );
        // Copy prefix of tweakBuffer[1] to the right place in the destination buffer
        memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &tweakBuffer[1].ul[0], cbDataTail );

        // Do final tweak update
        XTS_MUL_ALPHA( t0, t0 );

        // Load updated tweakBuffer[0] into c0
        c0 = tweakBuffer[0].m128i;
    } else {
        // Just load final plaintext block into c0
        c0 = _mm_loadu_si128( (__m128i*) pbSrc );
    }

    // Final full block encryption
    c0 = _mm_xor_si128( c0, t0 );
    AES_ENCRYPT_1( pExpandedKey, c0 );
    _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128( c0, t0 ) );
}

//
// SymCryptXtsAesDecryptDataUnitXmm: XTS-AES decrypt one data unit.
//
// Parameters and overall structure mirror SymCryptXtsAesEncryptDataUnitXmm. Differences
// visible in the code:
//  - the round keys are walked from lastEncRoundKey to lastDecRoundKey using AESDEC /
//    AESDECLAST, so "firstRoundKey" here is the key at lastEncRoundKey and "lastRoundKey"
//    is the key at lastDecRoundKey;
//  - for ciphertext stealing the tweak order is swapped: the penultimate ciphertext block
//    is decrypted with the later tweak (c1) and the final, reassembled block with the
//    earlier tweak (t0).
// NOTE(review): XTS_ALPHA_MASK is not referenced by name in this function; presumably the
// XTS_MUL_ALPHA* macros (defined elsewhere) pick it up from scope - confirm.
//
VOID
SYMCRYPT_CALL
SymCryptXtsAesDecryptDataUnitXmm(
    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock,
    _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch,
    _In_reads_( cbData ) PCBYTE pbSrc,
    _Out_writes_( cbData ) PBYTE pbDst,
    SIZE_T cbData )
{
    __m128i t0;
    __m128i c0, c1, c2, c3, c4, c5, c6, c7;
    __m128i roundkey, firstRoundKey, lastRoundKey;
    __m128i XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 );
    SYMCRYPT_GF128_ELEMENT* tweakBuffer = (SYMCRYPT_GF128_ELEMENT*) pbScratch;

    const BYTE (*keyPtr)[4][4];
    const BYTE (*keyLimit)[4][4] = pExpandedKey->lastDecRoundKey;
    UINT64 lastTweakLow, lastTweakHigh;
    int aesDecryptXtsLoop;

    SIZE_T cbDataMain; // number of bytes to handle in the main loop
    SIZE_T cbDataTail; // number of bytes to handle in the tail loop

    SYMCRYPT_ASSERT(cbData >= SYMCRYPT_AES_BLOCK_SIZE);

    // To simplify logic and unusual size processing, we handle all
    // data not a multiple of 8 blocks in the tail loop
    cbDataTail = cbData & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1);
    // Additionally, so that ciphertext stealing logic does not rely on
    // reading back from the destination buffer, when we have a non-zero
    // tail, we ensure that we handle at least 1 whole block in the tail
    //
    // Note that our caller has ensured we have at least 1 whole block
    // to process, this is checked in debug build
    // This means that cbDataTail is in [1,15] at this point iff there are
    // at least 8 whole blocks to process; so the below does not cause
    // cbDataTail or cbDataMain to exceed cbData
    cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? (8*SYMCRYPT_AES_BLOCK_SIZE) : 0;
    cbDataMain = cbData - cbDataTail;

    SYMCRYPT_ASSERT(cbDataMain <= cbData);
    SYMCRYPT_ASSERT(cbDataTail <= cbData);
    SYMCRYPT_ASSERT((cbDataMain & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0);

    // Derive the tweaks for the first 8 blocks from the initial tweak
    c0 = _mm_loadu_si128( (__m128i *) pbTweakBlock );
    XTS_MUL_ALPHA( c0, c1 );
    XTS_MUL_ALPHA( c1, c2 );
    XTS_MUL_ALPHA( c2, c3 );

    XTS_MUL_ALPHA4( c0, c4 );
    XTS_MUL_ALPHA ( c4, c5 );
    XTS_MUL_ALPHA ( c5, c6 );
    XTS_MUL_ALPHA ( c6, c7 );

    tweakBuffer[0].m128i = c0;
    tweakBuffer[1].m128i = c1;
    tweakBuffer[2].m128i = c2;
    tweakBuffer[3].m128i = c3;
    tweakBuffer[4].m128i = c4;
    tweakBuffer[5].m128i = c5;
    tweakBuffer[6].m128i = c6;
    tweakBuffer[7].m128i = c7;
    // Keep the most recent tweak in scalar registers for the stitched updates below
    lastTweakLow = tweakBuffer[7].ull[0];
    lastTweakHigh = tweakBuffer[7].ull[1];

    firstRoundKey = _mm_loadu_si128( (__m128i *) pExpandedKey->lastEncRoundKey );
    lastRoundKey = _mm_loadu_si128( (__m128i *) pExpandedKey->lastDecRoundKey );

    while( cbDataMain > 0 )
    {
        // At loop entry, tweakBuffer[0-7] are tweakValues for the next 8 blocks
        c0 = _mm_xor_si128( tweakBuffer[0].m128i, firstRoundKey );
        c1 = _mm_xor_si128( tweakBuffer[1].m128i, firstRoundKey );
        c2 = _mm_xor_si128( tweakBuffer[2].m128i, firstRoundKey );
        c3 = _mm_xor_si128( tweakBuffer[3].m128i, firstRoundKey );
        c4 = _mm_xor_si128( tweakBuffer[4].m128i, firstRoundKey );
        c5 = _mm_xor_si128( tweakBuffer[5].m128i, firstRoundKey );
        c6 = _mm_xor_si128( tweakBuffer[6].m128i, firstRoundKey );
        c7 = _mm_xor_si128( tweakBuffer[7].m128i, firstRoundKey );

        // state = ciphertext ^ tweak ^ first decryption round key
        c0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) );
        c1 = _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) );
        c2 = _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32) ) );
        c3 = _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) );
        c4 = _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) );
        c5 = _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) );
        c6 = _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) );
        c7 = _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc + 112) ) );

        keyPtr = pExpandedKey->lastEncRoundKey + 1;

        // Do 8 full rounds (AES-128|AES-192|AES-256) with stitched XTS (performed in scalar registers)
        for( aesDecryptXtsLoop = 0; aesDecryptXtsLoop < 8; aesDecryptXtsLoop++ )
        {
            roundkey = _mm_loadu_si128( (__m128i *) keyPtr );
            keyPtr ++;
            c0 = _mm_aesdec_si128( c0, roundkey );
            c1 = _mm_aesdec_si128( c1, roundkey );
            c2 = _mm_aesdec_si128( c2, roundkey );
            c3 = _mm_aesdec_si128( c3, roundkey );
            c4 = _mm_aesdec_si128( c4, roundkey );
            c5 = _mm_aesdec_si128( c5, roundkey );
            c6 = _mm_aesdec_si128( c6, roundkey );
            c7 = _mm_aesdec_si128( c7, roundkey );

            // Prepare tweakBuffer[8-15] with tweak^lastRoundKey
            tweakBuffer[ 8+aesDecryptXtsLoop ].m128i = _mm_xor_si128( tweakBuffer[ aesDecryptXtsLoop ].m128i, lastRoundKey );
            // Prepare tweakBuffer[0-7] with tweaks for next 8 blocks
            XTS_MUL_ALPHA_Scalar( lastTweakLow, lastTweakHigh );
            tweakBuffer[ aesDecryptXtsLoop ].ull[0] = lastTweakLow;
            tweakBuffer[ aesDecryptXtsLoop ].ull[1] = lastTweakHigh;
        }

        // Remaining AESDEC rounds (at least one) up to, but not including, the last round key
        do
        {
            roundkey = _mm_loadu_si128( (__m128i *) keyPtr );
            keyPtr ++;
            c0 = _mm_aesdec_si128( c0, roundkey );
            c1 = _mm_aesdec_si128( c1, roundkey );
            c2 = _mm_aesdec_si128( c2, roundkey );
            c3 = _mm_aesdec_si128( c3, roundkey );
            c4 = _mm_aesdec_si128( c4, roundkey );
            c5 = _mm_aesdec_si128( c5, roundkey );
            c6 = _mm_aesdec_si128( c6, roundkey );
            c7 = _mm_aesdec_si128( c7, roundkey );
        } while( keyPtr < keyLimit );

        // AESDECLAST with (tweak ^ last round key) applies the final tweak XOR for free
        _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_aesdeclast_si128( c0, tweakBuffer[ 8].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_aesdeclast_si128( c1, tweakBuffer[ 9].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_aesdeclast_si128( c2, tweakBuffer[10].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_aesdeclast_si128( c3, tweakBuffer[11].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_aesdeclast_si128( c4, tweakBuffer[12].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_aesdeclast_si128( c5, tweakBuffer[13].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_aesdeclast_si128( c6, tweakBuffer[14].m128i ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 112), _mm_aesdeclast_si128( c7, tweakBuffer[15].m128i ) );

        pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        cbDataMain -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
    }

    if( cbDataTail == 0 )
    {
        return; // <-- expected case; early return here
    }

    // Rare case, with data unit length not being multiple of 128 bytes, handle the tail one block at a time
    t0 = tweakBuffer[0].m128i;

    while( cbDataTail >= 2*SYMCRYPT_AES_BLOCK_SIZE )
    {
        c0 = _mm_xor_si128( _mm_loadu_si128( ( __m128i * ) pbSrc ), t0 );
        pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
        AES_DECRYPT_1( pExpandedKey, c0 );
        _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128( c0, t0 ) );
        pbDst += SYMCRYPT_AES_BLOCK_SIZE;
        // NOTE(review): c7 is not read again in the remainder of this function; this
        // assignment looks like a leftover - confirm before removing.
        c7 = t0;
        XTS_MUL_ALPHA( t0, t0 );
        cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE;
    }

    if( cbDataTail > SYMCRYPT_AES_BLOCK_SIZE )
    {
        // Ciphertext stealing decryption
        //
        //                       +--------------+
        //                       |              |
        //                       |              V
        //   +-----------------+ |  +-----+-----------+
        //   |      C_m-1      | |  | C_m |++++CP+++++|
        //   +-----------------+ |  +-----+-----------+
        //            |          |           |
        //          dec_m        |        dec_m-1
        //            |          |           |
        //            V          |           V
        //   +-----+-----------+ |  +-----------------+
        //   | P_m |++++CP+++++|--+ |      P_m-1      |
        //   +-----+-----------+    +-----------------+
        //      |                       /
        //      +------------------- / --+
        //                         /     |
        //                        |      V
        //   +-----------------+  |   +-----+
        //   |      P_m-1      |<-+   | P_m |
        //   +-----------------+      +-----+

        // Do final tweak update into c1
        // Penultimate tweak is in t0, ready for final decryption
        XTS_MUL_ALPHA( t0, c1 );

        // Decrypt penultimate ciphertext block into tweakBuffer[0]
        c0 = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbSrc ), c1 );
        AES_DECRYPT_1( pExpandedKey, c0 );
        tweakBuffer[0].m128i = _mm_xor_si128( c0, c1 );

        // cbDataTail is now the size of the partial final block (1..15 bytes)
        cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE;

        // Copy tweakBuffer[0] to tweakBuffer[1]
        tweakBuffer[1].m128i = tweakBuffer[0].m128i;
        // Copy final ciphertext bytes to prefix of tweakBuffer[0] - we must read before writing to support in-place decryption
        memcpy( &tweakBuffer[0].ul[0], pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbDataTail );
        // Copy prefix of tweakBuffer[1] to the right place in the destination buffer
        memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &tweakBuffer[1].ul[0], cbDataTail );

        // Load updated tweakBuffer[0] into c0
        c0 = tweakBuffer[0].m128i;
    } else {
        // Just load final ciphertext block into c0
        c0 = _mm_loadu_si128( (__m128i*) pbSrc );
    }

    // Final full block decryption
    c0 = _mm_xor_si128( c0, t0 );
    AES_DECRYPT_1( pExpandedKey, c0 );
    _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128( c0, t0 ) );
}

#define AES_FULLROUND_4_GHASH_1( roundkey, keyPtr, c0, c1, c2, c3, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) \
{ \
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++;
\ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + c1 = _mm_aesenc_si128( c1, roundkey ); \ + c2 = _mm_aesenc_si128( c2, roundkey ); \ + c3 = _mm_aesenc_si128( c3, roundkey ); \ + /* GHASH half of the stitched round: load the next 16-byte input block, reorder its bytes with byteReverseOrder, and advance gHashPointer past it. */ \ + r0 = _mm_loadu_si128( (__m128i *) gHashPointer ); \ + r0 = _mm_shuffle_epi8( r0, byteReverseOrder ); \ + gHashPointer += 16; \ + /* Multiply the block by the GHASH_H_POWER table entry selected by todo: t0 receives the product of the two low 64-bit halves (selector 0x00), t1 the product of the two high halves (selector 0x11). */ \ + t1 = _mm_loadu_si128( (__m128i *) &GHASH_H_POWER(gHashExpandedKeyTable, todo) ); \ + t0 = _mm_clmulepi64_si128( r0, t1, 0x00 ); \ + t1 = _mm_clmulepi64_si128( r0, t1, 0x11 ); \ + /* Fold both products into the low and high accumulators. */ \ + resl = _mm_xor_si128( resl, t0 ); \ + resh = _mm_xor_si128( resh, t1 ); \ + /* Middle term: xor the high half of the block onto its low half, then multiply that by the low half of the GHASH_Hx_POWER entry. */ \ + t0 = _mm_srli_si128( r0, 8 ); \ + r0 = _mm_xor_si128( r0, t0 ); \ + t1 = _mm_loadu_si128( (__m128i *) &GHASH_Hx_POWER(gHashExpandedKeyTable, todo) ); \ + t1 = _mm_clmulepi64_si128( r0, t1, 0x00 ); \ + /* Fold the middle product into resm and decrement todo, which selects the table entry used for the next block. */ \ + resm = _mm_xor_si128( resm, t1 ); \ + todo --; \ +}; + /* AES_GCM_ENCRYPT_4: AES-encrypts the four blocks c0..c3 in place with pExpandedKey while folding ghashRounds 16-byte blocks read from gHashPointer into the accumulators resl, resm and resh. Steps: xor with RoundKey[0]; ghashRounds rounds through AES_FULLROUND_4_GHASH_1 (one AES round and one GHASH block each); plain aesenc rounds until keyPtr reaches lastEncRoundKey; a final aesenclast with the key at keyPtr. The do/while executes at least once, so ghashRounds has to leave at least one round key before lastEncRoundKey. c0..c3, gHashPointer, todo and the three accumulators are assigned to, so they must be modifiable lvalues. The expansion is a braced block followed by a semicolon and declares its own locals (keyPtr, keyLimit, roundkey, t0, t1, r0, aesEncryptGhashLoop). */ +#define AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, gHashPointer, ghashRounds, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m128i roundkey; \ + __m128i t0, t1; \ + __m128i r0; \ + SIZE_T aesEncryptGhashLoop; \ +\ + keyPtr = &pExpandedKey->RoundKey[0]; \ + keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_xor_si128( c0, roundkey ); \ + c1 = _mm_xor_si128( c1, roundkey ); \ + c2 = _mm_xor_si128( c2, roundkey ); \ + c3 = _mm_xor_si128( c3, roundkey ); \ +\ + /* Do ghashRounds full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < ghashRounds; aesEncryptGhashLoop++ ) \ + { \ + AES_FULLROUND_4_GHASH_1( roundkey, keyPtr, c0, c1, c2, c3, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ); \ + } \ + /* Remaining AES rounds, without GHASH, up to but excluding lastEncRoundKey. */ \ + do \ + { \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + c1 = _mm_aesenc_si128( c1, roundkey ); \ + c2 = 
_mm_aesenc_si128( c2, roundkey ); \ + c3 = _mm_aesenc_si128( c3, roundkey ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ +\ + c0 = _mm_aesenclast_si128( c0, roundkey ); \ + c1 = _mm_aesenclast_si128( c1, roundkey ); \ + c2 = _mm_aesenclast_si128( c2, roundkey ); \ + c3 = _mm_aesenclast_si128( c3, roundkey ); \ +}; + /* AES_FULLROUND_8_GHASH_1: one stitched step over eight blocks. Loads the round key at keyPtr (advancing keyPtr), applies one aesenc round to c0..c7, then multiplies one 16-byte block read from gHashPointer by the key-table entries selected by todo and xors the three partial products into resl (low halves), resh (high halves) and resm (xor-of-halves term). gHashPointer advances by 16 and todo is decremented. roundkey, r0, t0 and t1 are caller-supplied scratch variables that get overwritten. NOTE(review): GHASH_H_POWER and GHASH_Hx_POWER are defined outside this chunk; the Hx entry is presumably the xor of the two halves of the matching H power (Karatsuba-style middle term) - confirm against their definition. */ +#define AES_FULLROUND_8_GHASH_1( roundkey, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + c1 = _mm_aesenc_si128( c1, roundkey ); \ + c2 = _mm_aesenc_si128( c2, roundkey ); \ + c3 = _mm_aesenc_si128( c3, roundkey ); \ + c4 = _mm_aesenc_si128( c4, roundkey ); \ + c5 = _mm_aesenc_si128( c5, roundkey ); \ + c6 = _mm_aesenc_si128( c6, roundkey ); \ + c7 = _mm_aesenc_si128( c7, roundkey ); \ + /* GHASH half: fetch and byte-reorder the next 16-byte input block. */ \ + r0 = _mm_loadu_si128( (__m128i *) gHashPointer ); \ + r0 = _mm_shuffle_epi8( r0, byteReverseOrder ); \ + gHashPointer += 16; \ + /* Low-half product into t0 (selector 0x00), high-half product into t1 (selector 0x11). */ \ + t1 = _mm_loadu_si128( (__m128i *) &GHASH_H_POWER(gHashExpandedKeyTable, todo) ); \ + t0 = _mm_clmulepi64_si128( r0, t1, 0x00 ); \ + t1 = _mm_clmulepi64_si128( r0, t1, 0x11 ); \ +\ + resl = _mm_xor_si128( resl, t0 ); \ + resh = _mm_xor_si128( resh, t1 ); \ + /* Middle term: (high half xor low half) of the block times the low half of the GHASH_Hx_POWER entry. */ \ + t0 = _mm_srli_si128( r0, 8 ); \ + r0 = _mm_xor_si128( r0, t0 ); \ + t1 = _mm_loadu_si128( (__m128i *) &GHASH_Hx_POWER(gHashExpandedKeyTable, todo) ); \ + t1 = _mm_clmulepi64_si128( r0, t1, 0x00 ); \ +\ + resm = _mm_xor_si128( resm, t1 ); \ + todo --; \ +}; + /* AES_GCM_ENCRYPT_8: AES-encrypts the eight blocks c0..c7 in place with pExpandedKey while folding ghashRounds 16-byte blocks read from gHashPointer into the accumulators resl, resm and resh. Steps: xor with RoundKey[0]; ghashRounds rounds through AES_FULLROUND_8_GHASH_1; plain aesenc rounds until keyPtr reaches lastEncRoundKey; a final aesenclast with the key at keyPtr. The do/while executes at least once, so ghashRounds has to leave at least one round key before lastEncRoundKey. c0..c7, gHashPointer, todo and the three accumulators are assigned to, so they must be modifiable lvalues. The expansion is a braced block followed by a semicolon and declares its own locals (keyPtr, keyLimit, roundkey, t0, t1, r0, aesEncryptGhashLoop). */ +#define AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, gHashPointer, ghashRounds, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m128i roundkey; \ + __m128i t0, t1; \ + __m128i r0; \ + SIZE_T aesEncryptGhashLoop; \ +\ + keyPtr = &pExpandedKey->RoundKey[0]; \ + 
keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_xor_si128( c0, roundkey ); \ + c1 = _mm_xor_si128( c1, roundkey ); \ + c2 = _mm_xor_si128( c2, roundkey ); \ + c3 = _mm_xor_si128( c3, roundkey ); \ + c4 = _mm_xor_si128( c4, roundkey ); \ + c5 = _mm_xor_si128( c5, roundkey ); \ + c6 = _mm_xor_si128( c6, roundkey ); \ + c7 = _mm_xor_si128( c7, roundkey ); \ +\ + /* Do ghashRounds full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < ghashRounds; aesEncryptGhashLoop++ ) \ + { \ + AES_FULLROUND_8_GHASH_1( roundkey, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ); \ + } \ + /* Remaining AES rounds, without GHASH, up to but excluding lastEncRoundKey. */ \ + do \ + { \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + c1 = _mm_aesenc_si128( c1, roundkey ); \ + c2 = _mm_aesenc_si128( c2, roundkey ); \ + c3 = _mm_aesenc_si128( c3, roundkey ); \ + c4 = _mm_aesenc_si128( c4, roundkey ); \ + c5 = _mm_aesenc_si128( c5, roundkey ); \ + c6 = _mm_aesenc_si128( c6, roundkey ); \ + c7 = _mm_aesenc_si128( c7, roundkey ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ +\ + c0 = _mm_aesenclast_si128( c0, roundkey ); \ + c1 = _mm_aesenclast_si128( c1, roundkey ); \ + c2 = _mm_aesenclast_si128( c2, roundkey ); \ + c3 = _mm_aesenclast_si128( c3, roundkey ); \ + c4 = _mm_aesenclast_si128( c4, roundkey ); \ + c5 = _mm_aesenclast_si128( c5, roundkey ); \ + c6 = _mm_aesenclast_si128( c6, roundkey ); \ + c7 = _mm_aesenclast_si128( c7, roundkey ); \ +}; + /* SymCryptAesGcmEncryptStitchedXmm: AES-CTR encrypts cbData bytes from pbSrc into pbDst and folds the resulting ciphertext into the GHASH state at pState. cbData must be a multiple of the block size (asserted). For full groups of eight blocks the GHASH input is read back from pbDst (pbGhashSrc starts at pbDst) inside the same AES_GCM_ENCRYPT_8 / AES_GCM_ENCRYPT_4 invocation that produces the keystream for the following blocks, so hashing runs eight blocks behind encryption; the final 1-7 blocks are hashed straight from registers. The counter block is read from pbChainingValue, byte-reversed, stepped with _mm_add_epi32 (only the 32-bit lane holding the last four bytes of the block changes) and written back on exit; pState is rewritten with the updated hash state. NOTE(review): pbChainingValue is annotated _In_reads_ although it is stored to at the end of the function, and the reference call named below is SymCryptAesCtrMsb64Xmm although the increments here are 32-bit - confirm both against upstream. */ +// This call is functionally identical to: +// SymCryptAesCtrMsb64Xmm( pExpandedKey, +// pbChainingValue, +// pbSrc, +// pbDst, +// cbData ); +// SymCryptGHashAppendDataPclmulqdq( expandedKeyTable, +// pState, +// pbDst, +// cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedXmm( + 
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue ); + + __m128i BYTE_REVERSE_ORDER = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + __m128i chainIncrement1 = _mm_set_epi32( 0, 0, 0, 1 ); + __m128i chainIncrement2 = _mm_set_epi32( 0, 0, 0, 2 ); + __m128i chainIncrement8 = _mm_set_epi32( 0, 0, 0, 8 ); + + __m128i c0, c1, c2, c3, c4, c5, c6, c7; + __m128i r0, r1; + + __m128i state; + __m128i a0, a1, a2; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + PCBYTE pbGhashSrc = pbDst; + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); + state = _mm_loadu_si128( (__m128i *) pState ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + // Do 8 blocks of CTR either for tail (if total blocks <8) or for encryption of first 8 blocks + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + c5 = _mm_add_epi32( c3, chainIncrement2 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + c7 = _mm_add_epi32( c5, chainIncrement2 ); + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + c4 = 
_mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER ); + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + if( nBlocks >= 8 ) + { + // Encrypt first 8 blocks - update chain + chain = _mm_add_epi32( chain, chainIncrement8 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112) ) ) ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + + while( nBlocks >= 16 ) + { + // In this loop we always have 8 blocks to encrypt and we have already encrypted the previous 8 blocks ready for GHASH + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + c5 = _mm_add_epi32( c3, chainIncrement2 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + c7 = _mm_add_epi32( c5, chainIncrement2 ); + chain = _mm_add_epi32( c6, chainIncrement2 ); + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = 
_mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, 8, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112) ) ) ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 8; + + if( todo == 0 ) + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + } + + // We now have at least 8 blocks of encrypted data to GHASH and at most 7 blocks left to encrypt + // Do 8 blocks of GHASH in parallel with generating 0, 4, or 8 AES-CTR 
blocks for tail encryption + nBlocks -= 8; + if (nBlocks > 0) + { + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + + if (nBlocks > 4) + { + // Do 8 rounds of AES-CTR for tail in parallel with 8 rounds of GHASH + c5 = _mm_add_epi32( c4, chainIncrement1 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, 8, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + } + else + { + // Do 4 rounds of AES-CTR for tail in parallel with 8 rounds of GHASH + AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, pbGhashSrc, 8, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + } + + if( todo == 0) + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + } + else + { + // Just do the final 8 rounds of GHASH + for( todo=8; todo>0; todo-- ) + { + r0 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) (pbGhashSrc + 0) ), BYTE_REVERSE_ORDER ); + pbGhashSrc += SYMCRYPT_AES_BLOCK_SIZE; + + CLMUL_ACC_3( r0, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + } + } + + if( nBlocks > 0 ) + { + // Encrypt 1-7 blocks with pre-generated AES-CTR blocks 
and GHASH the results + while( nBlocks >= 2 ) + { + chain = _mm_add_epi32( chain, chainIncrement2 ); + + r0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ); + r1 = _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), r0 ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), r1 ); + + r0 = _mm_shuffle_epi8( r0, BYTE_REVERSE_ORDER ); + r1 = _mm_shuffle_epi8( r1, BYTE_REVERSE_ORDER ); + + CLMUL_ACC_3( r0, GHASH_H_POWER(expandedKeyTable, todo - 0), GHASH_Hx_POWER(expandedKeyTable, todo - 0), a0, a1, a2 ); + CLMUL_ACC_3( r1, GHASH_H_POWER(expandedKeyTable, todo - 1), GHASH_Hx_POWER(expandedKeyTable, todo - 1), a0, a1, a2 ); + + pbDst += 2*SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 2*SYMCRYPT_AES_BLOCK_SIZE; + todo -= 2; + nBlocks -= 2; + c0 = c2; + c1 = c3; + c2 = c4; + c3 = c5; + c4 = c6; + } + + if( nBlocks > 0 ) + { + chain = _mm_add_epi32( chain, chainIncrement1 ); + + r0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), r0 ); + + r0 = _mm_shuffle_epi8( r0, BYTE_REVERSE_ORDER ); + + CLMUL_ACC_3( r0, GHASH_H_POWER(expandedKeyTable, 1), GHASH_Hx_POWER(expandedKeyTable, 1), a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + } + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); + _mm_storeu_si128( (__m128i *) pbChainingValue, chain ); + _mm_storeu_si128( (__m128i *) pState, state ); +} + /* SymCryptAesGcmDecryptStitchedXmm (defined below): folds cbData bytes of ciphertext at pbSrc into the GHASH state at pState and AES-CTR decrypts them into pbDst. cbData must be a multiple of the block size (asserted). GHASH input is read from pbSrc (pbGhashSrc starts at pbSrc) inside the same AES_GCM_ENCRYPT_8 / AES_GCM_ENCRYPT_4 invocation that produces the keystream for those blocks: eight blocks per iteration while at least eight remain, then a single pass that hashes the remaining 1-7 blocks. The counter block in pbChainingValue is advanced with 32-bit lane additions and written back on exit; pState receives the updated hash state. The pragmas that follow concern uninitialised locals: on the tail path c7 is handed to AES_GCM_ENCRYPT_8 without having been assigned and, when four or fewer blocks remain, c5 and c6 are copied down by the shift in the tail loop without having been assigned; as far as the tail loop shows, those values never reach an output block. NOTE(review): warning 4701 and runtime_checks are MSVC-specific controls - confirm that other compilers need no equivalent. */ +#pragma warning(push) +#pragma warning( disable:4701 ) +#pragma runtime_checks( "u", off ) +// This call is functionally identical to: +// SymCryptGHashAppendDataPclmulqdq( expandedKeyTable, +// pState, +// pbSrc, +// cbData ); +// SymCryptAesCtrMsb64Xmm( pExpandedKey, +// pbChainingValue, +// pbSrc, +// pbDst, +// cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE 
pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue ); + + __m128i BYTE_REVERSE_ORDER = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + __m128i chainIncrement1 = _mm_set_epi32( 0, 0, 0, 1 ); + __m128i chainIncrement2 = _mm_set_epi32( 0, 0, 0, 2 ); + + __m128i c0, c1, c2, c3, c4, c5, c6, c7; + + __m128i state; + __m128i a0, a1, a2; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo = 0; + PCBYTE pbGhashSrc = pbSrc; + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); + state = _mm_loadu_si128( (__m128i *) pState ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + while( nBlocks >= 8 ) + { + // In this loop we always have 8 blocks to decrypt and GHASH + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + c5 = _mm_add_epi32( c3, chainIncrement2 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + c7 = _mm_add_epi32( c5, chainIncrement2 ); + chain = _mm_add_epi32( c6, chainIncrement2 ); + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, 
BYTE_REVERSE_ORDER ); + c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, 8, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112) ) ) ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 8; + + if ( todo == 0 ) + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + if ( nBlocks > 0 ) + { + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + } + } + + if( nBlocks > 0 ) + { + // We have 1-7 blocks to GHASH and decrypt + // Do the exact number of GHASH blocks we need in parallel with generating either 4 or 8 blocks of AES-CTR + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + + c0 = _mm_shuffle_epi8( c0, 
BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + + if( nBlocks > 4 ) + { + c5 = _mm_add_epi32( c4, chainIncrement1 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, nBlocks, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + } else { + AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, pbGhashSrc, nBlocks, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + // Decrypt 1-7 blocks with pre-generated AES-CTR blocks + while( nBlocks >= 2 ) + { + chain = _mm_add_epi32( chain, chainIncrement2 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) ); + + pbDst += 2*SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 2*SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 2; + c0 = c2; + c1 = c3; + c2 = c4; + c3 = c5; + c4 = c6; + } + + if( nBlocks > 0 ) + { + chain = _mm_add_epi32( chain, chainIncrement1 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + } + } + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); + _mm_storeu_si128( (__m128i *) pbChainingValue, chain ); + _mm_storeu_si128((__m128i *)pState, state ); +} +#pragma runtime_checks( "u", restore ) +#pragma warning(pop) + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_X86 | CPU_AMD64 diff --git a/libs/symcrypt/lib/aes-ymm.c b/libs/symcrypt/lib/aes-ymm.c new file mode 100644 
index 00000000000..aa2f473e424 --- /dev/null +++ b/libs/symcrypt/lib/aes-ymm.c @@ -0,0 +1,793 @@ +// +// aes-ymm.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// All YMM code for AES operations +// Requires compiler support for aesni, pclmulqdq, avx2, vaes and vpclmulqdq +// + +#include "precomp.h" + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("avx2,vaes,vpclmulqdq"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("avx2,vaes,vpclmulqdq") +#endif + +#include "xtsaes_definitions.h" +#include "ghash_definitions.h" + /* AES_ENCRYPT_YMM_2048: AES-encrypts the eight 256-bit values c0..c7 in place (two 128-bit blocks per value, 16 blocks or 2048 bits in total). Every 128-bit round key is broadcast to both halves of a 256-bit register: RoundKey[0] is xored in, _mm256_aesenc_epi128 is applied for each following key up to but excluding lastEncRoundKey, and _mm256_aesenclast_epi128 uses the key at lastEncRoundKey. The do/while runs at least one aesenc round. The expansion is a braced block followed by a semicolon with its own locals keyPtr, keyLimit and roundkeys. NOTE(review): round keys are read by dereferencing a __m128i pointer rather than through an unaligned-load intrinsic - presumably the expanded key is 16-byte aligned; confirm against the key structure declaration. */ +#define AES_ENCRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m256i roundkeys; \ +\ + keyPtr = pExpandedKey->RoundKey; \ + keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + /* _mm256_broadcastsi128_si256 requires AVX2 */ \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ +\ + /* _mm256_xor_si256 requires AVX2 */ \ + c0 = _mm256_xor_si256( c0, roundkeys ); \ + c1 = _mm256_xor_si256( c1, roundkeys ); \ + c2 = _mm256_xor_si256( c2, roundkeys ); \ + c3 = _mm256_xor_si256( c3, roundkeys ); \ + c4 = _mm256_xor_si256( c4, roundkeys ); \ + c5 = _mm256_xor_si256( c5, roundkeys ); \ + c6 = _mm256_xor_si256( c6, roundkeys ); \ + c7 = _mm256_xor_si256( c7, roundkeys ); \ + /* Middle rounds: one broadcast round key per iteration, applied to all eight registers. */ \ + do \ + { \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ + c0 = _mm256_aesenc_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenc_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenc_epi128( c2, roundkeys ); \ + c3 = _mm256_aesenc_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenc_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenc_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenc_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenc_epi128( c7, roundkeys ); \ + } while( keyPtr 
< keyLimit ); \ +\ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ +\ + c0 = _mm256_aesenclast_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenclast_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenclast_epi128( c2, roundkeys ); \ + c3 = _mm256_aesenclast_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenclast_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenclast_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenclast_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenclast_epi128( c7, roundkeys ); \ +}; + /* AES_DECRYPT_YMM_2048: AES-decrypts the eight 256-bit values c0..c7 in place (two 128-bit blocks per value). The key walk starts at lastEncRoundKey and ends at lastDecRoundKey: the key at lastEncRoundKey is xored in, _mm256_aesdec_epi128 is applied for each following key up to but excluding lastDecRoundKey, and _mm256_aesdeclast_epi128 uses the key at lastDecRoundKey. Each 128-bit key is broadcast to both halves of a 256-bit register. The expansion is a braced block followed by a semicolon with its own locals keyPtr, keyLimit and roundkeys. NOTE(review): this presumes the decryption round keys are stored contiguously from lastEncRoundKey to lastDecRoundKey - confirm against the expanded-key layout. */ +#define AES_DECRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m256i roundkeys; \ +\ + keyPtr = pExpandedKey->lastEncRoundKey; \ + keyLimit = pExpandedKey->lastDecRoundKey; \ +\ + /* _mm256_broadcastsi128_si256 requires AVX2 */ \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ +\ + /* _mm256_xor_si256 requires AVX2 */ \ + c0 = _mm256_xor_si256( c0, roundkeys ); \ + c1 = _mm256_xor_si256( c1, roundkeys ); \ + c2 = _mm256_xor_si256( c2, roundkeys ); \ + c3 = _mm256_xor_si256( c3, roundkeys ); \ + c4 = _mm256_xor_si256( c4, roundkeys ); \ + c5 = _mm256_xor_si256( c5, roundkeys ); \ + c6 = _mm256_xor_si256( c6, roundkeys ); \ + c7 = _mm256_xor_si256( c7, roundkeys ); \ + /* Middle rounds: one broadcast round key per iteration, applied to all eight registers. */ \ + do \ + { \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ + c0 = _mm256_aesdec_epi128( c0, roundkeys ); \ + c1 = _mm256_aesdec_epi128( c1, roundkeys ); \ + c2 = _mm256_aesdec_epi128( c2, roundkeys ); \ + c3 = _mm256_aesdec_epi128( c3, roundkeys ); \ + c4 = _mm256_aesdec_epi128( c4, roundkeys ); \ + c5 = _mm256_aesdec_epi128( c5, roundkeys ); \ + c6 = _mm256_aesdec_epi128( c6, roundkeys ); \ + c7 = _mm256_aesdec_epi128( c7, roundkeys ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ +\ + c0 = _mm256_aesdeclast_epi128( c0, roundkeys ); \ + 
c1 = _mm256_aesdeclast_epi128( c1, roundkeys ); \ + c2 = _mm256_aesdeclast_epi128( c2, roundkeys ); \ + c3 = _mm256_aesdeclast_epi128( c3, roundkeys ); \ + c4 = _mm256_aesdeclast_epi128( c4, roundkeys ); \ + c5 = _mm256_aesdeclast_epi128( c5, roundkeys ); \ + c6 = _mm256_aesdeclast_epi128( c6, roundkeys ); \ + c7 = _mm256_aesdeclast_epi128( c7, roundkeys ); \ +}; + /* SymCryptXtsAesEncryptDataUnitYmm_2048: XTS-AES encryption of one data unit, 16 blocks per iteration through AES_ENCRYPT_YMM_2048. cbData is split into cbDataMain (a multiple of 16 blocks) and cbDataTail (the remainder; when the remainder is shorter than one block a further 16 blocks are moved from the main part to the tail). If cbDataMain is 0 the whole request is forwarded to SymCryptXtsAesEncryptDataUnitXmm. Otherwise the tweak is loaded from pbTweakBlock and expanded, via the XTS_MUL_ALPHA* macros defined elsewhere, into the tweaks for 16 blocks held in T0..T7; each 16-block group is xored with its tweaks, encrypted, and xored again, and between groups every tweak register is stepped with XTS_MUL_ALPHA16_YMM. A non-empty tail is handed to SymCryptXtsAesEncryptDataUnitXmm after the tweak derived from the upper half of T7 has been stored to pbTweakBlock; when there is a main part and no tail, pbTweakBlock is not written by this function. pbScratch is only forwarded to the Xmm routine. After the main loop _mm256_zeroupper is issued whether or not there is a tail. NOTE(review): the assertion cbDataMain <= cbData only holds when cbData is at least one block - presumably guaranteed by callers; confirm. */ +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i t0, t1, t2, t3, t4, t5, t6, t7; + __m256i c0, c1, c2, c3, c4, c5, c6, c7; + __m128i XTS_ALPHA_MASK; + __m256i XTS_ALPHA_MULTIPLIER_Ymm; + + // Load tweaks into big T + __m256i T0, T1, T2, T3, T4, T5, T6, T7; + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 16 blocks in the tail loop + cbDataTail = cbData & ((16*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle at least 1 whole block in the tail + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? 
(16*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((16*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + if( cbDataMain == 0 ) + { + SymCryptXtsAesEncryptDataUnitXmm( pExpandedKey, pbTweakBlock, pbScratch, pbSrc, pbDst, cbDataTail ); + return; + } + + t0 = _mm_loadu_si128( (__m128i *) pbTweakBlock ); + XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 ); + XTS_ALPHA_MULTIPLIER_Ymm = _mm256_set_epi64x( 0, 0x87, 0, 0x87 ); + + // Do not stall. + XTS_MUL_ALPHA4( t0, t4 ); + XTS_MUL_ALPHA ( t0, t1 ); + XTS_MUL_ALPHA ( t4, t5 ); + XTS_MUL_ALPHA ( t1, t2 ); + XTS_MUL_ALPHA ( t5, t6 ); + XTS_MUL_ALPHA ( t2, t3 ); + XTS_MUL_ALPHA ( t6, t7 ); + + T0 = _mm256_insertf128_si256( _mm256_castsi128_si256( t0 ), t1, 1 ); // AVX + T1 = _mm256_insertf128_si256( _mm256_castsi128_si256( t2 ), t3, 1 ); + T2 = _mm256_insertf128_si256( _mm256_castsi128_si256( t4 ), t5, 1 ); + T3 = _mm256_insertf128_si256( _mm256_castsi128_si256( t6 ), t7, 1 ); + XTS_MUL_ALPHA8_YMM(T0, T4); + XTS_MUL_ALPHA8_YMM(T1, T5); + XTS_MUL_ALPHA8_YMM(T2, T6); + XTS_MUL_ALPHA8_YMM(T3, T7); + + for(;;) + { + c0 = _mm256_xor_si256( T0, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 0 ) ) ); + c1 = _mm256_xor_si256( T1, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 2*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c2 = _mm256_xor_si256( T2, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 4*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c3 = _mm256_xor_si256( T3, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 6*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c4 = _mm256_xor_si256( T4, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 8*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c5 = _mm256_xor_si256( T5, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 10*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c6 = _mm256_xor_si256( T6, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 12*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c7 = _mm256_xor_si256( T7, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 
14*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + AES_ENCRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 0 ), _mm256_xor_si256( c0, T0 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 2*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c1, T1 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 4*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c2, T2 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 6*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c3, T3 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 8*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c4, T4 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 10*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c5, T5 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 12*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c6, T6 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 14*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c7, T7 ) ); + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + cbDataMain -= 16 * SYMCRYPT_AES_BLOCK_SIZE; + if( cbDataMain < 16 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + XTS_MUL_ALPHA16_YMM(T0, T0); + XTS_MUL_ALPHA16_YMM(T1, T1); + XTS_MUL_ALPHA16_YMM(T2, T2); + XTS_MUL_ALPHA16_YMM(T3, T3); + XTS_MUL_ALPHA16_YMM(T4, T4); + XTS_MUL_ALPHA16_YMM(T5, T5); + XTS_MUL_ALPHA16_YMM(T6, T6); + XTS_MUL_ALPHA16_YMM(T7, T7); + } + + // We won't do another 16-block set so we don't update the tweak blocks + + if( cbDataTail > 0 ) + { + // + // This is a rare case: the data unit length is not a multiple of 256 bytes. + // We do this in the Xmm implementation. 
+ // Fix up the tweak block first + // + t7 = _mm256_extracti128_si256 ( T7, 1 /* Highest 128 bits */ ); // AVX2 + _mm256_zeroupper(); + XTS_MUL_ALPHA( t7, t0 ); + _mm_storeu_si128( (__m128i *) pbTweakBlock, t0 ); + + SymCryptXtsAesEncryptDataUnitXmm( pExpandedKey, pbTweakBlock, pbScratch, pbSrc, pbDst, cbDataTail ); + } + else { + _mm256_zeroupper(); + } +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i t0, t1, t2, t3, t4, t5, t6, t7; + __m256i c0, c1, c2, c3, c4, c5, c6, c7; + __m128i XTS_ALPHA_MASK; + __m256i XTS_ALPHA_MULTIPLIER_Ymm; + + // Load tweaks into big T + __m256i T0, T1, T2, T3, T4, T5, T6, T7; + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 16 blocks in the tail loop + cbDataTail = cbData & ((16*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle at least 1 whole block in the tail + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? 
(16*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((16*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + if( cbDataMain == 0 ) + { + SymCryptXtsAesDecryptDataUnitXmm( pExpandedKey, pbTweakBlock, pbScratch, pbSrc, pbDst, cbDataTail ); + return; + } + + t0 = _mm_loadu_si128( (__m128i *) pbTweakBlock ); + XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 ); + XTS_ALPHA_MULTIPLIER_Ymm = _mm256_set_epi64x( 0, 0x87, 0, 0x87 ); + + // Do not stall. + XTS_MUL_ALPHA4( t0, t4 ); + XTS_MUL_ALPHA ( t0, t1 ); + XTS_MUL_ALPHA ( t4, t5 ); + XTS_MUL_ALPHA ( t1, t2 ); + XTS_MUL_ALPHA ( t5, t6 ); + XTS_MUL_ALPHA ( t2, t3 ); + XTS_MUL_ALPHA ( t6, t7 ); + + T0 = _mm256_insertf128_si256( _mm256_castsi128_si256( t0 ), t1, 1); // AVX + T1 = _mm256_insertf128_si256( _mm256_castsi128_si256( t2 ), t3, 1); + T2 = _mm256_insertf128_si256( _mm256_castsi128_si256( t4 ), t5, 1); + T3 = _mm256_insertf128_si256( _mm256_castsi128_si256( t6 ), t7, 1); + XTS_MUL_ALPHA8_YMM(T0, T4); + XTS_MUL_ALPHA8_YMM(T1, T5); + XTS_MUL_ALPHA8_YMM(T2, T6); + XTS_MUL_ALPHA8_YMM(T3, T7); + + for(;;) + { + c0 = _mm256_xor_si256( T0, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 0 ) ) ); + c1 = _mm256_xor_si256( T1, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 2*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c2 = _mm256_xor_si256( T2, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 4*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c3 = _mm256_xor_si256( T3, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 6*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c4 = _mm256_xor_si256( T4, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 8*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c5 = _mm256_xor_si256( T5, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 10*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c6 = _mm256_xor_si256( T6, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 12*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c7 = _mm256_xor_si256( T7, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 
14*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + AES_DECRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 0 ), _mm256_xor_si256( c0, T0 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 2*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c1, T1 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 4*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c2, T2 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 6*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c3, T3 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 8*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c4, T4 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 10*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c5, T5 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 12*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c6, T6 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 14*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c7, T7 ) ); + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + cbDataMain -= 16 * SYMCRYPT_AES_BLOCK_SIZE; + if( cbDataMain < 16 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + XTS_MUL_ALPHA16_YMM(T0, T0); + XTS_MUL_ALPHA16_YMM(T1, T1); + XTS_MUL_ALPHA16_YMM(T2, T2); + XTS_MUL_ALPHA16_YMM(T3, T3); + XTS_MUL_ALPHA16_YMM(T4, T4); + XTS_MUL_ALPHA16_YMM(T5, T5); + XTS_MUL_ALPHA16_YMM(T6, T6); + XTS_MUL_ALPHA16_YMM(T7, T7); + } + + // We won't do another 16-block set so we don't update the tweak blocks + + if( cbDataTail > 0 ) + { + // + // This is a rare case: the data unit length is not a multiple of 256 bytes. + // We do this in the Xmm implementation. 
+ // Fix up the tweak block first + // + t7 = _mm256_extracti128_si256 ( T7, 1 /* Highest 128 bits */ ); // AVX2 + _mm256_zeroupper(); + XTS_MUL_ALPHA( t7, t0 ); + _mm_storeu_si128( (__m128i *) pbTweakBlock, t0 ); + + SymCryptXtsAesDecryptDataUnitXmm( pExpandedKey, pbTweakBlock, pbScratch, pbSrc, pbDst, cbDataTail ); + } + else { + _mm256_zeroupper(); + } +} + +#define AES_FULLROUND_16_GHASH_2_Ymm( roundkeys, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ + c0 = _mm256_aesenc_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenc_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenc_epi128( c2, roundkeys ); \ + c3 = _mm256_aesenc_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenc_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenc_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenc_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenc_epi128( c7, roundkeys ); \ +\ + r0 = _mm256_loadu_si256( (__m256i *) gHashPointer ); \ + r0 = _mm256_shuffle_epi8( r0, byteReverseOrder ); \ + gHashPointer += 32; \ +\ + t1 = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(gHashExpandedKeyTable, todo) ); \ + t0 = _mm256_clmulepi64_epi128( r0, t1, 0x00 ); \ + t1 = _mm256_clmulepi64_epi128( r0, t1, 0x11 ); \ +\ + resl = _mm256_xor_si256( resl, t0 ); \ + resh = _mm256_xor_si256( resh, t1 ); \ +\ + t0 = _mm256_srli_si256( r0, 8 ); \ + r0 = _mm256_xor_si256( r0, t0 ); \ + t1 = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(gHashExpandedKeyTable, todo) ); \ + t1 = _mm256_clmulepi64_epi128( r0, t1, 0x00 ); \ +\ + resm = _mm256_xor_si256( resm, t1 ); \ + todo -= 2; \ +}; + +#define AES_GCM_ENCRYPT_16_Ymm( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m256i roundkeys; \ + __m256i t0, t1; 
\ + __m256i r0; \ + int aesEncryptGhashLoop; \ +\ + keyPtr = pExpandedKey->RoundKey; \ + keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + /* _mm256_broadcastsi128_si256 requires AVX2 */ \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ +\ + /* _mm256_xor_si256 requires AVX2 */ \ + c0 = _mm256_xor_si256( c0, roundkeys ); \ + c1 = _mm256_xor_si256( c1, roundkeys ); \ + c2 = _mm256_xor_si256( c2, roundkeys ); \ + c3 = _mm256_xor_si256( c3, roundkeys ); \ + c4 = _mm256_xor_si256( c4, roundkeys ); \ + c5 = _mm256_xor_si256( c5, roundkeys ); \ + c6 = _mm256_xor_si256( c6, roundkeys ); \ + c7 = _mm256_xor_si256( c7, roundkeys ); \ +\ + /* Do 8(x2) full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < 4; aesEncryptGhashLoop++ ) \ + { \ + AES_FULLROUND_16_GHASH_2_Ymm( roundkeys, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ); \ + AES_FULLROUND_16_GHASH_2_Ymm( roundkeys, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ); \ + } \ +\ + do \ + { \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ + c0 = _mm256_aesenc_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenc_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenc_epi128( c2, roundkeys ); \ + c3 = _mm256_aesenc_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenc_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenc_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenc_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenc_epi128( c7, roundkeys ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ +\ + c0 = _mm256_aesenclast_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenclast_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenclast_epi128( c2, roundkeys ); \ + c3 = 
_mm256_aesenclast_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenclast_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenclast_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenclast_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenclast_epi128( c7, roundkeys ); \ +}; + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue ); + + __m128i BYTE_REVERSE_ORDER_xmm = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m256i BYTE_REVERSE_ORDER = _mm256_set_epi64x( 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + __m256i chainIncrementUpper1 = _mm256_set_epi64x( 0, 1, 0, 0 ); + __m256i chainIncrement2 = _mm256_set_epi64x( 0, 2, 0, 2 ); + __m256i chainIncrement4 = _mm256_set_epi64x( 0, 4, 0, 4 ); + __m256i chainIncrement16 = _mm256_set_epi64x( 0, 16, 0, 16 ); + + __m256i ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7; + __m256i c0, c1, c2, c3, c4, c5, c6, c7; + __m256i r0, r1, r2, r3, r4, r5, r6, r7; + __m256i Hi, Hix; + + __m128i state; + __m128i a0_xmm, a1_xmm, a2_xmm; + __m256i a0, a1, a2; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + PCBYTE pbGhashSrc = pbDst; + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + SYMCRYPT_ASSERT( nBlocks >= GCM_YMM_MINBLOCKS ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ) & ~(GCM_YMM_MINBLOCKS-1); + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER_xmm ); + + state = _mm_loadu_si128( (__m128i *) pState ); + ctr0 = 
_mm256_insertf128_si256( _mm256_castsi128_si256( chain ), chain, 1); // AVX + ctr0 = _mm256_add_epi32( ctr0, chainIncrementUpper1 ); + ctr1 = _mm256_add_epi32( ctr0, chainIncrement2 ); + ctr2 = _mm256_add_epi32( ctr0, chainIncrement4 ); + ctr3 = _mm256_add_epi32( ctr1, chainIncrement4 ); + ctr4 = _mm256_add_epi32( ctr2, chainIncrement4 ); + ctr5 = _mm256_add_epi32( ctr3, chainIncrement4 ); + ctr6 = _mm256_add_epi32( ctr4, chainIncrement4 ); + ctr7 = _mm256_add_epi32( ctr5, chainIncrement4 ); + + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm ); + a0 = a1 = a2 = _mm256_setzero_si256(); + + c0 = _mm256_shuffle_epi8( ctr0, BYTE_REVERSE_ORDER ); + c1 = _mm256_shuffle_epi8( ctr1, BYTE_REVERSE_ORDER ); + c2 = _mm256_shuffle_epi8( ctr2, BYTE_REVERSE_ORDER ); + c3 = _mm256_shuffle_epi8( ctr3, BYTE_REVERSE_ORDER ); + c4 = _mm256_shuffle_epi8( ctr4, BYTE_REVERSE_ORDER ); + c5 = _mm256_shuffle_epi8( ctr5, BYTE_REVERSE_ORDER ); + c6 = _mm256_shuffle_epi8( ctr6, BYTE_REVERSE_ORDER ); + c7 = _mm256_shuffle_epi8( ctr7, BYTE_REVERSE_ORDER ); + + ctr0 = _mm256_add_epi32( ctr0, chainIncrement16 ); + ctr1 = _mm256_add_epi32( ctr1, chainIncrement16 ); + ctr2 = _mm256_add_epi32( ctr2, chainIncrement16 ); + ctr3 = _mm256_add_epi32( ctr3, chainIncrement16 ); + ctr4 = _mm256_add_epi32( ctr4, chainIncrement16 ); + ctr5 = _mm256_add_epi32( ctr5, chainIncrement16 ); + ctr6 = _mm256_add_epi32( ctr6, chainIncrement16 ); + ctr7 = _mm256_add_epi32( ctr7, chainIncrement16 ); + + AES_ENCRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + _mm256_storeu_si256( (__m256i *) (pbDst + 0), _mm256_xor_si256( c0, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 0) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 32), _mm256_xor_si256( c1, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 32) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 64), _mm256_xor_si256( c2, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 64) ) ) ); + 
_mm256_storeu_si256( (__m256i *) (pbDst + 96), _mm256_xor_si256( c3, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 96) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +128), _mm256_xor_si256( c4, _mm256_loadu_si256( ( __m256i * ) (pbSrc +128) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +160), _mm256_xor_si256( c5, _mm256_loadu_si256( ( __m256i * ) (pbSrc +160) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +192), _mm256_xor_si256( c6, _mm256_loadu_si256( ( __m256i * ) (pbSrc +192) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +224), _mm256_xor_si256( c7, _mm256_loadu_si256( ( __m256i * ) (pbSrc +224) ) ) ); + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + while( nBlocks >= 2*GCM_YMM_MINBLOCKS ) + { + c0 = _mm256_shuffle_epi8( ctr0, BYTE_REVERSE_ORDER ); + c1 = _mm256_shuffle_epi8( ctr1, BYTE_REVERSE_ORDER ); + c2 = _mm256_shuffle_epi8( ctr2, BYTE_REVERSE_ORDER ); + c3 = _mm256_shuffle_epi8( ctr3, BYTE_REVERSE_ORDER ); + c4 = _mm256_shuffle_epi8( ctr4, BYTE_REVERSE_ORDER ); + c5 = _mm256_shuffle_epi8( ctr5, BYTE_REVERSE_ORDER ); + c6 = _mm256_shuffle_epi8( ctr6, BYTE_REVERSE_ORDER ); + c7 = _mm256_shuffle_epi8( ctr7, BYTE_REVERSE_ORDER ); + + ctr0 = _mm256_add_epi32( ctr0, chainIncrement16 ); + ctr1 = _mm256_add_epi32( ctr1, chainIncrement16 ); + ctr2 = _mm256_add_epi32( ctr2, chainIncrement16 ); + ctr3 = _mm256_add_epi32( ctr3, chainIncrement16 ); + ctr4 = _mm256_add_epi32( ctr4, chainIncrement16 ); + ctr5 = _mm256_add_epi32( ctr5, chainIncrement16 ); + ctr6 = _mm256_add_epi32( ctr6, chainIncrement16 ); + ctr7 = _mm256_add_epi32( ctr7, chainIncrement16 ); + + AES_GCM_ENCRYPT_16_Ymm( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + + _mm256_storeu_si256( (__m256i *) (pbDst + 0), _mm256_xor_si256( c0, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 0) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 32), _mm256_xor_si256( c1, _mm256_loadu_si256( ( __m256i 
* ) (pbSrc + 32) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 64), _mm256_xor_si256( c2, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 64) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 96), _mm256_xor_si256( c3, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 96) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +128), _mm256_xor_si256( c4, _mm256_loadu_si256( ( __m256i * ) (pbSrc +128) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +160), _mm256_xor_si256( c5, _mm256_loadu_si256( ( __m256i * ) (pbSrc +160) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +192), _mm256_xor_si256( c6, _mm256_loadu_si256( ( __m256i * ) (pbSrc +192) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +224), _mm256_xor_si256( c7, _mm256_loadu_si256( ( __m256i * ) (pbSrc +224) ) ) ); + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 16; + + if ( todo == 0 ) + { + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 0 /* Lowest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 0 /* Lowest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 0 /* Lowest 128 bits */ )); + + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 1 /* Highest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 1 /* Highest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 1 /* Highest 128 bits */ )); + CLMUL_3_POST( a0_xmm, a1_xmm, a2_xmm ); + MODREDUCE( vMultiplicationConstant, a0_xmm, a1_xmm, a2_xmm, state ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ) & ~(GCM_YMM_MINBLOCKS-1); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm ); + a0 = a1 = a2 = _mm256_setzero_si256(); + } + } + + r0 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc + 0) ), BYTE_REVERSE_ORDER ); + r1 = _mm256_shuffle_epi8( _mm256_loadu_si256( 
(__m256i *) (pbGhashSrc + 32) ), BYTE_REVERSE_ORDER ); + r2 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc + 64) ), BYTE_REVERSE_ORDER ); + r3 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc + 96) ), BYTE_REVERSE_ORDER ); + r4 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc +128) ), BYTE_REVERSE_ORDER ); + r5 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc +160) ), BYTE_REVERSE_ORDER ); + r6 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc +192) ), BYTE_REVERSE_ORDER ); + r7 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc +224) ), BYTE_REVERSE_ORDER ); + + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 0) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 0) ); + CLMUL_ACC_3_Ymm( r0, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 2) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 2) ); + CLMUL_ACC_3_Ymm( r1, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 4) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 4) ); + CLMUL_ACC_3_Ymm( r2, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 6) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 6) ); + CLMUL_ACC_3_Ymm( r3, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 8) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 8) ); + CLMUL_ACC_3_Ymm( r4, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo -10) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo -10) ); + CLMUL_ACC_3_Ymm( r5, Hi, Hix, a0, a1, a2 ); + Hi = 
_mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo -12) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo -12) ); + CLMUL_ACC_3_Ymm( r6, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo -14) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo -14) ); + CLMUL_ACC_3_Ymm( r7, Hi, Hix, a0, a1, a2 ); + + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 0 /* Lowest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 0 /* Lowest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 0 /* Lowest 128 bits */ )); + + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 1 /* Highest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 1 /* Highest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 1 /* Highest 128 bits */ )); + CLMUL_3_POST( a0_xmm, a1_xmm, a2_xmm ); + MODREDUCE( vMultiplicationConstant, a0_xmm, a1_xmm, a2_xmm, state ); + + chain = _mm256_extracti128_si256 ( ctr0, 0 /* Lowest 128 bits */ ); + _mm256_zeroupper(); + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER_xmm ); + _mm_storeu_si128((__m128i *) pbChainingValue, chain ); + _mm_storeu_si128((__m128i *) pState, state ); + + cbData &= ( GCM_YMM_MINBLOCKS*SYMCRYPT_AES_BLOCK_SIZE ) - 1; + SYMCRYPT_ASSERT( cbData == (nBlocks-16)*SYMCRYPT_AES_BLOCK_SIZE ); + if ( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesGcmEncryptStitchedXmm( pExpandedKey, pbChainingValue, expandedKeyTable, pState, pbSrc, pbDst, cbData); + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) 
PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue ); + + __m128i BYTE_REVERSE_ORDER_xmm = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m256i BYTE_REVERSE_ORDER = _mm256_set_epi64x( 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + __m256i chainIncrementUpper1 = _mm256_set_epi64x( 0, 1, 0, 0 ); + __m256i chainIncrement2 = _mm256_set_epi64x( 0, 2, 0, 2 ); + __m256i chainIncrement4 = _mm256_set_epi64x( 0, 4, 0, 4 ); + __m256i chainIncrement16 = _mm256_set_epi64x( 0, 16, 0, 16 ); + + __m256i ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7; + __m256i c0, c1, c2, c3, c4, c5, c6, c7; + + __m128i state; + __m128i a0_xmm, a1_xmm, a2_xmm; + __m256i a0, a1, a2; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + PCBYTE pbGhashSrc = pbSrc; + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + SYMCRYPT_ASSERT( nBlocks >= GCM_YMM_MINBLOCKS ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ) & ~(GCM_YMM_MINBLOCKS-1); + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER_xmm ); + + state = _mm_loadu_si128( (__m128i *) pState ); + ctr0 = _mm256_insertf128_si256( _mm256_castsi128_si256( chain ), chain, 1); // AVX + ctr0 = _mm256_add_epi32( ctr0, chainIncrementUpper1 ); + ctr1 = _mm256_add_epi32( ctr0, chainIncrement2 ); + ctr2 = _mm256_add_epi32( ctr0, chainIncrement4 ); + ctr3 = _mm256_add_epi32( ctr1, chainIncrement4 ); + ctr4 = _mm256_add_epi32( ctr2, chainIncrement4 ); + ctr5 = _mm256_add_epi32( ctr3, chainIncrement4 ); + ctr6 = _mm256_add_epi32( ctr4, chainIncrement4 ); + ctr7 = _mm256_add_epi32( ctr5, chainIncrement4 ); + + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm ); + a0 = a1 = a2 = 
_mm256_setzero_si256(); + + while( nBlocks >= GCM_YMM_MINBLOCKS ) + { + c0 = _mm256_shuffle_epi8( ctr0, BYTE_REVERSE_ORDER ); + c1 = _mm256_shuffle_epi8( ctr1, BYTE_REVERSE_ORDER ); + c2 = _mm256_shuffle_epi8( ctr2, BYTE_REVERSE_ORDER ); + c3 = _mm256_shuffle_epi8( ctr3, BYTE_REVERSE_ORDER ); + c4 = _mm256_shuffle_epi8( ctr4, BYTE_REVERSE_ORDER ); + c5 = _mm256_shuffle_epi8( ctr5, BYTE_REVERSE_ORDER ); + c6 = _mm256_shuffle_epi8( ctr6, BYTE_REVERSE_ORDER ); + c7 = _mm256_shuffle_epi8( ctr7, BYTE_REVERSE_ORDER ); + + ctr0 = _mm256_add_epi32( ctr0, chainIncrement16 ); + ctr1 = _mm256_add_epi32( ctr1, chainIncrement16 ); + ctr2 = _mm256_add_epi32( ctr2, chainIncrement16 ); + ctr3 = _mm256_add_epi32( ctr3, chainIncrement16 ); + ctr4 = _mm256_add_epi32( ctr4, chainIncrement16 ); + ctr5 = _mm256_add_epi32( ctr5, chainIncrement16 ); + ctr6 = _mm256_add_epi32( ctr6, chainIncrement16 ); + ctr7 = _mm256_add_epi32( ctr7, chainIncrement16 ); + + AES_GCM_ENCRYPT_16_Ymm( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + + _mm256_storeu_si256( (__m256i *) (pbDst + 0), _mm256_xor_si256( c0, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 0) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 32), _mm256_xor_si256( c1, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 32) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 64), _mm256_xor_si256( c2, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 64) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 96), _mm256_xor_si256( c3, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 96) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +128), _mm256_xor_si256( c4, _mm256_loadu_si256( ( __m256i * ) (pbSrc +128) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +160), _mm256_xor_si256( c5, _mm256_loadu_si256( ( __m256i * ) (pbSrc +160) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +192), _mm256_xor_si256( c6, _mm256_loadu_si256( ( __m256i * ) (pbSrc +192) ) ) ); + _mm256_storeu_si256( (__m256i *) 
(pbDst +224), _mm256_xor_si256( c7, _mm256_loadu_si256( ( __m256i * ) (pbSrc +224) ) ) ); + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 16; + + if ( todo == 0 ) + { + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 0 /* Lowest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 0 /* Lowest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 0 /* Lowest 128 bits */ )); + + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 1 /* Highest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 1 /* Highest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 1 /* Highest 128 bits */ )); + CLMUL_3_POST( a0_xmm, a1_xmm, a2_xmm ); + MODREDUCE( vMultiplicationConstant, a0_xmm, a1_xmm, a2_xmm, state ); + + if ( nBlocks > 0 ) + { + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ) & ~(GCM_YMM_MINBLOCKS-1); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm ); + a0 = a1 = a2 = _mm256_setzero_si256(); + } + } + } + + chain = _mm256_extracti128_si256 ( ctr0, 0 /* Lowest 128 bits */ ); + _mm256_zeroupper(); + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER_xmm ); + _mm_storeu_si128((__m128i *) pbChainingValue, chain ); + _mm_storeu_si128((__m128i *) pState, state ); + + cbData &= ( GCM_YMM_MINBLOCKS*SYMCRYPT_AES_BLOCK_SIZE ) - 1; + SYMCRYPT_ASSERT( cbData == nBlocks*SYMCRYPT_AES_BLOCK_SIZE ); + if ( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesGcmDecryptStitchedXmm( pExpandedKey, pbChainingValue, expandedKeyTable, pState, pbSrc, pbDst, cbData); + } +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_X86 | CPU_AMD64 diff --git a/libs/symcrypt/lib/aesCtrDrbg.c b/libs/symcrypt/lib/aesCtrDrbg.c new file mode 100644 index 00000000000..457e3f7fcd9 
--- /dev/null +++ b/libs/symcrypt/lib/aesCtrDrbg.c @@ -0,0 +1,986 @@ +// +// aesCtrDrbg.c code for SP 800-90 AES-CTR-DRBG implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This code is derived from the implementation already in use in CNG. +// + +#include "precomp.h" + +#define SYMCRYPT_RNG_AES_KEY_SIZE (32) +#define SYMCRYPT_RNG_AES_KEY_AND_V_SIZE (32 + 16) +#define SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE (1<<16) +#define SYMCRYPT_RNG_AES_MAX_REQUESTS_PER_RESEED ((UINT64)1<<48) + +VOID +SYMCRYPT_CALL +SymCryptRngAesBcc( + _In_ PSYMCRYPT_AES_EXPANDED_KEY pKey, + _In_reads_( cbData ) PCBYTE pcbData, + _In_ SIZE_T cbData, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbResult ) +{ + // + //Length of input should always be multiple of the AES block size + // + SYMCRYPT_ASSERT(cbData % SYMCRYPT_AES_BLOCK_SIZE == 0); + + SymCryptWipe( pbResult, SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptAesCbcMac( pKey, pbResult, pcbData, cbData ); +} + + +VOID +SYMCRYPT_CALL +SymCryptRngAesDf( + _In_reads_(cbData) PCBYTE pcbData, + _In_ SIZE_T cbData, + _Out_writes_(SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE) PBYTE pbSeed ) +{ + //maximal input length + IV + padding + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_RNG_AES_MAX_SEED_SIZE + 3 * SYMCRYPT_AES_BLOCK_SIZE]; + PBYTE pb; + SIZE_T lenIvS; + + SYMCRYPT_ALIGN BYTE temp[SYMCRYPT_RNG_AES_KEY_AND_V_SIZE]; + SYMCRYPT_AES_EXPANDED_KEY aesKey; + PBYTE pX; + + SIZE_T i; + + C_ASSERT( sizeof( temp ) % SYMCRYPT_AES_BLOCK_SIZE == 0 ); + + // + // See SP800-90 section 10.4.2 + // + // Our buf contains the following data: + // - 16 bytes IV + // - 4 bytes L + // - 4 bytes N + // - up to SEEDLEN bytes input data + // - 1 byte 0x80 + // - zeroes to fill to a multiple of 16 + // + + SYMCRYPT_ASSERT( cbData >= SYMCRYPT_RNG_AES_MIN_RESEED_SIZE && + cbData <= SYMCRYPT_RNG_AES_MAX_SEED_SIZE ); + + // + // Initialize the entire buf to zero + // + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + // + // build the string S in 
buf[16...] + // + pb = &buf[ SYMCRYPT_AES_BLOCK_SIZE ]; + + // + // Set L; SP800-90 isn't clear, but we'll use MSB first as that is what is used elsewhere. + // + SYMCRYPT_STORE_MSBFIRST32( pb, (UINT32) cbData ); + pb += 4; + + // + // Set N + // + SYMCRYPT_STORE_MSBFIRST32( pb, SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE ); + pb += 4; + + // + // Set input_string + // + + memcpy( pb, pcbData, cbData ); + pb += cbData; + + // + // set padding + // + *pb++ = 0x80; + + while( (pb - buf) % SYMCRYPT_AES_BLOCK_SIZE != 0 ) + { +#pragma prefast( suppress: 26015, "Logic why this doesn't overflow the buf[] array is too complicated for prefast" ) + *pb++ = 0; + } + + lenIvS = pb - buf; // Length of IV & S together + + // + // Set up the initial key + // + + for( i = 0; i < SYMCRYPT_RNG_AES_KEY_SIZE; i++ ) + { + temp[i] = (BYTE) i; + } + SymCryptAesExpandKeyEncryptOnly( &aesKey, temp, SYMCRYPT_RNG_AES_KEY_SIZE ); + + + // + // Produce the 'temp' intermediate result. + // + + for( i=0; i< SYMCRYPT_RNG_AES_KEY_AND_V_SIZE / SYMCRYPT_AES_BLOCK_SIZE; i++ ) + { + // + // Update the IV with the right i value.
+ // i is only 0-2, so we only have to set a single byte + // + buf[3] = (BYTE) i; + + // + // Now we perform the BCC function, which is just CbcMac + // BCC(K,(IV||S)) + SymCryptRngAesBcc( &aesKey, buf, lenIvS, &temp[ i * SYMCRYPT_AES_BLOCK_SIZE ] ); + } + + // + // Second phase, produce the actual output + // + SymCryptAesExpandKeyEncryptOnly( &aesKey, temp, SYMCRYPT_RNG_AES_KEY_SIZE ); + pX = &temp[SYMCRYPT_RNG_AES_KEY_SIZE]; + + for( i=0; i < SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE; i += SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesEncrypt( &aesKey, pX, pX ); + memcpy( &pbSeed[ i ], pX, SYMCRYPT_AES_BLOCK_SIZE ); + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + SymCryptWipeKnownSize( temp, sizeof( temp ) ); + SymCryptWipeKnownSize( &aesKey, sizeof( aesKey ) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerateBlocks( + _In_ PSYMCRYPT_AES_EXPANDED_KEY pAesKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pV, + _Out_writes_(cbRandom) PBYTE pbRandom, + _In_ SIZE_T cbRandom ) +// +// Internal function to generate output blocks from the state. +// cbRandom must be a multiple of the block size. +// +{ + UINT64 v; + SIZE_T cBlocks; + SIZE_T blocksToDo; + SIZE_T bytesToDo; + +// +// The roll-over of the counter is hard to test, especially since our +// NIST test vectors only cover small outputs. +// We have an option to test the output against a simpler (older) implementation +// to validate the proper working of the code. +// +#define TEST_AGAINST_OLD_CODE 0 +#if TEST_AGAINST_OLD_CODE + BYTE Vcopy[16]; + BYTE buf[16]; + PCBYTE pbCheck = pbRandom; + SIZE_T cbCheck = cbRandom; + + memcpy( Vcopy, pV, 16 ); +#endif + + // + // cbRandom must be a multiple of BLOCK_LEN and > 0. + // + SYMCRYPT_ASSERT( (cbRandom & (SYMCRYPT_AES_BLOCK_SIZE-1)) == 0 ); + + cBlocks = cbRandom / SYMCRYPT_AES_BLOCK_SIZE; + + // + // We violate the write-once rule here by wiping the output buffer and then + // filling it with the CTR-mode encryption. 
+ // This is safe because the caller only learns the proper output anyway. + // + SymCryptWipe( pbRandom, cbRandom ); + + // + // This loop is a little complicated because we need to pre-increment the 128-bit value V + // and the SymCryptAesCtrMsb64 function does a 64-bit post-increment. + // + while( cBlocks != 0 ) + { + // Increment V + v = SYMCRYPT_LOAD_MSBFIRST64( &pV[8] ) + 1; + SYMCRYPT_STORE_MSBFIRST64( &pV[8], v ); + SYMCRYPT_STORE_MSBFIRST64( &pV[0], SYMCRYPT_LOAD_MSBFIRST64( &pV[0] ) + (v == 0) ); + + // + // The SymCryptAesCtrMsb64 routine will increment the last 64 bits of the V value, + // but not handle the carry to the first 64 bits. + // We limit how many block we do so that we never cross this boundary. + // SymCryptAesCtrMsb64 does a post-increment, so it may increment the last 64 bits + // to zero as long as we don't rely on the V value afterwards. + // As one-in-2^64 code is not testable, we terminate the Msb64 call earlier, and + // much earlier on CHKed builds. + // +#if SYMCRYPT_DEBUG +#define MAX_CTRMSB64_BLOCKS (1 << 3) // very small; overflow will be triggered by any reasonable test +#else +#define MAX_CTRMSB64_BLOCKS (1 << 10) // increase when we have this well-tested +#endif + // + // 1 + (~v & mask) is the value you can add to v so that the mask bits of the sum + // end up to be zero. It is in the range 1 .. 
mask+1 + // + blocksToDo = SYMCRYPT_MIN( cBlocks, 1 + ( (~v) & (MAX_CTRMSB64_BLOCKS - 1) ) ); + + bytesToDo = blocksToDo * SYMCRYPT_AES_BLOCK_SIZE; + SYMCRYPT_ASSERT( bytesToDo <= cbRandom ); + SymCryptAesCtrMsb64( pAesKey, &pV[0], pbRandom, pbRandom, bytesToDo ); + pbRandom += bytesToDo; + cbRandom -= bytesToDo; // only used for prefast assertions; optimized away in shipping code + cBlocks -= blocksToDo; + + // + // Post-decrement the V block to compensate for the post-increment of the Msb64 function + // + v += blocksToDo - 1; + SYMCRYPT_ASSERT( v != 0 ); + + SYMCRYPT_STORE_MSBFIRST64( &pV[8], v ); + // No need to carry to the first half of V here, it cannot happen + } + +#if TEST_AGAINST_OLD_CODE + // + // We tried to use the CtrMsb64 mode to generate the blocks, but that leads to + // a number of complications. + // The lack of carry means we end up with code paths that run once per 2^64 blocks + // or so, and that is very hard to test. + // Furthermore, CtrMsb64 uses post-increment, whereas AES-CTR_DRBG uses pre-increment. + // That adds sufficient extra complications and testing problems that we went back + // to the solution below. + // + + while( cbCheck != 0 ) + { + SYMCRYPT_ASSERT( cbCheck >= SYMCRYPT_AES_BLOCK_SIZE ); // Keep prefast happy + // + // Increment the 128-bit block V MSByte first. + // + v = SYMCRYPT_LOAD_MSBFIRST64( &Vcopy[8] ) + 1; + SYMCRYPT_STORE_MSBFIRST64( &Vcopy[8], v ); + if( v == 0 ) + { + // + // This almost never happens. + // Using an if() is not side-channel safe, but in this case + // the side channel does not reveal anything that actually hurts the + // security of the algorithm. + // + SYMCRYPT_STORE_MSBFIRST64( Vcopy, 1 + LOAD_MSBFIRST64( Vcopy ) ); + } + + SymCryptAesEncrypt( pAesKey, Vcopy, buf ); + if( memcmp( buf, pbCheck, 16 ) != 0 ) + { + SymCryptFatal( 'OLD?' 
); + } + pbCheck += SYMCRYPT_AES_BLOCK_SIZE; + cbCheck -= SYMCRYPT_AES_BLOCK_SIZE; + } +#endif +} + +FORCEINLINE +int +SymCryptRngAesAreBlocksIdentical( + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pSrc1, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pSrc2 ) +// +// return 1 if the blocks are identical, 0 if they are different. +// +{ + SYMCRYPT_UNALIGNED const SIZE_T * p1 = (SYMCRYPT_UNALIGNED const SIZE_T *) pSrc1; + SYMCRYPT_UNALIGNED const SIZE_T * p2 = (SYMCRYPT_UNALIGNED const SIZE_T *) pSrc2; + + SIZE_T tmp; + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM + + C_ASSERT( sizeof( SIZE_T ) == 4 ); + tmp = (p1[0] ^ p2[0]) | (p1[1] ^ p2[1]) | (p1[2] ^ p2[2]) | (p1[3] ^ p2[3]); + +#elif SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 + + C_ASSERT( sizeof( SIZE_T ) == 8 ); + tmp = (p1[0] ^ p2[0]) | (p1[1] ^ p2[1]); + +#else + + SIZE_T i; + + C_ASSERT( 16 % sizeof( SIZE_T ) == 0 ); + + tmp = 0; + for( i=0; i < 16/sizeof( SIZE_T ); i ++ ) + { + tmp |= p1[i] ^ p2[i]; + } + +#endif + + return tmp == 0; +} + + +VOID +SYMCRYPT_CALL +SymCryptRngAesCheckBlocksNotIdentical( + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbPreviousBlock, + _In_reads_( cbData ) PCBYTE pcbData, + SIZE_T cbData ) +{ + SIZE_T identical; + SIZE_T i; + + SYMCRYPT_ASSERT( ((cbData & 15) == 0) && cbData > 0 ); + + identical = SymCryptRngAesAreBlocksIdentical( pbPreviousBlock, pcbData ); + + for( i = SYMCRYPT_AES_BLOCK_SIZE; i < cbData; i += SYMCRYPT_AES_BLOCK_SIZE ) + { + SYMCRYPT_ASSERT( cbData >= i + SYMCRYPT_AES_BLOCK_SIZE ); + identical |= SymCryptRngAesAreBlocksIdentical( &pcbData[i-SYMCRYPT_AES_BLOCK_SIZE], &pcbData[ i ] ); + } + + memcpy( pbPreviousBlock, &pcbData[cbData - SYMCRYPT_AES_BLOCK_SIZE], SYMCRYPT_AES_BLOCK_SIZE ); + + // + // The structure of AES-CTR-DRBG makes it impossible for two consecutive blocks of a single request + // to be equal. The only way this could happen is if the first block of one request is the same as + // the last block of the previous request. 
But the probability of this happening is 2^{-128}. + // This never happens, so the whole check is technically useless. + // Nevertheless, it is required by FIPS 140-2, so we have to implement it, + // but we don't have to handle the error usefully in any way. + // (Trying to handle this error sensibly is far too complicated, and adds far more danger from code + // bugs than it is worth. It is much better to just treat it as a fatal occurrence.) + // + + if( identical ) + { + SymCryptFatal( 'acdi' ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesUpdate( + _Inout_ PSYMCRYPT_RNG_AES_STATE pState, + _In_reads_opt_( SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE ) PCBYTE pbProvidedData, + _In_opt_ PSYMCRYPT_AES_EXPANDED_KEY pAesKey) +// +// Implement the CTR_DRBG Update function. +// pbProvidedData is optional, but if provided must always be exactly seedlen bits. +// pAesKey is the already expanded key of the RngState. This is optional, and only has +// to be provided if the caller already has it. +// +{ + SYMCRYPT_AES_EXPANDED_KEY aesKey; + PSYMCRYPT_AES_EXPANDED_KEY pKey; + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_AES_BLOCK_SIZE]; + + if(NULL == pAesKey) + { + SymCryptAesExpandKeyEncryptOnly( &aesKey, pState->keyAndV, SYMCRYPT_RNG_AES_KEY_SIZE ); + pKey = &aesKey; + } + else + { + pKey = pAesKey; + } + + // + // Copy the V value so that we can overwrite it safely. + // + + memcpy( buf, &pState->keyAndV[SYMCRYPT_RNG_AES_KEY_SIZE], sizeof( buf ) ); + + SymCryptRngAesGenerateBlocks( + pKey, + buf, // pV + pState->keyAndV, // pbRandom + sizeof( pState->keyAndV) ); // cbRandom + + if( pbProvidedData != NULL ) + { + // XOR provided data in + SymCryptXorBytes( pState->keyAndV, pbProvidedData, pState->keyAndV, SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE ); + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + // + // Only wipe the key if necessary. 
+ // + if( pKey == &aesKey ) + { + SymCryptWipeKnownSize( &aesKey, sizeof( aesKey )); + } +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesGenerateSmall( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState, + _Out_writes_( cbRandom ) PBYTE pbRandom, + SIZE_T cbRandom, + _In_reads_opt_( cbAdditionalInput ) PCBYTE pbAdditionalInput, + SIZE_T cbAdditionalInput ) +// +// This is the Generate function of our SP800-90 compliant implementation. +// It follows the method specified in SP800-90A 10.2.1.5.2 +// +{ + SYMCRYPT_AES_EXPANDED_KEY aesKey; + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_AES_BLOCK_SIZE]; + SYMCRYPT_ALIGN BYTE abSeed[SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE]; + + // + // SP 800-90 9.3.1 requires a check on the length of the request. + // + if( cbRandom > SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE ) + { + return SYMCRYPT_WRONG_DATA_SIZE; + } + // + // The requestCounter test is useless as it can never happen. (It would require + // 2^48 calls to this function to trigger this error.) + // Unfortunately, SP800-90 section 11 requires a test of this error, so we have + // to implement the error. 
+ // + if( pRngState->requestCounter > SYMCRYPT_RNG_AES_MAX_REQUESTS_PER_RESEED ) + { + return SYMCRYPT_FIPS_FAILURE; + } + + if( pbAdditionalInput != NULL ) + { + // Update additional input using Derivation function + SymCryptRngAesDf( pbAdditionalInput, cbAdditionalInput, abSeed ); + pbAdditionalInput = &abSeed[0]; + + // Update state with modified additional input + SymCryptRngAesUpdate( pRngState, pbAdditionalInput, NULL ); + } + + SymCryptAesExpandKeyEncryptOnly( &aesKey, pRngState->keyAndV, SYMCRYPT_RNG_AES_KEY_SIZE ); + + if( cbRandom >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SIZE_T wholeBlocks = cbRandom & ~(SYMCRYPT_AES_BLOCK_SIZE - 1); + SymCryptRngAesGenerateBlocks( &aesKey, + &pRngState->keyAndV[ SYMCRYPT_RNG_AES_KEY_SIZE], + pbRandom, + wholeBlocks ); + if( pRngState->fips140_2Check ) + { + SymCryptRngAesCheckBlocksNotIdentical( pRngState->previousBlock, pbRandom, wholeBlocks ); + } + pbRandom += wholeBlocks; + cbRandom -= wholeBlocks; + } + + if( cbRandom > 0 ) + { + SYMCRYPT_ASSERT( cbRandom < SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptRngAesGenerateBlocks( &aesKey, + &pRngState->keyAndV[ SYMCRYPT_RNG_AES_KEY_SIZE], + buf, + sizeof( buf ) ); + if( pRngState->fips140_2Check ) + { + SymCryptRngAesCheckBlocksNotIdentical( pRngState->previousBlock, buf, sizeof( buf ) ); + } + + memcpy( pbRandom, buf, cbRandom ); + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + } + + SymCryptRngAesUpdate( pRngState, pbAdditionalInput, &aesKey ); + + ++pRngState->requestCounter; + + SymCryptWipeKnownSize( &aesKey, sizeof( aesKey ) ); + SymCryptWipeKnownSize( abSeed, sizeof( abSeed ) ); + + return SYMCRYPT_NO_ERROR; +} + + +_Use_decl_annotations_ +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesInstantiate( PSYMCRYPT_RNG_AES_STATE pRngState, + PCBYTE pcbSeedMaterial, + SIZE_T cbSeedMaterial ) +// +// This function creates a new SP 800-90 AES_CTR_DRBG instance. +// Our code is structured differently from what SP 800-90 assumes. 
+// At this point in time, the entropy has already been collected and it is +// passed to this function. Thus, there is no check for failing to get +// the entropy. If entropy collection fails, the caller of this function +// will generate an error. (Actually, we only choose to instantiate a FIPS-compliant +// SP 800-90 DRBG when we do have good entropy available, so there is never an +// error that we don't have the required entropy.) +// +{ + if( cbSeedMaterial < SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE ) + { + return SYMCRYPT_EXTERNAL_FAILURE; + } + + // + // Instantiation of a new state is identical to setting the state to zero + // and then performing a reseed with the same seed material. + // + // See SP 800-90 10.2.1.3.2 & 10.2.1.4.2 + // + SymCryptWipeKnownSize( pRngState, sizeof( *pRngState ) ); + + SYMCRYPT_SET_MAGIC( pRngState ); + + return SymCryptRngAesReseed( pRngState, pcbSeedMaterial, cbSeedMaterial ); +} + +_Use_decl_annotations_ +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerate( PSYMCRYPT_RNG_AES_STATE pRngState, + PBYTE pbRandom, + SIZE_T cbRandom ) +// +// For FIPS compliance purposes, this is NOT the generate function of the DRBG. +// The generate function is SymCryptRngAesGenerateSmall. +// This is a wrapper around the generate function that supports larger output +// sizes, and handles any errors by making them fatal. 
+// +{ + SYMCRYPT_ERROR scError; + + SYMCRYPT_CHECK_MAGIC( pRngState ); + + while( cbRandom > SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE ) + { + + scError = SymCryptRngAesGenerateSmall( pRngState, pbRandom, SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE, NULL, 0 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acdx' ); + } + pbRandom += SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE; + cbRandom -= SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE; + } + + if( cbRandom > 0 ) + { + scError = SymCryptRngAesGenerateSmall( pRngState, pbRandom, cbRandom, NULL, 0 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acdx' ); + } + } +} + +_Use_decl_annotations_ +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesReseed( PSYMCRYPT_RNG_AES_STATE pRngState, + PCBYTE pcbSeedMaterial, + SIZE_T cbSeedMaterial ) +{ + SYMCRYPT_ALIGN BYTE abSeed[SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE]; + + SYMCRYPT_CHECK_MAGIC( pRngState ); + + // + // For a reseed, the minimum # bits is the security strength, or the key size. + // We retain the same maximum as that protects our own internal buffers. + // + if (cbSeedMaterial < SYMCRYPT_RNG_AES_MIN_RESEED_SIZE || + cbSeedMaterial > SYMCRYPT_RNG_AES_MAX_SEED_SIZE ) + { + return SYMCRYPT_EXTERNAL_FAILURE; // bug is external to SymCrypt (i.e. the caller) + } + + // + // We do not perform the FIPS-required reseed self-test here. + // Rather, we have a function that external callers can use to implement that test before + // calling this reseed function. + // This allows callers that are not interested in FIPS certification to skip the test. 
+ // + + SymCryptRngAesDf( pcbSeedMaterial, cbSeedMaterial, abSeed ); + + SymCryptRngAesUpdate( pRngState, abSeed, NULL ); + + pRngState->requestCounter = 1; + + SymCryptWipeKnownSize( abSeed, sizeof( abSeed ) ); + + return SYMCRYPT_NO_ERROR; +} + +_Use_decl_annotations_ +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptRngAesUninstantiate( PSYMCRYPT_RNG_AES_STATE pRngState ) +{ + SymCryptWipeKnownSize( pRngState, sizeof( *pRngState ) ); +} + +//////////////////////////////////////////////////////////////////////////// +// Self test + +// +// The test vector is from the NIST DRBG Test Vectors file +// +static const BYTE g_abInstantiateEntropyInputPlusNonce[] = +{ + // Entropy input + + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, + 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, + 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, + + // Nonce + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, + +}; + + +static const BYTE g_abReseedEntropy[] = +{ + + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, + 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, + 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, + 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF +}; + +static const BYTE g_abOutput1[ 32 ] = +{ + 0xD1,0xE9,0xC7,0x37,0xB6,0xEB,0xAE,0xD7, + 0x65,0xA0,0xD4,0xE4,0xC6,0xEA,0xEB,0xE2, + 0x67,0xF5,0xE9,0x19,0x36,0x80,0xFD,0xFF, + 0xA6,0x2F,0x48,0x65,0xB3,0xF0,0x09,0xEC, +}; + +static const BYTE g_expectedStateAfterInstantiate[ SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ] = +{ + //key + 0x8C,0x10,0xB6,0x58,0x44,0x0C,0x71,0x35, + 0x64,0x9D,0xC7,0x7B,0xE6,0xE5,0x75,0xCE, + 0x87,0xE7,0x48,0x90,0x83,0x9B,0x89,0x59, + 0x14,0x17,0xAF,0xAD,0x14,0xB2,0x26,0xD5, + //V + 0xB4,0x03,0x6B,0x1D,0xBA,0x04,0x3A,0xE6, + 0x55,0xAC,0xD6,0x46,0xEC,0x5A,0xD3,0x5C, +}; + +static const BYTE 
g_expectedStateAfterReseed[ SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ] = +{ + //key + 0x17,0x98,0xC0,0xDF,0x09,0x69,0x6A,0x46, + 0x19,0x46,0xFE,0x6D,0x68,0x7D,0x8C,0xC8, + 0x3F,0xEE,0xF1,0x22,0xF3,0xBB,0xC5,0xF2, + 0x9D,0xAC,0x85,0x10,0xF3,0x4A,0xF0,0x15, + //V + 0x0B,0xF3,0x34,0x4D,0xF5,0x29,0x27,0x6B, + 0x0D,0x5B,0xBC,0x83,0x9B,0xD3,0x65,0x6A, +}; + +static const BYTE g_expectedStateAfterGenerate[ SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ] = +{ + //key + 0x28, 0xbc, 0x65, 0xa8, 0x6a, 0xb7, 0xc7, 0x4e, 0xdf, 0x4b, 0xb8, 0x72, 0x87, 0xd3, 0x4f, 0xbb, + 0x8d, 0x6f, 0x16, 0xd7, 0xb9, 0x1b, 0x6a, 0xbb, 0xee, 0x7b, 0x88, 0x86, 0x5b, 0x0f, 0xc7, 0xbd, + //V + 0xb7, 0x46, 0x11, 0xf3, 0x92, 0x95, 0xa6, 0x25, 0x7c, 0x39, 0x98, 0x4c, 0x9c, 0x09, 0x9b, 0x30, +}; + + +VOID +SYMCRYPT_CALL +SymCryptRngAesTestInstantiate( PSYMCRYPT_RNG_AES_STATE pRngState ) +// +// Test the Instantiate function on the passed instance. Leave it +// in the initialized state for the test vector. +// +{ + SYMCRYPT_ERROR scError; + // + // First test the error handling + // +#pragma prefast( suppress: 26060 6309 28020, "Deliberate test of invalid parameter"); + scError = SymCryptRngAesInstantiate( pRngState, NULL, 327 ); + if( scError == SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'aci1' ); + } + + + scError = SymCryptRngAesInstantiate( pRngState, + g_abInstantiateEntropyInputPlusNonce, + sizeof( g_abInstantiateEntropyInputPlusNonce ) + ); + + SymCryptInjectError( pRngState->keyAndV, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + + if ( scError != SYMCRYPT_NO_ERROR || + 0 != memcmp( pRngState->keyAndV, + g_expectedStateAfterInstantiate, + SYMCRYPT_RNG_AES_KEY_AND_V_SIZE )) + { + SymCryptFatal( 'aci2' ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesTestReseed( PSYMCRYPT_RNG_AES_STATE pRngState ) +{ + SYMCRYPT_ERROR scError; + + // + // Set the state to a known state + // + SYMCRYPT_SET_MAGIC( pRngState ); + memcpy( pRngState->keyAndV, g_expectedStateAfterInstantiate, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + pRngState->requestCounter = 
7; + pRngState->fips140_2Check = FALSE; + + // + // Test error handling + // +#pragma prefast(suppress: 26060 6309 28020, "Deliberate test of invalid parameter"); + scError = SymCryptRngAesReseed( pRngState, NULL, 597 ); + if( scError == SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acr1' ); + } + + scError = SymCryptRngAesReseed( pRngState, g_abReseedEntropy, sizeof( g_abReseedEntropy ) ); + + SymCryptInjectError( pRngState->keyAndV, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + + if ( scError != SYMCRYPT_NO_ERROR || + 0 != memcmp( pRngState->keyAndV, + g_expectedStateAfterReseed, + SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ) ) + { + SymCryptFatal( 'acr2' ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesTestGenerate( PSYMCRYPT_RNG_AES_STATE pRngState ) +{ + BYTE abOutput[2*SYMCRYPT_AES_BLOCK_SIZE]; + SYMCRYPT_ERROR scError; + + // + // Set the state to a known value + // + SYMCRYPT_SET_MAGIC( pRngState ); + memcpy( pRngState->keyAndV, g_expectedStateAfterReseed, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + pRngState->requestCounter = 7; + pRngState->fips140_2Check = FALSE; + + // + // Test the error handling + // - Too many requests since last reseed + // - Too many bytes in request + // + + pRngState->requestCounter = SYMCRYPT_RNG_AES_MAX_REQUESTS_PER_RESEED + 1; + scError = SymCryptRngAesGenerateSmall( pRngState, abOutput, sizeof( g_abOutput1 ), NULL, 0 ); + + if( scError == SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acg1' ); + } + pRngState->requestCounter = 7; + +#pragma prefast( suppress: 6202 26000, "buffer size of cbOutput is purposely incorrect"); + scError = SymCryptRngAesGenerateSmall( pRngState, abOutput, SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE + 1, NULL, 0 ); + + if( scError == SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acg2' ); + } + + // + // Now test for correct output data. 
+ // + scError = SymCryptRngAesGenerateSmall( pRngState, abOutput, sizeof( g_abOutput1 ), NULL, 0 ); + + SymCryptInjectError( abOutput, sizeof( abOutput ) ); + + if( scError != SYMCRYPT_NO_ERROR || memcmp( abOutput, g_abOutput1, sizeof( g_abOutput1 ) ) != 0 ) + { + SymCryptFatal( 'acg3' ); + } + + // + // And test for the correct resulting state + // + SymCryptInjectError( pRngState->keyAndV, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + + if ( 0 != memcmp( pRngState->keyAndV, + g_expectedStateAfterGenerate, + SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ) ) + { + SymCryptFatal( 'acg4' ); + } +} + + +VOID +SYMCRYPT_CALL +SymCryptRngAesTestUninstantiate( PSYMCRYPT_RNG_AES_STATE pRngState ) +{ + const SIZE_T * p = (const SIZE_T *) pRngState; + SIZE_T t; + SIZE_T i; + + C_ASSERT( sizeof( *pRngState ) % sizeof( SIZE_T ) == 0 ); // This is true on all our platforms. + + SYMCRYPT_CHECK_MAGIC( pRngState ); + + SymCryptRngAesUninstantiate( pRngState ); + + t = 0; + for( i=0; i< sizeof( *pRngState ) / sizeof( SIZE_T ); i ++ ) + { + t |= p[i]; + } + + if( t != 0 ) + { + SymCryptFatal( 'acdu' ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesInstantiateSelftest(void) +{ + SYMCRYPT_RNG_AES_STATE rng; + + SymCryptRngAesTestInstantiate( &rng ); + + // + // Uninstantiate has to be tested whenever another function is tested. + // + SymCryptRngAesTestUninstantiate( &rng ); +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesReseedSelftest(void) +{ + SYMCRYPT_RNG_AES_STATE rng; + + SymCryptRngAesTestReseed( &rng ); + + // + // Uninstantiate has to be tested whenever another function is tested. + // + SymCryptRngAesTestUninstantiate( &rng ); +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerateSelftest(void) +{ + SYMCRYPT_RNG_AES_STATE rng; + + SymCryptRngAesTestGenerate( &rng ); + + // + // Uninstantiate has to be tested whenever another function is tested. 
+ // + SymCryptRngAesTestUninstantiate( &rng ); +} + + +/////////////////////////////////////////////////////////////////// +// AES-CTR_DRGB with FIPS 140-2 continuous self-test +// + + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesFips140_2Instantiate( PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + PCBYTE pcbSeedMaterial, + SIZE_T cbSeedMaterial ) +{ + SYMCRYPT_ERROR scError; + + scError = SymCryptRngAesInstantiate( &pRngState->rng, pcbSeedMaterial, cbSeedMaterial ); + + if( scError == SYMCRYPT_NO_ERROR ) + { + // + // Generate the first block of output and store it so that we can compare future blocks. + // + SymCryptRngAesGenerate( &pRngState->rng, pRngState->rng.previousBlock, sizeof( pRngState->rng.previousBlock ) ); + pRngState->rng.fips140_2Check = TRUE; + } + + return scError; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptRngAesFips140_2Generate( PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + PBYTE pbRandom, + SIZE_T cbRandom ) +{ + SymCryptRngAesGenerate( &pRngState->rng, pbRandom, cbRandom ); +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesFips140_2Reseed( PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + PCBYTE pcbSeedMaterial, + SIZE_T cbSeedMaterial ) +{ + return SymCryptRngAesReseed( &pRngState->rng, pcbSeedMaterial, cbSeedMaterial ); +} + + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptRngAesFips140_2Uninstantiate( PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState ) +{ + SymCryptRngAesUninstantiate( &pRngState->rng ); +} diff --git a/libs/symcrypt/lib/aescmac.c b/libs/symcrypt/lib/aescmac.c new file mode 100644 index 00000000000..7af0e31bcd9 --- /dev/null +++ b/libs/symcrypt/lib/aescmac.c @@ -0,0 +1,258 @@ +// +// aescmac.c Implementation of the AES-CMAC block cipher mode +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +const SYMCRYPT_MAC SymCryptAesCmacAlgorithm_fast = { + SymCryptAesCmacExpandKey, + SymCryptAesCmacInit, + SymCryptAesCmacAppend, + SymCryptAesCmacResult, + sizeof(SYMCRYPT_AES_CMAC_EXPANDED_KEY), + sizeof(SYMCRYPT_AES_CMAC_STATE), + SYMCRYPT_AES_CMAC_RESULT_SIZE, + NULL, + 0, +}; + +const PCSYMCRYPT_MAC SymCryptAesCmacAlgorithm = &SymCryptAesCmacAlgorithm_fast; + +VOID +SYMCRYPT_CALL +SymCryptCmacMunge( + _Inout_updates_bytes_(SYMCRYPT_AES_BLOCK_SIZE) BYTE buf[SYMCRYPT_AES_BLOCK_SIZE] ) +{ + SIZE_T carry = 0; + SIZE_T tmp; + int i; + + for( i=15; i>=0; i-- ) + { + tmp = buf[i]; + buf[i] = ((tmp << 1) | carry) & 0xff; + carry = tmp >> 7; + } + + buf[15] ^= (0 - carry) & 0x87; // This is the R_128 value from SP 800-38B 5.3 +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesCmacExpandKey( + _Out_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_AES_BLOCK_SIZE]; + + scError = SymCryptAesExpandKey( &pExpandedKey->aesKey, pbKey, cbKey ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + SymCryptAesEncrypt( &pExpandedKey->aesKey, buf, buf ); + + SymCryptCmacMunge( buf ); + memcpy( &pExpandedKey->K1, buf, sizeof( buf ) ); + SymCryptCmacMunge( buf ); + memcpy( &pExpandedKey->K2, buf, sizeof( buf ) ); + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + +cleanup: + + return scError; +} + + +VOID +SYMCRYPT_CALL +SymCryptAesCmacKeyCopy( + _In_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + SymCryptAesKeyCopy( &pSrc->aesKey, &pDst->aesKey ); + memcpy( pDst->K1, pSrc->K1, sizeof( pDst->K1 ) ); + memcpy( pDst->K2, pSrc->K2, sizeof( pDst->K2 ) ); + SYMCRYPT_SET_MAGIC( pDst ); +} + + +VOID +SYMCRYPT_CALL +SymCryptAesCmac( + _In_ 
PSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_AES_CMAC_RESULT_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_AES_CMAC_STATE state; + + SymCryptAesCmacInit( &state, pExpandedKey ); + SymCryptAesCmacAppend( &state, pbData, cbData ); + SymCryptAesCmacResult( &state, pbResult ); + + SymCryptWipeKnownSize( &state, sizeof( state ) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptAesCmacStateCopy( + _In_ PCSYMCRYPT_AES_CMAC_STATE pSrc, + _In_opt_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_AES_CMAC_STATE pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + *pDst = *pSrc; + + if( pExpandedKey == NULL ) + { + SYMCRYPT_CHECK_MAGIC( pSrc->pKey ); + pDst->pKey = pSrc->pKey; + } + else + { + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + pDst->pKey = pExpandedKey; + } + + SYMCRYPT_SET_MAGIC( pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesCmacInit( + _Out_ PSYMCRYPT_AES_CMAC_STATE pState, + _In_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey) +{ + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + pState->bytesInBuf = 0; + SymCryptWipeKnownSize( pState->chain, sizeof( pState->chain ) ); + pState->pKey = pExpandedKey; + + SYMCRYPT_SET_MAGIC( pState ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesCmacAppend( + _Inout_ PSYMCRYPT_AES_CMAC_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + + SYMCRYPT_CHECK_MAGIC( pState ); + + if( pState->bytesInBuf != 0 ) + { + SIZE_T freeInBuf = SYMCRYPT_AES_BLOCK_SIZE - pState->bytesInBuf; + SYMCRYPT_ASSERT( freeInBuf < SYMCRYPT_AES_BLOCK_SIZE ); + + if( cbData <= freeInBuf ) + { + // Do nothing. 
+ // the data will be copied into the buf at the end of this function + // + } + else + { + memcpy( &pState->buf[pState->bytesInBuf], pbData, freeInBuf ); + pbData += freeInBuf; + cbData -= freeInBuf; + SymCryptAesCbcMac( &pState->pKey->aesKey, &pState->chain[0], &pState->buf[0], SYMCRYPT_AES_BLOCK_SIZE ); + pState->bytesInBuf = 0; + } + } + + // + // At this point, either pState->bytesInBuf == 0, or it is !=0 but cbData is small enough that all the + // data will still fit in the buffer without further processing. + // + + if( cbData > SYMCRYPT_AES_BLOCK_SIZE ) + { + SIZE_T bytesToDo = (cbData-1) & ~(SIZE_T)(SYMCRYPT_AES_BLOCK_SIZE - 1); + SymCryptAesCbcMac( &pState->pKey->aesKey, &pState->chain[0], pbData, bytesToDo ); + pbData += bytesToDo; + cbData -= bytesToDo; + } + + if( cbData > 0 ) + { + memcpy( &pState->buf[pState->bytesInBuf], pbData, cbData ); + pState->bytesInBuf += cbData; + } +} + + +VOID +SYMCRYPT_CALL +SymCryptAesCmacResult( + _Inout_ PSYMCRYPT_AES_CMAC_STATE pState, + _Out_writes_( SYMCRYPT_AES_CMAC_RESULT_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_CHECK_MAGIC( pState ); + + if( pState->bytesInBuf < SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptWipe( &pState->buf[pState->bytesInBuf + 1], SYMCRYPT_AES_BLOCK_SIZE - pState->bytesInBuf - 1 ); + pState->buf[pState->bytesInBuf] = 0x80; + SymCryptXorBytes( &pState->buf[0], &pState->pKey->K2[0], &pState->buf[0], SYMCRYPT_AES_BLOCK_SIZE ); + } + else + { + SymCryptXorBytes( &pState->buf[0], &pState->pKey->K1[0], &pState->buf[0], SYMCRYPT_AES_BLOCK_SIZE ); + } + + SymCryptAesCbcMac( &pState->pKey->aesKey, &pState->chain[0], &pState->buf[0], SYMCRYPT_AES_BLOCK_SIZE ); + memcpy( pbResult, &pState->chain[0], SYMCRYPT_AES_BLOCK_SIZE ); + + // + // Put the state back in the original starting state, + // and wipe any traces of the data. 
+ // + pState->bytesInBuf = 0; + SymCryptWipeKnownSize( pState->chain, sizeof( pState->chain ) ); + SymCryptWipeKnownSize( pState->buf, sizeof( pState->buf ) ); +} + + + +static const BYTE aesCmacKat[SYMCRYPT_AES_CMAC_RESULT_SIZE] = { + 0x0a, 0x54, 0xa6, 0xa4, 0x25, 0xd4, 0x84, 0x38, 0xc3, 0xf8, 0xbb, 0xe0, 0x9b, 0xf9, 0x44, 0xcc, +}; + + +VOID +SYMCRYPT_CALL +SymCryptAesCmacSelftest(void) +{ + SYMCRYPT_AES_CMAC_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_AES_CMAC_RESULT_SIZE]; + + SymCryptAesCmacExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptAesCmac( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + if( memcmp( res, aesCmacKat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh5' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/aeskw.c b/libs/symcrypt/lib/aeskw.c new file mode 100644 index 00000000000..c16ec6becf5 --- /dev/null +++ b/libs/symcrypt/lib/aeskw.c @@ -0,0 +1,457 @@ +// +// aeskw.c Implementation of the AES-KW(P) block cipher modes +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// The KW and KWP modes have inherently terrible performance characteristics from how they are +// defined. Notably, they require a serial chain of AES block operations 12x longer than an +// equivalent AES-CBC encryption (which is already not a favored mode just because of the serial +// nature). +// Additionally the intermediate state of AES-KW and AES-KWP must be of a size proportional to +// the plaintext / ciphertext, rather than fitting into some constant-sized state. +// +// The current strategy for intermediate state handling is to allocate an internal buffer for +// the state. 
We expect that the caller does not care too much about performance if they are using +// these modes, so the overhead of an allocation per operation should not be a problem. +// +// While it is possible to expose an API surface which uses the destination buffer as a scratch +// buffer to store intermediate state, this would break the read-once/write-once rule, making the +// API surface brittle to misuse if the caller is encrypting to memory that may be in a different +// security domain (i.e. kernel caller encrypting a secret directly into memory which is mapped to +// user mode). +// If we need to expose a non-allocating version, we can introduce a lower-level API where the +// caller provides an appropriately sized scratch buffer, but we will cross that bridge if it is +// required. +// + +#include "precomp.h" + +const UINT64 SymCryptAesKwDefaultICV = 0xA6A6A6A6A6A6A6A6; +const UINT32 SymCryptAesKwpDefaultICV = 0xA65959A6; +#define SYMCRYPT_AES_SEMIBLOCK_SIZE (SYMCRYPT_AES_BLOCK_SIZE / 2) + +const SIZE_T SymCryptAesKWMinPlaintextLen = 16u; // 2*SYMCRYPT_AES_SEMIBLOCK_SIZE +const SIZE_T SymCryptAesKWMaxPlaintextLen = (1u<<31)-8; +const SIZE_T SymCryptAesKWMinCiphertextLen = 24u; // 3*SYMCRYPT_AES_SEMIBLOCK_SIZE +const SIZE_T SymCryptAesKWMaxCiphertextLen = (1u<<31); + +const SIZE_T SymCryptAesKWPMinPlaintextLen = 1u; +const SIZE_T SymCryptAesKWPMaxPlaintextLen = (1u<<31)-8; +const SIZE_T SymCryptAesKWPMinCiphertextLen = 16u; +const SIZE_T SymCryptAesKWPMaxCiphertextLen = (1u<<31); + +// +// This function corresponds to algorithm W(S) from section 6.1 of SP 800-38F +// +// We perform this algorithm destructively in place, reading and writing to the same location +// multiple times +// +static +VOID +SYMCRYPT_CALL +SymCryptAesKwxInternalWrap( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_bytes_(cbBuf) PBYTE pbBuf, + UINT32 cbBuf ) +{ + SYMCRYPT_ALIGN BYTE activeBlock[SYMCRYPT_AES_BLOCK_SIZE]; + const UINT32 nSemiBlocks = cbBuf / 
SYMCRYPT_AES_SEMIBLOCK_SIZE; // n per SP 800-38F + UINT64 encryptionIdx = 1; // t per SP 800-38F + UINT64 lowHalfTemp = 0; + + SYMCRYPT_ASSERT((cbBuf & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) == 0); + SYMCRYPT_ASSERT(cbBuf >= SymCryptAesKWMinCiphertextLen); + SYMCRYPT_ASSERT(cbBuf <= SymCryptAesKWMaxCiphertextLen); + + // Special case for first encryption + // Initialize the low half of active block with the first semi-block of input + memcpy( activeBlock, pbBuf, SYMCRYPT_AES_SEMIBLOCK_SIZE); + + for( UINT32 outerLoopCnt = 0; outerLoopCnt < 6; outerLoopCnt++ ) + { + for( UINT32 innerLoopCnt = 1; innerLoopCnt < nSemiBlocks; innerLoopCnt++ ) + { + SIZE_T bufOffset = innerLoopCnt*SYMCRYPT_AES_SEMIBLOCK_SIZE; + + // Initialize the high half of active block to semi-block from buf + memcpy( activeBlock+SYMCRYPT_AES_SEMIBLOCK_SIZE, pbBuf+bufOffset, SYMCRYPT_AES_SEMIBLOCK_SIZE); + + // Encrypt activeBlock in place + SymCryptAesEncrypt( pExpandedKey, activeBlock, activeBlock ); + + // Store the high half of result back to semi-block from buf + memcpy( pbBuf+bufOffset, activeBlock+SYMCRYPT_AES_SEMIBLOCK_SIZE, SYMCRYPT_AES_SEMIBLOCK_SIZE ); + + // Use the low half of the result and the next encryptionIdx to + // initialize the low half of the next encryption + lowHalfTemp = SYMCRYPT_LOAD_LSBFIRST64( activeBlock ); + lowHalfTemp ^= SYMCRYPT_BSWAP64( encryptionIdx ); + SYMCRYPT_STORE_LSBFIRST64( activeBlock, lowHalfTemp ); + + // Update encryptionIdx + encryptionIdx++; + } + } + + SYMCRYPT_ASSERT( (encryptionIdx-1) == (nSemiBlocks-1)*6 ); + + // Special case for last encryption + // Store the final low half of encryption as the first semi-block of output + SYMCRYPT_STORE_LSBFIRST64( pbBuf, lowHalfTemp ); + + SymCryptWipeKnownSize( activeBlock, sizeof(activeBlock) ); +} + +// +// This function corresponds to algorithm W^-1(S) from section 6.1 of SP 800-38F +// +// We perform this algorithm destructively in place, reading and writing to the same location +// multiple times +// +static 
+VOID
+SYMCRYPT_CALL
+SymCryptAesKwxInternalUnwrap(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _Inout_updates_bytes_(cbBuf) PBYTE pbBuf,
+    UINT32 cbBuf )
+{
+    SYMCRYPT_ALIGN BYTE activeBlock[SYMCRYPT_AES_BLOCK_SIZE];
+    const UINT32 nSemiBlocks = cbBuf / SYMCRYPT_AES_SEMIBLOCK_SIZE; // n per SP 800-38F
+    UINT64 decryptionIdx = 6*(nSemiBlocks-1); // t per SP 800-38F
+    UINT64 lowHalfTemp = 0;
+
+    SYMCRYPT_ASSERT((cbBuf & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) == 0);
+    SYMCRYPT_ASSERT(cbBuf >= SymCryptAesKWMinCiphertextLen);
+    SYMCRYPT_ASSERT(cbBuf <= SymCryptAesKWMaxCiphertextLen);
+
+    // Special case for first decryption
+    // Initialize the low half temporary with the first semi-block of input
+    lowHalfTemp = SYMCRYPT_LOAD_LSBFIRST64( pbBuf );
+
+    // Six passes over semi-blocks n-1..1, with the index counting down to 0
+    for( UINT32 outerLoopCnt = 0; outerLoopCnt < 6; outerLoopCnt++ )
+    {
+        for( UINT32 innerLoopCnt = nSemiBlocks-1; innerLoopCnt > 0; innerLoopCnt-- )
+        {
+            SIZE_T bufOffset = innerLoopCnt*SYMCRYPT_AES_SEMIBLOCK_SIZE;
+
+            // Update low half with decryptionIdx and store to low half of active block
+            lowHalfTemp ^= SYMCRYPT_BSWAP64( decryptionIdx );
+            SYMCRYPT_STORE_LSBFIRST64( activeBlock, lowHalfTemp );
+
+            // Initialize the high half of active block to semi-block from buf
+            memcpy( activeBlock+SYMCRYPT_AES_SEMIBLOCK_SIZE, pbBuf+bufOffset, SYMCRYPT_AES_SEMIBLOCK_SIZE);
+
+            // Decrypt activeBlock in place
+            SymCryptAesDecrypt( pExpandedKey, activeBlock, activeBlock );
+
+            // Store the high half of result back to semi-block from buf
+            memcpy( pbBuf+bufOffset, activeBlock+SYMCRYPT_AES_SEMIBLOCK_SIZE, SYMCRYPT_AES_SEMIBLOCK_SIZE );
+
+            // Update decryptionIdx
+            decryptionIdx--;
+
+            // Use the low half of the result to set the low half temporary
+            lowHalfTemp = SYMCRYPT_LOAD_LSBFIRST64( activeBlock );
+        }
+    }
+
+    SYMCRYPT_ASSERT( decryptionIdx == 0 );
+
+    // Special case for last decryption
+    // Store the final low half of decryption as the first semi-block of output
+    SYMCRYPT_STORE_LSBFIRST64( pbBuf, lowHalfTemp );
+
+    // activeBlock held intermediate cipher state; clear it before returning
+    SymCryptWipeKnownSize( activeBlock, sizeof(activeBlock) );
+}
+
+//
+// SymCryptAesKwEncrypt
+//
+// Wraps cbSrc bytes from pbSrc with AES-KW and writes cbSrc + 8 bytes to pbDst.
+// The work is done in a heap scratch buffer that is wiped and freed before returning.
+//
+// Returns:
+//  - SYMCRYPT_INVALID_ARGUMENT if cbSrc is outside the AES-KW plaintext limits or is
+//      not a multiple of the semi-block size
+//  - SYMCRYPT_BUFFER_TOO_SMALL if cbDst < cbSrc + 8
+//  - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if the scratch buffer cannot be allocated
+//  - SYMCRYPT_NO_ERROR on success, with *pcbResult set to the number of bytes written
+// pbDst and *pcbResult are not written on failure.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptAesKwEncrypt(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _In_reads_(cbSrc) PCBYTE pbSrc,
+    SIZE_T cbSrc,
+    _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst,
+    SIZE_T cbDst,
+    _Out_ SIZE_T* pcbResult )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    PBYTE pbScratch = NULL;
+    UINT32 cbScratch = 0;
+
+    if( (cbSrc < SymCryptAesKWMinPlaintextLen) ||
+        (cbSrc > SymCryptAesKWMaxPlaintextLen) ||
+        ((cbSrc & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) != 0) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // The range check above guarantees cbSrc fits in a UINT32 and the sum cannot wrap
+    cbScratch = ((UINT32) cbSrc)+SYMCRYPT_AES_SEMIBLOCK_SIZE;
+    if( cbDst < cbScratch )
+    {
+        scError = SYMCRYPT_BUFFER_TOO_SMALL;
+        goto cleanup;
+    }
+
+    pbScratch = SymCryptCallbackAlloc( cbScratch );
+    if( pbScratch == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // set up input buffer as ICV1 || P
+    SYMCRYPT_STORE_LSBFIRST64( pbScratch, SymCryptAesKwDefaultICV );
+    memcpy( pbScratch+8, pbSrc, cbSrc );
+
+    // encrypt input buffer in place
+    SymCryptAesKwxInternalWrap( pExpandedKey, pbScratch, cbScratch );
+
+    // copy encrypted buffer to output
+    memcpy( pbDst, pbScratch, cbScratch );
+    *pcbResult = cbScratch;
+
+cleanup:
+    if( pbScratch != NULL )
+    {
+        SymCryptWipe( pbScratch, cbScratch );
+        SymCryptCallbackFree( pbScratch );
+    }
+    return scError;
+}
+
+//
+// SymCryptAesKwDecrypt
+//
+// Unwraps cbSrc bytes of AES-KW ciphertext from pbSrc and, if the recovered first
+// semi-block equals the default ICV, writes the remaining cbSrc - 8 bytes to pbDst.
+//
+// Returns:
+//  - SYMCRYPT_INVALID_ARGUMENT if cbSrc is outside the AES-KW ciphertext limits or is
+//      not a multiple of the semi-block size
+//  - SYMCRYPT_BUFFER_TOO_SMALL if cbDst < cbSrc - 8
+//  - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if the scratch buffer cannot be allocated
+//  - SYMCRYPT_AUTHENTICATION_FAILURE if the ICV check fails
+//  - SYMCRYPT_NO_ERROR on success, with *pcbResult set to the number of bytes written
+// pbDst and *pcbResult are not written on failure.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptAesKwDecrypt(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _In_reads_(cbSrc) PCBYTE pbSrc,
+    SIZE_T cbSrc,
+    _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst,
+    SIZE_T cbDst,
+    _Out_ SIZE_T* pcbResult )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    PBYTE pbScratch = NULL;
+    UINT32 cbScratch = 0;
+
+    if( (cbSrc < SymCryptAesKWMinCiphertextLen) ||
+        (cbSrc > SymCryptAesKWMaxCiphertextLen) ||
+        ((cbSrc & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) != 0) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    cbScratch = (UINT32) cbSrc;
+    if( cbDst < cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE )
+    {
+        scError = SYMCRYPT_BUFFER_TOO_SMALL;
+        goto cleanup;
+    }
+
+    pbScratch = SymCryptCallbackAlloc( cbScratch );
+    if( pbScratch == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // set up input buffer as C
+    memcpy( pbScratch, pbSrc, cbSrc );
+
+    // decrypt input buffer in place
+    SymCryptAesKwxInternalUnwrap( pExpandedKey, pbScratch, cbScratch );
+
+    // check first semi-block has the expected value
+    if( SYMCRYPT_LOAD_LSBFIRST64( pbScratch ) != SymCryptAesKwDefaultICV )
+    {
+        scError = SYMCRYPT_AUTHENTICATION_FAILURE;
+        goto cleanup;
+    }
+
+    // copy decrypted buffer to output
+    memcpy( pbDst, pbScratch+SYMCRYPT_AES_SEMIBLOCK_SIZE, cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE );
+    *pcbResult = cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE;
+
+cleanup:
+    if( pbScratch != NULL )
+    {
+        SymCryptWipe( pbScratch, cbScratch );
+        SymCryptCallbackFree( pbScratch );
+    }
+    return scError;
+
+}
+
+//
+// SymCryptAesKwpEncrypt
+//
+// Wraps cbSrc bytes from pbSrc with AES-KWP. The plaintext is zero-padded up to a
+// multiple of the semi-block size and prefixed with an 8-byte header (ICV and length),
+// so cbSrc + 8 + padding bytes are written to pbDst.
+//
+// Returns:
+//  - SYMCRYPT_INVALID_ARGUMENT if cbSrc is outside the AES-KWP plaintext limits
+//  - SYMCRYPT_BUFFER_TOO_SMALL if cbDst is smaller than the padded output
+//  - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if the scratch buffer cannot be allocated
+//  - SYMCRYPT_NO_ERROR on success, with *pcbResult set to the number of bytes written
+// pbDst and *pcbResult are not written on failure.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptAesKwpEncrypt(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _In_reads_(cbSrc) PCBYTE pbSrc,
+    SIZE_T cbSrc,
+    _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst,
+    SIZE_T cbDst,
+    _Out_ SIZE_T* pcbResult )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    PBYTE pbScratch = NULL;
+    UINT32 cbScratch = 0;
+    UINT32 cbPad = 0;
+
+    if( (cbSrc < SymCryptAesKWPMinPlaintextLen) ||
+        (cbSrc > SymCryptAesKWPMaxPlaintextLen) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Number of zero bytes needed to reach the next semi-block boundary (0..7)
+    cbPad = SYMCRYPT_AES_SEMIBLOCK_SIZE - ((UINT32) cbSrc & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1));
+    if( cbPad == SYMCRYPT_AES_SEMIBLOCK_SIZE )
+    {
+        cbPad = 0;
+    }
+
+    cbScratch = (UINT32) cbSrc + SYMCRYPT_AES_SEMIBLOCK_SIZE + cbPad;
+    if( cbDst < cbScratch )
+    {
+        scError = SYMCRYPT_BUFFER_TOO_SMALL;
+        goto cleanup;
+    }
+
+    SYMCRYPT_ASSERT( cbScratch >= 16 );
+
+    pbScratch = SymCryptCallbackAlloc( cbScratch );
+    if( pbScratch == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // set up input buffer as ICV2 || len(P) || P || PAD
+    SYMCRYPT_STORE_LSBFIRST32( pbScratch, SymCryptAesKwpDefaultICV );
+    SYMCRYPT_STORE_MSBFIRST32( pbScratch+4, (UINT32) cbSrc );
+    // pad by unconditionally setting the last 8 bytes to 0
+    // then overwrite some or all of the padding bytes with plaintext
+    SYMCRYPT_STORE_LSBFIRST64( pbScratch+cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE, 0u );
+    memcpy( pbScratch+8, pbSrc, cbSrc );
+
+    // encrypt input buffer in place
+    if( cbScratch == SYMCRYPT_AES_BLOCK_SIZE )
+    {
+        // special case for AES-KWP with small plaintext
+        SymCryptAesEncrypt( pExpandedKey, pbScratch, pbScratch );
+    } else {
+        SymCryptAesKwxInternalWrap( pExpandedKey, pbScratch, cbScratch );
+    }
+
+    // copy encrypted buffer to output
+    memcpy( pbDst, pbScratch, cbScratch );
+    *pcbResult = cbScratch;
+
+cleanup:
+    if( pbScratch != NULL )
+    {
+        SymCryptWipe( pbScratch, cbScratch );
+        SymCryptCallbackFree( pbScratch );
+    }
+    return scError;
+}
+
+//
+// SymCryptAesKwpDecrypt
+//
+// Unwraps cbSrc bytes of AES-KWP ciphertext from pbSrc, verifies the header and the
+// zero padding, and writes the recovered plaintext to pbDst.
+//
+// The ICV, length and padding checks are accumulated into a single mask and tested
+// once, so a malformed buffer is rejected without branching on which check failed.
+//
+// Returns:
+//  - SYMCRYPT_INVALID_ARGUMENT if cbSrc is outside the AES-KWP ciphertext limits or is
+//      not a multiple of the semi-block size
+//  - SYMCRYPT_BUFFER_TOO_SMALL if cbDst cannot hold the smallest plaintext possible for
+//      this ciphertext length, or (after successful verification) the actual plaintext
+//  - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if the scratch buffer cannot be allocated
+//  - SYMCRYPT_AUTHENTICATION_FAILURE if any verification check fails
+//  - SYMCRYPT_NO_ERROR on success, with *pcbResult set to the plaintext length
+// pbDst and *pcbResult are not written on failure.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptAesKwpDecrypt(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _In_reads_(cbSrc) PCBYTE pbSrc,
+    SIZE_T cbSrc,
+    _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst,
+    SIZE_T cbDst,
+    _Out_ SIZE_T* pcbResult )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    PBYTE pbScratch = NULL;
+    UINT32 cbScratch = 0;
+    UINT32 cbPlaintext = 0;
+    UINT32 cbPad = 0;
+    UINT32 mVerificationError = 0; // Mask indicating whether the decrypted buffer is malformed
+    UINT32 mIsPlaintext = 0; // Mask for plaintext bytes in the final semi-block
+
+    if( (cbSrc < SymCryptAesKWPMinCiphertextLen) ||
+        (cbSrc > SymCryptAesKWPMaxCiphertextLen) ||
+        ((cbSrc & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) != 0) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Early size check against the shortest plaintext this ciphertext length allows
+    // (8-byte header removed, at most 7 bytes of padding)
+    cbScratch = (UINT32) cbSrc;
+    if( cbDst < cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE-7 )
+    {
+        scError = SYMCRYPT_BUFFER_TOO_SMALL;
+        goto cleanup;
+    }
+
+    pbScratch = SymCryptCallbackAlloc( cbScratch );
+    if( pbScratch == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // set up input buffer as C
+    memcpy( pbScratch, pbSrc, cbSrc );
+
+    // decrypt input buffer in place
+    if( cbScratch == SYMCRYPT_AES_BLOCK_SIZE )
+    {
+        // special case for AES-KWP with small ciphertext
+        SymCryptAesDecrypt( pExpandedKey, pbScratch, pbScratch );
+    } else {
+        SymCryptAesKwxInternalUnwrap( pExpandedKey, pbScratch, cbScratch );
+    }
+
+    // Check if the decrypted buffer is of an expected form
+    // check bytes [0..3] are expected ICV
+    mVerificationError |= SYMCRYPT_LOAD_LSBFIRST32( pbScratch ) ^ SymCryptAesKwpDefaultICV;
+
+    // check bytes [4..7] are a valid plaintext length (i.e. computed cbPad in range [0,7])
+    cbPlaintext = SYMCRYPT_LOAD_MSBFIRST32( pbScratch+4 );
+    cbPad = (UINT32) cbSrc - cbPlaintext - SYMCRYPT_AES_SEMIBLOCK_SIZE;
+    mVerificationError |= (cbPad & 0xfffffff8);
+
+    // check that padding is all 0s
+    // (byte 0 of the final semi-block is skipped: with cbPad <= 7 it is always plaintext)
+    for( UINT32 i = 1; i<SYMCRYPT_AES_SEMIBLOCK_SIZE; i++ )
+    {
+        mIsPlaintext = SymCryptMask32LtU31(i, SYMCRYPT_AES_SEMIBLOCK_SIZE-(cbPad&7));
+        mVerificationError |= ((UINT32) pbScratch[ cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE+i ]) & ~mIsPlaintext;
+    }
+
+    // Now if there was any verification error, we fail
+    if( mVerificationError != 0 )
+    {
+        scError = SYMCRYPT_AUTHENTICATION_FAILURE;
+        goto cleanup;
+    }
+
+    // We are variable time w.r.t. the plaintext length on success
+    if( cbDst < cbPlaintext )
+    {
+        scError = SYMCRYPT_BUFFER_TOO_SMALL;
+        goto cleanup;
+    }
+
+    // copy decrypted buffer to output
+    memcpy( pbDst, pbScratch+SYMCRYPT_AES_SEMIBLOCK_SIZE, cbPlaintext );
+    *pcbResult = cbPlaintext;
+
+cleanup:
+    if( pbScratch != NULL )
+    {
+        SymCryptWipe( pbScratch, cbScratch );
+        SymCryptCallbackFree( pbScratch );
+    }
+    return scError;
+
+}
diff --git a/libs/symcrypt/lib/blockciphermodes.c b/libs/symcrypt/lib/blockciphermodes.c
new file mode 100644
index 00000000000..54fe294bb99
--- /dev/null
+++ b/libs/symcrypt/lib/blockciphermodes.c
@@ -0,0 +1,470 @@
+//
+// BlockCipherModes.c generic implementation of all block cipher modes
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+VOID
+SYMCRYPT_CALL
+SymCryptEcbEncrypt(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    SIZE_T i;
+    SIZE_T cbToDo = cbData & ~(pBlockCipher->blockSize - 1);
+
+    if( pBlockCipher->ecbEncryptFunc != NULL )
+    {
+        //
+        // Use optimized implementation if available
+        //
+        (*pBlockCipher->ecbEncryptFunc)( pExpandedKey, pbSrc, pbDst, cbData );
+        return;
+    }
+
+    //
+    // To avoid buffer overruns we truncate the work to an integral number of blocks.
+    //
+
+    for( i=0; i<cbToDo; i+= pBlockCipher->blockSize )
+    {
+        (*pBlockCipher->encryptFunc)( pExpandedKey, pbSrc + i, pbDst + i );
+    }
+}
+
+//
+// SymCryptEcbDecrypt
+//
+// Generic ECB decryption. Dispatches to the cipher's optimized ecbDecryptFunc when one
+// is provided; otherwise decrypts block by block, with cbData rounded down to a whole
+// number of blocks.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptEcbDecrypt(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    SIZE_T i;
+    SIZE_T cbToDo = cbData & ~(pBlockCipher->blockSize - 1);
+
+    if( pBlockCipher->ecbDecryptFunc != NULL )
+    {
+        //
+        // Use optimized implementation if available
+        //
+        (*pBlockCipher->ecbDecryptFunc)( pExpandedKey, pbSrc, pbDst, cbData );
+        return;
+    }
+
+    for( i=0; i<cbToDo; i+= pBlockCipher->blockSize )
+    {
+        (*pBlockCipher->decryptFunc)( pExpandedKey, pbSrc + i, pbDst + i );
+    }
+}
+
+
+//
+// SymCryptCbcEncrypt
+//
+// Generic CBC encryption routine for block ciphers.
+// The following restrictions must be obeyed:
+//  - blockSize <= 32 and must be a power of 2
+//  - cbData must be a multiple of the block size
+//
+VOID
+SYMCRYPT_CALL
+SymCryptCbcEncrypt(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _Inout_updates_( pBlockCipher->blockSize )
+    PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_MAX_BLOCK_SIZE];
+    SIZE_T blockSize;
+    PCBYTE pbSrcEnd;
+    PCBYTE pSrc = pbSrc;
+    PBYTE pDst = pbDst;
+
+    if( pBlockCipher->cbcEncryptFunc != NULL )
+    {
+        //
+        // Use optimized implementation if available
+        //
+        (*pBlockCipher->cbcEncryptFunc)( pExpandedKey, pbChainingValue, pSrc, pDst, cbData );
+        return;
+    }
+
+    blockSize = pBlockCipher->blockSize;
+
+    SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE );
+
+
+    //
+    // Compute the end of the data, rounding the size down to a multiple of the block size.
+    //
+    pbSrcEnd = &pbSrc[ cbData & ~(blockSize - 1) ];
+
+    //
+    // We keep the chaining state in a local buffer to enforce the read-once write-once rule.
+    //
+    memcpy( buf, pbChainingValue, blockSize );
+    while( pSrc < pbSrcEnd )
+    {
+        SYMCRYPT_ASSERT( pSrc <= pbSrc + cbData - blockSize ); // help PreFast
+        SYMCRYPT_ASSERT( pDst <= pbDst + cbData - blockSize ); // help PreFast
+        SYMCRYPT_ASSERT( blockSize <= cbData ); // help PreFast
+        SymCryptXorBytes( pSrc, buf, buf, blockSize );
+        (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf );
+        memcpy( pDst, buf, blockSize );
+        pSrc += blockSize;
+        pDst += blockSize;
+    }
+
+    memcpy( pbChainingValue, buf, blockSize );
+
+    SymCryptWipeKnownSize( buf, sizeof( buf ));
+}
+
+//
+// SymCryptCbcDecrypt
+//
+// Generic CBC decryption routine for block ciphers.
+// The following restrictions must be obeyed:
+//  - blockSize <= 32 and must be a power of 2
+//  - cbData must be a multiple of the block size
+//
+VOID
+SYMCRYPT_CALL
+SymCryptCbcDecrypt(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _Inout_updates_( pBlockCipher->blockSize )
+    PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    // One stack buffer split into three block-sized regions so a single wipe clears them all
+    SYMCRYPT_ALIGN BYTE buf[3 * SYMCRYPT_MAX_BLOCK_SIZE];
+    PBYTE chain = &buf[0];
+    PBYTE ciphertext = &buf[SYMCRYPT_MAX_BLOCK_SIZE];
+    PBYTE tmp = &buf[2*SYMCRYPT_MAX_BLOCK_SIZE];
+
+    SIZE_T blockSize;
+    PCBYTE pbSrcEnd;
+
+    if( pBlockCipher->cbcDecryptFunc != NULL )
+    {
+        (*pBlockCipher->cbcDecryptFunc)( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData );
+        return;
+    }
+
+    blockSize = pBlockCipher->blockSize;
+    SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE );
+
+    //
+    // Compute the end of the data, rounding the size down to a multiple of the block size.
+    //
+    pbSrcEnd = &pbSrc[ cbData & ~(blockSize-1) ];
+
+#pragma warning(suppress: 22105)
+    memcpy( chain, pbChainingValue, blockSize );
+
+    //
+    // Loop structured to obey the read-once/write-once rule
+    //
+    while( pbSrc < pbSrcEnd )
+    {
+        SYMCRYPT_ASSERT( pbSrc <= pbSrcEnd - blockSize ); // help PreFast
+        memcpy( ciphertext, pbSrc, blockSize );
+        (*pBlockCipher->decryptFunc)( pExpandedKey, ciphertext, tmp );
+        SymCryptXorBytes( tmp, chain, pbDst, blockSize );
+        memcpy( chain, ciphertext, blockSize );
+        pbDst += blockSize;
+        pbSrc += blockSize;
+    }
+
+    memcpy( pbChainingValue, chain, blockSize );
+
+    SymCryptWipeKnownSize( buf, sizeof( buf ));
+}
+
+//
+// SymCryptCbcMac
+//
+// Generic CBC-MAC update: XORs each whole block of pbSrc into the chaining value and
+// encrypts it. cbData is rounded down to a multiple of the block size; no output other
+// than the updated pbChainingValue is produced.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptCbcMac(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _Inout_updates_( pBlockCipher->blockSize )
+    PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    SIZE_T cbData )
+{
+    // Sized by the same constant the assertion below checks against, like the other
+    // mode functions in this file, so buffer size and bound cannot drift apart.
+    SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_MAX_BLOCK_SIZE];
+    SIZE_T blockSize;
+    PCBYTE pbSrcEnd;
+    PCBYTE p;
+
+    if( pBlockCipher->cbcMacFunc != NULL )
+    {
+        //
+        // Use optimized implementation if available
+        //
+        (*pBlockCipher->cbcMacFunc)( pExpandedKey, pbChainingValue, pbSrc, cbData );
+        return;
+    }
+
+    blockSize = pBlockCipher->blockSize;
+    SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE );
+
+    //
+    // Compute the end of the data, rounding the size down to a multiple of the block size.
+    //
+    pbSrcEnd = &pbSrc[ cbData & ~(blockSize - 1) ];
+
+    //
+    // We keep the chaining state in a local buffer to enforce the read-once write-once rule.
+    // It also improves memory locality.
+    //
+    memcpy( buf, pbChainingValue, blockSize );
+    p = pbSrc;
+    while( p < pbSrcEnd )
+    {
+        SYMCRYPT_ASSERT( p <= pbSrc + cbData - blockSize );
+        SymCryptXorBytes( p, buf, buf, blockSize );
+        (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf );
+        p += blockSize;
+    }
+
+    memcpy( pbChainingValue, buf, blockSize );
+
+    SymCryptWipeKnownSize( buf, sizeof( buf ));
+}
+
+//
+// SymCryptCtrMsb32
+//
+// Generic CTR mode in which only the last 4 bytes of the counter block are incremented
+// (MSB-first). cbData is rounded down to a multiple of the block size, and the updated
+// counter block is written back to pbChainingValue.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptCtrMsb32(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _Inout_updates_( pBlockCipher->blockSize )
+    PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_MAX_BLOCK_SIZE];
+    PBYTE count = &buf[0];
+    PBYTE keystream= &buf[SYMCRYPT_MAX_BLOCK_SIZE];
+    SIZE_T blockSize;
+    PCBYTE pbSrcEnd;
+
+    blockSize = pBlockCipher->blockSize;
+    SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE );
+
+    //
+    // Compute the end of the data, rounding the size down to a multiple of the block size.
+    //
+    pbSrcEnd = &pbSrc[ cbData & ~(blockSize - 1) ];
+
+    //
+    // We keep the chaining state in a local buffer to enforce the read-once write-once rule.
+    // It also improves memory locality.
+    //
+    #pragma warning(suppress: 22105)
+    memcpy( count, pbChainingValue, blockSize );
+    while( pbSrc < pbSrcEnd )
+    {
+        SYMCRYPT_ASSERT( pbSrc <= pbSrcEnd - blockSize ); // help PreFast
+        (*pBlockCipher->encryptFunc)( pExpandedKey, count, keystream );
+        SymCryptXorBytes( keystream, pbSrc, pbDst, blockSize );
+
+        //
+        // We only need to increment the last 32 bits of the counter value.
+        //
+        SYMCRYPT_STORE_MSBFIRST32( &count[ blockSize-4 ], 1 + SYMCRYPT_LOAD_MSBFIRST32( &count[ blockSize-4 ] ) );
+
+        pbSrc += blockSize;
+        pbDst += blockSize;
+    }
+
+    memcpy( pbChainingValue, count, blockSize );
+
+    SymCryptWipeKnownSize( buf, sizeof( buf ));
+}
+
+//
+// SymCryptCtrMsb64
+//
+// Generic CTR mode in which only the last 8 bytes of the counter block are incremented
+// (MSB-first). Dispatches to the cipher's optimized ctrMsb64Func when one is provided.
+// cbData is rounded down to a multiple of the block size in the generic path.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptCtrMsb64(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _Inout_updates_( pBlockCipher->blockSize )
+    PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_MAX_BLOCK_SIZE];
+    PBYTE count = &buf[0];
+    PBYTE keystream= &buf[SYMCRYPT_MAX_BLOCK_SIZE];
+    SIZE_T blockSize;
+    PCBYTE pbSrcEnd;
+
+    if( pBlockCipher->ctrMsb64Func != NULL )
+    {
+        //
+        // Use optimized implementation if available
+        //
+        (*pBlockCipher->ctrMsb64Func)( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData );
+        return;
+    }
+
+    blockSize = pBlockCipher->blockSize;
+    SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE );
+
+    //
+    // Compute the end of the data, rounding the size down to a multiple of the block size.
+    //
+    pbSrcEnd = &pbSrc[ cbData & ~(blockSize - 1) ];
+
+    //
+    // We keep the chaining state in a local buffer to enforce the read-once write-once rule.
+    // It also improves memory locality.
+    //
+    #pragma warning(suppress: 22105)
+    memcpy( count, pbChainingValue, blockSize );
+    while( pbSrc < pbSrcEnd )
+    {
+        SYMCRYPT_ASSERT( pbSrc <= pbSrcEnd - blockSize ); // help PreFast
+        (*pBlockCipher->encryptFunc)( pExpandedKey, count, keystream );
+        SymCryptXorBytes( keystream, pbSrc, pbDst, blockSize );
+
+        //
+        // We only need to increment the last 64 bits of the counter value.
+        //
+        SYMCRYPT_STORE_MSBFIRST64( &count[ blockSize-8 ], 1 + SYMCRYPT_LOAD_MSBFIRST64( &count[ blockSize-8 ] ) );
+
+        pbSrc += blockSize;
+        pbDst += blockSize;
+    }
+
+    memcpy( pbChainingValue, count, blockSize );
+
+    SymCryptWipeKnownSize( buf, sizeof( buf ));
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptCfbEncrypt(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    SIZE_T cbShift,
+    _In_ PCVOID pExpandedKey,
+    _Inout_updates_( pBlockCipher->blockSize )
+    PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+//
+// Encrypt a buffer using the CFB cipher mode.
+//
+// This implements the CFB mode with a feedback shift of either 1 byte or a full block.
+// With a 1-byte shift this requires a block cipher encryption call for each byte, which is very slow.
+// Use of this cipher mode is not recommended.
+//
+// - pBlockCipher is a pointer to the block cipher description table.
+//      Suitable description tables for all ciphers in this library have been pre-defined.
+// - cbShift is the feedback shift in bytes. Any value other than 1 is treated as
+//      the block size of the cipher.
+// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there
+//      is no type safety to ensure that the expanded key and the encryption function match.
+// - pbChainingValue points to the chaining value. On entry and exit it
+//      contains the last blockSize ciphertext bytes.
+// - pbSrc is the input data buffer that will be encrypted/decrypted.
+// - cbData. Number of bytes to encrypt/decrypt. This must be a multiple of the block size.
+//      Data is processed cbShift bytes at a time; trailing bytes that do not fill a
+//      whole shift are left unprocessed.
+// - pbDst is the output buffer that receives the encrypted/decrypted data. The input and output
+//      buffers may be the same or non-overlapping, but may not partially overlap.
+//
+{
+    SYMCRYPT_ALIGN BYTE buf[2*SYMCRYPT_MAX_BLOCK_SIZE];
+    PBYTE chain = &buf[0];
+    PBYTE tmp = &buf[SYMCRYPT_MAX_BLOCK_SIZE];
+    SIZE_T blockSize;
+
+    blockSize = pBlockCipher->blockSize;
+    SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE );
+
+    // Force cbShift to either be 1 or blockSize
+    if(cbShift != 1)
+    {
+        cbShift = blockSize;
+    }
+
+    memcpy( chain, pbChainingValue, blockSize );
+    while( cbData >= cbShift )
+    {
+        (*pBlockCipher->encryptFunc)( pExpandedKey, chain, tmp );
+        SymCryptXorBytes( pbSrc, tmp, tmp, cbShift ); // tmp[0..cbShift-1] ^= pbSrc[0..cbShift-1]
+        memcpy( pbDst, tmp, cbShift );
+
+        // Shift the chain left by cbShift bytes and append the new ciphertext bytes
+        memmove( chain, chain + cbShift, blockSize - cbShift );
+        memcpy( chain + blockSize - cbShift, tmp, cbShift );
+
+        pbDst += cbShift;
+        pbSrc += cbShift;
+        cbData -= cbShift;
+    }
+
+    memcpy( pbChainingValue, chain, blockSize );
+
+    // tmp can still hold unused cipher output; clear the locals like the other modes in this file
+    SymCryptWipeKnownSize( buf, sizeof( buf ));
+}
+
+
+//
+// SymCryptCfbDecrypt
+//
+// Decrypt a buffer using the CFB cipher mode. Parameters have the same meaning as for
+// SymCryptCfbEncrypt: cbShift is forced to 1 or the block size, data is processed
+// cbShift bytes at a time, and pbChainingValue is updated with the last blockSize
+// ciphertext bytes. Note that CFB decryption uses the cipher's encryptFunc.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptCfbDecrypt(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    SIZE_T cbShift,
+    _In_ PCVOID pExpandedKey,
+    _Inout_updates_( pBlockCipher->blockSize )
+    PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    SYMCRYPT_ALIGN BYTE buf[2*SYMCRYPT_MAX_BLOCK_SIZE];
+    PBYTE chain = &buf[0];
+    PBYTE tmp = &buf[SYMCRYPT_MAX_BLOCK_SIZE];
+    SIZE_T blockSize;
+
+    blockSize = pBlockCipher->blockSize;
+    SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE );
+
+    // Force cbShift to either be 1 or blockSize
+    if(cbShift != 1)
+    {
+        cbShift = blockSize;
+    }
+
+    memcpy( chain, pbChainingValue, blockSize );
+    while( cbData >= cbShift )
+    {
+        (*pBlockCipher->encryptFunc)( pExpandedKey, chain, tmp );
+
+        //
+        // First we update the chain block
+        //
+
+        memmove( chain, chain + cbShift, blockSize - cbShift );
+        memcpy( chain + blockSize - cbShift, pbSrc, cbShift );
+
+        //
+        // To obey the read-once rule, we take the ciphertext from the updated chain block.
+        //
+        SymCryptXorBytes( chain + blockSize - cbShift, tmp, pbDst, cbShift );
+
+        pbDst += cbShift;
+        pbSrc += cbShift;
+        cbData -= cbShift;
+    }
+
+    memcpy( pbChainingValue, chain, blockSize );
+
+    // tmp holds the key stream that was XORed with the ciphertext to produce the
+    // plaintext; clear the locals like the other modes in this file
+    SymCryptWipeKnownSize( buf, sizeof( buf ));
+}
diff --git a/libs/symcrypt/lib/ccm.c b/libs/symcrypt/lib/ccm.c
new file mode 100644
index 00000000000..912f657d10e
--- /dev/null
+++ b/libs/symcrypt/lib/ccm.c
@@ -0,0 +1,634 @@
+//
+// CCM.c implementation of the CCM block cipher mode
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+#define CCM_MIN_NONCE_SIZE (7)
+#define CCM_MAX_NONCE_SIZE (13)
+#define CCM_MIN_TAG_SIZE (4)
+#define CCM_MAX_TAG_SIZE (16)
+
+#define CCM_MAX_COUNTER_SIZE (SYMCRYPT_CCM_BLOCK_SIZE - 1 - CCM_MIN_NONCE_SIZE)
+
+#define AUTHDATA_16BIT_LIMIT ((1<<16) - (1<<8))
+#define AUTHDATA_32BIT_LIMIT (1ull << 32)
+
+// Compile time BOOL statically determines if we need to check cbAuthData < AUTHDATA_32BIT_LIMIT
+// Used to suppress MSVC C4127 and clang Wtautological-constant-out-of-range-compare on 32b platforms
+const BOOL fcbAuthDataLt32bitLimitStatic = SIZE_T_MAX < AUTHDATA_32BIT_LIMIT;
+
+#define CCM_BLOCK_MOD_MASK (SYMCRYPT_CCM_BLOCK_SIZE - 1)
+#define CCM_BLOCK_ROUND_MASK (~CCM_BLOCK_MOD_MASK)
+
+//
+// SymCryptCcmValidateParameters
+//
+// Checks CCM parameters against the limits of SP800-38C. Returns
+// SYMCRYPT_WRONG_BLOCK_SIZE, SYMCRYPT_WRONG_NONCE_SIZE, SYMCRYPT_WRONG_DATA_SIZE or
+// SYMCRYPT_WRONG_TAG_SIZE for the first check that fails, in that order, and
+// SYMCRYPT_NO_ERROR otherwise. cbAssociatedData is not checked.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptCcmValidateParameters(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ SIZE_T cbNonce,
+    _In_ SIZE_T cbAssociatedData,
+    _In_ UINT64 cbData,
+    _In_ SIZE_T cbTag
+    )
+{
+    SIZE_T cbCounter;
+
+    UNREFERENCED_PARAMETER( cbAssociatedData );
+
+    if( pBlockCipher->blockSize != SYMCRYPT_CCM_BLOCK_SIZE )
+    {
+        return SYMCRYPT_WRONG_BLOCK_SIZE;
+    }
+
+    //
+    // Test against limits in SP800-38C appendix A
+    //
+    if( cbNonce < CCM_MIN_NONCE_SIZE || cbNonce > CCM_MAX_NONCE_SIZE )
+    {
+        return SYMCRYPT_WRONG_NONCE_SIZE;
+    }
+
+    //
+    // cbAssociatedData is limited to <2^64
+    // We don't test for this. None of our platforms has a SIZE_T that is
+    // large enough to violate this condition. And the test
+    // is of a form that the compiler cannot optimize away.
+    //
+
+    //
+    // The counter block consists of a single flag byte, the nonce, and the counter field.
+    //
+    cbCounter = SYMCRYPT_CCM_BLOCK_SIZE - cbNonce - 1;
+
+    //
+    // per SP800-38C cbData is limited to 2^{8*cbCounter}
+    // There is no way to do this test in a single comparison.
+    // We don't have to worry about side-channels in the && because
+    // cbCounter depends only on the length of the nonce, and we do not
+    // try to hide any lengths.
+    //
+    if( cbCounter < sizeof( UINT64 ) &&
+        cbData >= ((UINT64)1 << (8*cbCounter)) )
+    {
+        return SYMCRYPT_WRONG_DATA_SIZE;
+    }
+
+    if( cbTag < CCM_MIN_TAG_SIZE ||
+        cbTag > CCM_MAX_TAG_SIZE ||
+        (cbTag & 1) == 1 // valid tag lengths are [4, 6, 8, ..., 16]
+        )
+    {
+        return SYMCRYPT_WRONG_TAG_SIZE;
+    }
+
+    return SYMCRYPT_NO_ERROR;
+}
+
+
+
+//
+// SymCryptCcmEncryptDecryptPart
+//
+// XORs cbData bytes of pbSrc with the CTR key stream into pbDst. The same routine
+// serves both directions. A partially used key stream block is kept in
+// pState->keystreamBlock and indexed by pState->bytesProcessed, so the call can be
+// repeated with arbitrary lengths.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptCcmEncryptDecryptPart(
+    _Inout_ PSYMCRYPT_CCM_STATE pState,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+
+{
+    SIZE_T cbToDo = cbData;
+    SIZE_T bytesToProcess;
+
+    //
+    // Use any left-over key stream
+    //
+    while( (pState->bytesProcessed & CCM_BLOCK_MOD_MASK) != 0 && cbToDo > 0 )
+    {
+        *pbDst = *pbSrc ^ pState->keystreamBlock[ pState->bytesProcessed & CCM_BLOCK_MOD_MASK ];
+        pbDst++;
+        pbSrc++;
+        cbToDo--;
+        pState->bytesProcessed++;
+    }
+
+    //
+    // Bulk process the main part of the input and output
+    //
+    if( cbToDo >= SYMCRYPT_CCM_BLOCK_SIZE )
+    {
+        bytesToProcess = cbToDo & CCM_BLOCK_ROUND_MASK;
+        SYMCRYPT_ASSERT( bytesToProcess <= cbToDo );
+
+        SYMCRYPT_ASSERT( pState->pBlockCipher->blockSize == SYMCRYPT_CCM_BLOCK_SIZE );
+        SymCryptCtrMsb64( pState->pBlockCipher,
+                          pState->pExpandedKey,
+                          &pState->counterBlock[0],
+                          pbSrc,
+                          pbDst,
+                          bytesToProcess );
+        pbSrc += bytesToProcess;
+        pbDst += bytesToProcess;
+        pState->bytesProcessed += bytesToProcess;
+        cbToDo -= bytesToProcess;
+    }
+
+    if( cbToDo > 0 )
+    {
+        //
+        // Encrypt an all-zero key stream block to get the key stream.
+        //
+        SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_CCM_BLOCK_SIZE );
+
+        SYMCRYPT_ASSERT( pState->pBlockCipher->blockSize == SYMCRYPT_CCM_BLOCK_SIZE );
+        SymCryptCtrMsb64( pState->pBlockCipher,
+                          pState->pExpandedKey,
+                          &pState->counterBlock[0],
+                          &pState->keystreamBlock[0],
+                          &pState->keystreamBlock[0],
+                          SYMCRYPT_CCM_BLOCK_SIZE );
+        while( cbToDo > 0 )
+        {
+            *pbDst = *pbSrc ^ pState->keystreamBlock[ pState->bytesProcessed & CCM_BLOCK_MOD_MASK ];
+            pbDst++;
+            pbSrc++;
+            cbToDo--;
+            pState->bytesProcessed++;
+        }
+    }
+}
+
+
+//
+// SymCryptCcmAddMacData
+//
+// Feeds cbData bytes into the CBC-MAC state. Data is XORed into pState->macBlock and
+// the block is encrypted as soon as it is full; pState->bytesInMacBlock tracks how many
+// bytes of the current block have been absorbed but not yet encrypted.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptCcmAddMacData(
+    _Inout_ PSYMCRYPT_CCM_STATE pState,
+    _In_reads_( cbData ) PCBYTE pbData,
+    SIZE_T cbData )
+{
+    SIZE_T bytesToProcess;
+    if( pState->bytesInMacBlock > 0 )
+    {
+        // Top up the partially filled block first
+        bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_CCM_BLOCK_SIZE - pState->bytesInMacBlock );
+        SymCryptXorBytes( &pState->macBlock[pState->bytesInMacBlock], pbData, &pState->macBlock[pState->bytesInMacBlock], bytesToProcess );
+        pbData += bytesToProcess;
+        cbData -= bytesToProcess;
+        pState->bytesInMacBlock += bytesToProcess;
+
+        if( pState->bytesInMacBlock == SYMCRYPT_CCM_BLOCK_SIZE )
+        {
+            pState->pBlockCipher->encryptFunc( pState->pExpandedKey, &pState->macBlock[0], &pState->macBlock[0] );
+            pState->bytesInMacBlock = 0;
+        }
+    }
+
+    if( cbData >= SYMCRYPT_CCM_BLOCK_SIZE )
+    {
+        // Whole blocks go through the generic CBC-MAC routine
+        bytesToProcess = cbData & CCM_BLOCK_ROUND_MASK;
+        SYMCRYPT_ASSERT( pState->pBlockCipher->blockSize == SYMCRYPT_CCM_BLOCK_SIZE );
+
+        SymCryptCbcMac( pState->pBlockCipher,
+                        pState->pExpandedKey,
+                        &pState->macBlock[0],
+                        pbData,
+                        bytesToProcess );
+
+        pbData += bytesToProcess;
+        cbData -= bytesToProcess;
+    }
+
+    if( cbData > 0 )
+    {
+        // Remaining tail starts a new partial block
+        SymCryptXorBytes( &pState->macBlock[0], pbData, &pState->macBlock[0], cbData );
+        pState->bytesInMacBlock = cbData;
+    }
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptCcmPadMacData( _Inout_ PSYMCRYPT_CCM_STATE pState )
+{
+    //
+    // Pad the MAC data with zeroes until we hit the block size.
+    // The data is xorred into macBlock, so we don't have to update that.
+    // All we do is apply the block cipher if there was any data remaining in the macBlock.
+    //
+    if( pState->bytesInMacBlock > 0 )
+    {
+        pState->pBlockCipher->encryptFunc( pState->pExpandedKey, &pState->macBlock[0], &pState->macBlock[0] );
+        pState->bytesInMacBlock = 0;
+    }
+}
+
+
+//
+// SymCryptCcmEncrypt
+//
+// One-shot CCM encryption: initializes a local CCM state, encrypts cbData bytes from
+// pbSrc to pbDst and writes the cbTag-byte tag to pbTag.
+//
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptCcmEncrypt(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _In_reads_( cbNonce ) PCBYTE pbNonce,
+    SIZE_T cbNonce,
+    _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData,
+    SIZE_T cbAuthData,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData,
+    _Out_writes_( cbTag ) PBYTE pbTag,
+    SIZE_T cbTag )
+{
+    SYMCRYPT_CCM_STATE state;
+
+    SymCryptCcmInit( &state,
+                     pBlockCipher,
+                     pExpandedKey,
+                     pbNonce, cbNonce,
+                     pbAuthData, cbAuthData,
+                     cbData, cbTag );
+
+    SymCryptCcmEncryptPart( &state, pbSrc, pbDst, cbData );
+
+    SymCryptCcmEncryptFinal( &state, pbTag, cbTag );
+}
+
+//
+// SymCryptCcmDecrypt
+//
+// One-shot CCM decryption: initializes a local CCM state, decrypts cbData bytes from
+// pbSrc to pbDst and checks the tag. Returns the status of SymCryptCcmDecryptFinal;
+// on any error the output buffer is wiped.
+//
+SYMCRYPT_NOINLINE
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptCcmDecrypt(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _In_reads_( cbNonce ) PCBYTE pbNonce,
+    SIZE_T cbNonce,
+    _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData,
+    SIZE_T cbAuthData,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData,
+    _In_reads_( cbTag ) PCBYTE pbTag,
+    SIZE_T cbTag )
+{
+    SYMCRYPT_CCM_STATE state;
+    SYMCRYPT_ERROR status;
+
+    SymCryptCcmInit( &state,
+                     pBlockCipher,
+                     pExpandedKey,
+                     pbNonce, cbNonce,
+                     pbAuthData, cbAuthData,
+                     cbData, cbTag );
+
+
+    SymCryptCcmDecryptPart( &state, pbSrc, pbDst, cbData );
+
+    status = SymCryptCcmDecryptFinal( &state, pbTag, cbTag );
+
+    //
+    // If we failed for any reason we wipe our output buffer to avoid returning
+    // decrypted but unauthenticated data.
+    //
+    if( status != SYMCRYPT_NO_ERROR )
+    {
+        SymCryptWipe( pbDst, cbData );
+    }
+
+    return status;
+}
+
+//
+// SymCryptCcmInit
+//
+// Initializes a CCM state: records the parameters, builds the first MAC block and the
+// initial counter block from the nonce, and runs the length-prefixed associated data
+// through the MAC (zero-padded to a block boundary).
+//
+// Parameters are only validated by an assertion; callers are expected to pass values
+// that SymCryptCcmValidateParameters accepts.
+//
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptCcmInit(
+    _Out_ PSYMCRYPT_CCM_STATE pState,
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _In_reads_( cbNonce ) PCBYTE pbNonce,
+    SIZE_T cbNonce,
+    _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData,
+    SIZE_T cbAuthData,
+    UINT64 cbData,
+    SIZE_T cbTag )
+{
+    BYTE flags;
+    BYTE tmpBuf[ SYMCRYPT_CCM_BLOCK_SIZE ];
+    SIZE_T cbCounter;
+
+    SYMCRYPT_SET_MAGIC( pState );
+
+    //
+    // Validate parameters in checked builds
+    //
+    SYMCRYPT_ASSERT( SymCryptCcmValidateParameters( pBlockCipher, cbNonce, cbAuthData, cbData, cbTag ) == SYMCRYPT_NO_ERROR );
+
+
+    //
+    // compute # bytes in the counter field
+    // We limit cbNonce to 15 so that cbCounter + cbNonce = 15 will always hold
+    // This is much cheaper than full parameter validation, and it is enough to
+    // avoid any buffer overflows.
+    //
+    cbNonce &= SYMCRYPT_CCM_BLOCK_SIZE - 1;
+    cbCounter = SYMCRYPT_CCM_BLOCK_SIZE - 1 - cbNonce;
+
+    pState->pBlockCipher = pBlockCipher;
+    pState->pExpandedKey = pExpandedKey;
+    pState->cbNonce = cbNonce;
+    pState->cbData = cbData;
+    pState->cbTag = cbTag;
+    pState->cbCounter = cbCounter;
+    pState->bytesProcessed = 0;
+    pState->bytesInMacBlock = 0;
+
+    //
+    // Build the initial blocks for authentication and en/decryption
+    //
+    // Per Sp800-38c the flag byte is made up of four fields:
+    //      Bits 0-2 are cbCounter - 1
+    //      Bits 3-5 are (cbTag-2)/2
+    //      Bit 6 is 1 if cbAuthData > 0
+    //      Bit 7 is reserved and set to 0.
+    flags = (BYTE) (pState->cbCounter - 1);
+    flags |= ((cbTag-2)/2) << 3;
+    if( cbAuthData > 0 )
+    {
+        //
+        // No side-channel concerns with this if statements as we don't try to hide the
+        // data length or presence of authdata.
+        //
+        flags |= (1 << 6);
+    }
+
+
+    //
+    // The MAC starting block consists of three fields:
+    // the flag byte, the nonce, and cbData encoded into cbCounter bytes.
+    //
+    // NOTE(review): the index 8 - cbCounter below is only in range when cbCounter <= 8,
+    // i.e. cbNonce >= CCM_MIN_NONCE_SIZE. That is checked by the assertion above, not by
+    // the masking of cbNonce - confirm callers always validate parameters first.
+    //
+    pState->macBlock[0] = flags;
+    memcpy( &pState->macBlock[1], pbNonce, cbNonce );
+    SYMCRYPT_STORE_MSBFIRST64( &tmpBuf[0], cbData );
+    memcpy( &pState->macBlock[1+cbNonce], &tmpBuf[ 8 - cbCounter ], cbCounter );
+
+    //
+    // The counter block is similar in layout, but with two changes:
+    //  Bits 3-7 of the flag bytes are set to 0.
+    //  The counter field is set to one (first counter value used for data encryption).
+    // Wiping the whole block first is probably faster, as the size is known and the
+    // block is aligned.
+    // We also copy the nonce from the mac block to follow the read-once rule.
+    //
+    SymCryptWipeKnownSize( &pState->counterBlock[0], SYMCRYPT_CCM_BLOCK_SIZE );
+    pState->counterBlock[0] = (BYTE)(flags & 0x7);
+    memcpy( &pState->counterBlock[1], &pState->macBlock[1], cbNonce );
+    pState->counterBlock[ SYMCRYPT_CCM_BLOCK_SIZE - 1] = 1;
+
+    //
+    // Encrypt the current MAC block; our CBC convention is to do the encryption
+    // as soon as we have enough data.
+    //
+    pBlockCipher->encryptFunc( pExpandedKey, &pState->macBlock[0], &pState->macBlock[0] );
+
+    //
+    // Next we process the associated data
+    // See the CCM specs for the details
+    //
+    if( cbAuthData <= 0 )
+    {
+        //
+        // cbAuthData == 0, nothing needs to be done.
+        //
+    } else if( cbAuthData < AUTHDATA_16BIT_LIMIT )
+    {
+        //
+        // 16-bit length encoding.
+        //
+        SYMCRYPT_STORE_MSBFIRST16( &tmpBuf[0], (UINT16) cbAuthData );
+        SymCryptCcmAddMacData( pState, &tmpBuf[0], 2 );
+    } else if( fcbAuthDataLt32bitLimitStatic || cbAuthData < AUTHDATA_32BIT_LIMIT )
+    {
+        //
+        // 32-bit length
+        //
+        tmpBuf[0] = 0xff;
+        tmpBuf[1] = 0xfe; // Magic prefix as per SP 800-38c
+        SYMCRYPT_STORE_MSBFIRST32( &tmpBuf[2], (UINT32) cbAuthData );
+        SymCryptCcmAddMacData( pState, &tmpBuf[0], 2 + sizeof( UINT32 ) );
+    } else
+    {
+        //
+        // 64-bit length
+        //
+        tmpBuf[0] = 0xff;
+        tmpBuf[1] = 0xff; // Magic prefix as per SP 800-38c
+        SYMCRYPT_STORE_MSBFIRST64( &tmpBuf[2], cbAuthData );
+        SymCryptCcmAddMacData( pState, &tmpBuf[0], 2 + sizeof( UINT64 ) );
+    }
+
+    SymCryptCcmAddMacData( pState, pbAuthData, cbAuthData );
+    SymCryptCcmPadMacData( pState ); // Pad MAC data with zeroes until the next block size boundary
+
+}
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptCcmEncryptPart(
+    _Inout_ PSYMCRYPT_CCM_STATE pState,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    UINT64 bytesProcessedAfterThisCall;
+
+    SYMCRYPT_CHECK_MAGIC( pState );
+
+    bytesProcessedAfterThisCall = cbData + pState->bytesProcessed;
+
+    SYMCRYPT_ASSERT( bytesProcessedAfterThisCall >= cbData &&
+                     bytesProcessedAfterThisCall <= pState->cbData );
+
+    //
+    // We are violating the read-once implementation rule here. We read the data twice:
+    // once for MACing and once for encryption.
+    // In this particular situation this is safe to do.
+    // We consider the read for the MAC operation as reading the 'real' value.
+    // The encryption code reads the data, but all it does is XOR the key stream into
+    // it. (CCM encryption uses CTR mode for the encryption part.)
+    // We don't care if the attacker modifies the data before the encryption.
+    // We are revealing the key stream anyway (from the plaintext and ciphertext) and
+    // the exact byte value that we xor the key stream into is irrelevant.
+ // + SymCryptCcmAddMacData( pState, pbSrc, cbData ); + + SymCryptCcmEncryptDecryptPart( pState, pbSrc, pbDst, cbData ); + +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptCcmEncryptFinal( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ) +{ + // + // Check invariants in checked builds + // + SYMCRYPT_CHECK_MAGIC( pState ); + + SYMCRYPT_ASSERT( cbTag == pState->cbTag && pState->bytesProcessed == pState->cbData ); + + + SymCryptCcmPadMacData( pState ); + + // + // Set the counter value to zero to get the counter value that encrypts the tag, + // and then encrypt the tag. + // We reset bytesProcessed so that the partial encrypt/decrypt function will do the right thing + // + SymCryptWipe( &pState->counterBlock[1 + pState->cbNonce], pState->cbCounter ); + + pState->bytesProcessed = 0; + + SymCryptCcmEncryptDecryptPart( pState, &pState->macBlock[0], &pState->macBlock[0], SYMCRYPT_CCM_BLOCK_SIZE ); + + memcpy( pbTag, &pState->macBlock[0], cbTag ); + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock == 0 ); +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptCcmDecryptPart( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + UINT64 bytesProcessedAfterThisCall; + + SYMCRYPT_CHECK_MAGIC( pState ); + + bytesProcessedAfterThisCall = cbData + pState->bytesProcessed; + + SYMCRYPT_ASSERT( bytesProcessedAfterThisCall >= cbData && + bytesProcessedAfterThisCall <= pState->cbData ); + + + // + // We are violating the read-once/write-once implementation rule here. + // We write the decrypted data and then read it back for the authentication function. + // In this particular situation this is safe to do. + // + // Anyone who can access the memory space that contains the source and destination of this + // function can recover the key stream used for this (key,nonce) combination. 
+ // We can think of the decryption function as merely exposing the key stream, and then the + // caller picking the ciphertext (and by implication the plaintext) to be authenticated. + // Thus the data we read during authentication is the 'real' plaintext, and the + // decryption function merely made the key stream available. + // + // Note that this would not safe in general, it is only safe because CTR mode decryption already + // reveals the key stream. + // + SymCryptCcmEncryptDecryptPart( pState, pbSrc, pbDst, cbData ); + SymCryptCcmAddMacData( pState, pbDst, cbData ); + +} + +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmDecryptFinal( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ERROR status; + + // + // Check invariants in checked builds + // + SYMCRYPT_CHECK_MAGIC( pState ); + + SYMCRYPT_ASSERT( cbTag == pState->cbTag && pState->bytesProcessed == pState->cbData ); + + SymCryptCcmPadMacData( pState ); + + // + // Set the counter value to zero to get the counter value that encrypts the tag, + // and then encrypt the tag + // We reset bytesProcessed so that the partial encrypt/decrypt function will do the right thing + // + SymCryptWipe( &pState->counterBlock[1 + pState->cbNonce], pState->cbCounter ); + + pState->bytesProcessed = 0; + + SymCryptCcmEncryptDecryptPart( pState, &pState->macBlock[0], &pState->macBlock[0], SYMCRYPT_CCM_BLOCK_SIZE ); + + if( !SymCryptEqual( pbTag, &pState->macBlock[0], cbTag ) ) + { + status = SYMCRYPT_AUTHENTICATION_FAILURE; + } + else + { + status = SYMCRYPT_NO_ERROR; + } + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock == 0 ); + + return status; +} + + +static const BYTE SymCryptCcmSelftestResult[3 + SYMCRYPT_AES_BLOCK_SIZE ] = +{ + 0x42, 0xd7, 0xda, + 0x3d, 0x9e, 0x95, 0x82, 0x29, 0x3c, 0x10, 0x9c, 0xa3, 0x39, 0x31, 0x3f, 0x18, 0xf3, 0x10, 0xf6 +}; + +VOID +SYMCRYPT_CALL +SymCryptCcmSelftest(void) +{ + 
BYTE buf[ 3 + SYMCRYPT_AES_BLOCK_SIZE ]; + SYMCRYPT_AES_EXPANDED_KEY key; + SYMCRYPT_ERROR err; + + if( SymCryptAesExpandKey( &key, SymCryptTestKey32, 16 ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'ccm0' ); + } + + SymCryptCcmEncrypt( SymCryptAesBlockCipher, + &key, + &SymCryptTestKey32[16], 12, + NULL, 0, + &SymCryptTestMsg3[0], buf, 3, + &buf[3], SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptInjectError( buf, sizeof( buf ) ); + if( memcmp( buf, SymCryptCcmSelftestResult, sizeof( buf ) ) != 0 ) + { + SymCryptFatal( 'ccm1' ); + } + + // inject error into the ciphertext or tag + SymCryptInjectError( buf, sizeof( buf ) ); + + err = SymCryptCcmDecrypt( SymCryptAesBlockCipher, + &key, + &SymCryptTestKey32[16], 12, + NULL, 0, + buf, buf, 3, + &buf[3], SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptInjectError( buf, 3 ); + + if( err != SYMCRYPT_NO_ERROR || memcmp( buf, SymCryptTestMsg3, 3 ) != 0 ) + { + SymCryptFatal( 'ccm2' ); + } + +} diff --git a/libs/symcrypt/lib/chacha20.c b/libs/symcrypt/lib/chacha20.c new file mode 100644 index 00000000000..4841babea3c --- /dev/null +++ b/libs/symcrypt/lib/chacha20.c @@ -0,0 +1,267 @@ +// +// ChaCha20.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptChaCha20CryptBlocks( + _Inout_ PSYMCRYPT_CHACHA20_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// Encrypt Src to Dst using whole blocks, starting at block floor(pState->offset/64). 
+// # blocks processed is floor( cbData / 64 )
+// pState->offset point is updated by 64 for each block encrypted
+
+
+
+#define OFFSET_MASK (((UINT64)1 << 38) - 1)     // 2^38 - 1; not referenced anywhere else in this file
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptChaCha20Init(   // Checks cbKey == 32 and cbNonce == 12, loads key and nonce as little-endian words, sets the start offset
+    _Out_                   PSYMCRYPT_CHACHA20_STATE    pState,
+    _In_reads_( cbKey )     PCBYTE                      pbKey,
+    _In_                    SIZE_T                      cbKey,
+    _In_reads_( cbNonce )   PCBYTE                      pbNonce,
+                            SIZE_T                      cbNonce,
+                            UINT64                      offset )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    if (cbKey != 32)
+    {
+        scError = SYMCRYPT_WRONG_KEY_SIZE;
+        goto cleanup;
+    }
+
+    if (cbNonce != 12)
+    {
+        scError = SYMCRYPT_WRONG_NONCE_SIZE;
+        goto cleanup;
+    }
+
+    SymCryptLsbFirstToUint32( pbKey, &pState->key[0], 8 );      // 8 words = 32 key bytes
+    SymCryptLsbFirstToUint32( pbNonce, &pState->nonce[0], 3 );  // 3 words = 12 nonce bytes
+
+    SymCryptChaCha20SetOffset( pState, offset );
+
+cleanup:
+    return scError;     // on the error paths *pState has not been written
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptChaCha20SetOffset(  // Moves the stream position to 'offset' bytes and marks the cached keystream block invalid
+    _Inout_ PSYMCRYPT_CHACHA20_STATE    pState,
+            UINT64                      offset )
+{
+    pState->offset = offset;
+    pState->keystreamBufferValid = FALSE;
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptChaCha20Crypt(  // XORs cbData bytes of pbSrc with the keystream at pState->offset into pbDst and advances the offset
+    _Inout_                 PSYMCRYPT_CHACHA20_STATE    pState,
+    _In_reads_( cbData )    PCBYTE                      pbSrc,
+    _Out_writes_( cbData )  PBYTE                       pbDst,
+                            SIZE_T                      cbData )
+{
+    UINT32 blockOffset;
+    SIZE_T nBytes;
+
+    blockOffset = pState->offset & 0x3f;    // byte position inside the current 64-byte block
+
+    // If the offset is in the middle of the block, we first crypt until the end
+    // of the block
+    if( blockOffset != 0 )
+    {
+        if( !pState->keystreamBufferValid )
+        {
+            // Generate a block of key stream
+            SymCryptWipe( &pState->keystream[0], 64 );
+            SymCryptChaCha20CryptBlocks( pState,
+                                         &pState->keystream[0],
+                                         &pState->keystream[0],
+                                         64 );
+            pState->offset -= 64;   // Don't update the offset yet
+        }
+
+        nBytes = 64 - blockOffset;  // # bytes in buffer starting at offset
+        if( cbData < nBytes )
+        {
+            // We don't use the generated block to the end. The buffer will be valid
+            // at the end as the offset won't advance beyond the block.
+            nBytes = cbData;
+            pState->keystreamBufferValid = TRUE;
+        } else {
+            // We'll use the rest of the generated block. After that the key stream
+            // buffer won't be valid as the offset will advance beyond it.
+            pState->keystreamBufferValid = FALSE;
+        }
+
+        SymCryptXorBytes( pbSrc, &pState->keystream[ blockOffset ], pbDst, nBytes );
+        pbSrc += nBytes;
+        pbDst += nBytes;
+        cbData -= nBytes;
+        pState->offset += nBytes;
+    }
+
+    // Here: pbSrc, pbDst, cbData, and pState->offset all in sync
+    // and either cbData == 0 or offset is at a block boundary
+
+    if( cbData >= 64 )
+    {
+        nBytes = cbData & ~0x3f;    // cbData rounded down to a multiple of 64
+        SymCryptChaCha20CryptBlocks( pState, pbSrc, pbDst, nBytes );    // also advances pState->offset by nBytes
+        pbSrc += nBytes;
+        pbDst += nBytes;
+        cbData -= nBytes;
+    }
+
+    if( cbData > 0 )
+    {
+        // Generate a block of key stream
+        SymCryptWipe( &pState->keystream[0], 64 );
+        SymCryptChaCha20CryptBlocks( pState,
+                                     &pState->keystream[0],
+                                     &pState->keystream[0],
+                                     64 );
+        pState->offset -= 64;       // Don't update the offset yet
+        pState->keystreamBufferValid = TRUE;
+
+        SymCryptXorBytes( pbSrc, &pState->keystream[0], pbDst, cbData );
+        pState->offset += cbData;
+        // The following updates are correct but not needed
+        // pbSrc += cbData;
+        // pbDst += cbData;
+        // cbData -= cbData;
+    }
+}
+
+#define CHACHA_QUARTERROUND( a, b, c, d ) { \
+    a += b; d ^= a; d = ROL32( d, 16 ); \
+    c += d; b ^= c; b = ROL32( b, 12 ); \
+    a += b; d ^= a; d = ROL32( d,  8 ); \
+    c += d; b ^= c; b = ROL32( b,  7 ); \
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptChaCha20CryptBlocks(    // Processes floor(cbData/64) whole blocks; a trailing partial block is left untouched
+    _Inout_                 PSYMCRYPT_CHACHA20_STATE    pState,
+    _In_reads_( cbData )    PCBYTE                      pbSrc,
+    _Out_writes_( cbData )  PBYTE                       pbDst,
+                            SIZE_T                      cbData )
+{
+    UINT32 counter;
+    UINT32 s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
+    int i;
+
+    counter = (UINT32)(pState->offset >> 6);    // block index = offset / 64, truncated to 32 bits
+
+    while( cbData >= 64 )
+    {
+        // Initialize the state
+        s0 = 0x61707865;
+        s1 = 0x3320646e;
+        s2 = 0x79622d32;
+        s3 = 0x6b206574;
+        s4 = pState->key[0];
+        s5 = pState->key[1];
+        s6 = pState->key[2];
+        s7 = pState->key[3];
+        s8 = pState->key[4];
+        s9 = pState->key[5];
+        s10 = pState->key[6];
+        s11 = pState->key[7];
+        s12 = counter;
+        s13 = pState->nonce[0];
+        s14 = pState->nonce[1];
+        s15 = pState->nonce[2];
+
+        for( i=0; i<10; i++ )   // each iteration applies eight quarter rounds
+        {
+            CHACHA_QUARTERROUND( s0 , s4 , s8 , s12 );
+            CHACHA_QUARTERROUND( s1 , s5 , s9 , s13 );
+            CHACHA_QUARTERROUND( s2 , s6 , s10, s14 );
+            CHACHA_QUARTERROUND( s3 , s7 , s11, s15 );
+
+            CHACHA_QUARTERROUND( s0 , s5 , s10, s15 );
+            CHACHA_QUARTERROUND( s1 , s6 , s11, s12 );
+            CHACHA_QUARTERROUND( s2 , s7 , s8 , s13 );
+            CHACHA_QUARTERROUND( s3 , s4 , s9 , s14 );
+        }
+
+        s0 += 0x61707865;       // add the initial state words back in
+        s1 += 0x3320646e;
+        s2 += 0x79622d32;
+        s3 += 0x6b206574;
+        s4 += pState->key[0];
+        s5 += pState->key[1];
+        s6 += pState->key[2];
+        s7 += pState->key[3];
+        s8 += pState->key[4];
+        s9 += pState->key[5];
+        s10 += pState->key[6];
+        s11 += pState->key[7];
+        s12 += counter;
+        s13 += pState->nonce[0];
+        s14 += pState->nonce[1];
+        s15 += pState->nonce[2];
+
+        SYMCRYPT_STORE_LSBFIRST32( pbDst +  0, s0  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc +  0 ) );  // dst = keystream word XOR src word
+        SYMCRYPT_STORE_LSBFIRST32( pbDst +  4, s1  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc +  4 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst +  8, s2  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc +  8 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 12, s3  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 12 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 16, s4  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 16 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 20, s5  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 20 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 24, s6  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 24 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 28, s7  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 28 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 32, s8  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 32 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 36, s9  ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 36 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 40, s10 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 40 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 44, s11 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 44 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 48, s12 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 48 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 52, s13 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 52 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 56, s14 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 56 ) );
+        SYMCRYPT_STORE_LSBFIRST32( pbDst + 60, s15 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 60 ) );
+
+        counter ++;
+        // If counter overflows then the caller has encrypted more than 256GB of data with a single stream, which is
+        // called out as being insecure. It is the caller's responsibility to avoid this!
+        pbSrc += 64;
+        pbDst += 64;
+        cbData -= 64;
+        pState->offset += 64;
+    }
+}
+
+static const BYTE chacha20KatAnswer[ 3 ] = { 0xb5, 0xe0, 0x54 };   // expected output of the selftest below
+
+VOID
+SYMCRYPT_CALL
+SymCryptChaCha20Selftest(void)  // Known-answer test: encrypts SymCryptTestMsg3 and calls SymCryptFatal on a mismatch
+{
+    BYTE buf[3];
+    SYMCRYPT_CHACHA20_STATE state;
+
+    SymCryptChaCha20Init( &state,   // NOTE(review): the returned SYMCRYPT_ERROR is ignored here
+                          SymCryptTestKey32, sizeof( SymCryptTestKey32 ),
+                          SymCryptTestMsg16, 12,
+                          0 );
+
+    SymCryptChaCha20Crypt( &state, SymCryptTestMsg3, buf, sizeof( buf ) );
+
+    SymCryptInjectError( buf, sizeof( buf ) );
+
+    if( memcmp( buf, chacha20KatAnswer, sizeof( buf )) != 0 )
+    {
+        SymCryptFatal( 'Cha2' );
+    }
+}
diff --git a/libs/symcrypt/lib/chacha20_poly1305.c b/libs/symcrypt/lib/chacha20_poly1305.c
new file mode 100644
index 00000000000..5b46a6d209e
--- /dev/null
+++ b/libs/symcrypt/lib/chacha20_poly1305.c
@@ -0,0 +1,257 @@
+//
+// ChaCha20_Poly1305.c
+//
+// Copyright (c) Microsoft Corporation.
+//
+ #include "precomp.h"
+
+#define CHACHA20_POLY1305_MAX_DATA_SIZE (((1ull << 32) - 1) * 64)     // (2^32 - 1) blocks of 64 bytes
+
+// Compile time BOOL statically determines if we need to check cbData > CHACHA20_POLY1305_MAX_DATA_SIZE
+// Used to suppress MSVC C4127 and clang Wtautological-constant-out-of-range-compare on 32b platforms
+const BOOL fcbDataLteMaxDataSizeStatic = SIZE_T_MAX <= CHACHA20_POLY1305_MAX_DATA_SIZE;
+
+VOID
+SYMCRYPT_CALL
+SymCryptChaCha20Poly1305ComputeTag(     // MACs zero-padded auth data, zero-padded pbData and both lengths, then writes the tag to pbTag
+    _Inout_                                         PSYMCRYPT_POLY1305_STATE    pState,
+    _In_reads_opt_( cbAuthData )                    PCBYTE                      pbAuthData,
+                                                    SIZE_T                      cbAuthData,
+    _In_reads_( cbData )                            PCBYTE                      pbData,
+                                                    SIZE_T                      cbData,
+    _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE )   PBYTE                       pbTag )
+{
+    SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_POLY1305_BLOCK_SIZE];     // zero padding source, later reused for the length block
+    BYTE partialBlockSize;
+
+    SymCryptWipeKnownSize( buf, SYMCRYPT_POLY1305_BLOCK_SIZE );
+
+    // Add additional authentication data if needed.
+    if ( cbAuthData > 0 )
+    {
+        SymCryptPoly1305Append( pState, pbAuthData, cbAuthData );
+
+        // Append zeros to make a complete Poly1305 block.
+        partialBlockSize = cbAuthData % SYMCRYPT_POLY1305_BLOCK_SIZE;
+        if ( partialBlockSize > 0 )
+        {
+            SymCryptPoly1305Append( pState, buf, SYMCRYPT_POLY1305_BLOCK_SIZE - partialBlockSize );
+        }
+    }
+
+    // Add ciphertext if needed.
+    if ( cbData > 0 )
+    {
+        SymCryptPoly1305Append( pState, pbData, cbData );
+
+        // Append zeros to make a complete Poly1305 block.
+        partialBlockSize = cbData % SYMCRYPT_POLY1305_BLOCK_SIZE;
+        if ( partialBlockSize > 0 )
+        {
+            SymCryptPoly1305Append( pState, buf, SYMCRYPT_POLY1305_BLOCK_SIZE - partialBlockSize );
+        }
+    }
+
+    // Add length of additional authentication data and ciphertext.
+    SYMCRYPT_STORE_LSBFIRST64( &buf[0], cbAuthData );
+    SYMCRYPT_STORE_LSBFIRST64( &buf[8], cbData );
+    SymCryptPoly1305Append( pState, buf, SYMCRYPT_POLY1305_BLOCK_SIZE );
+    SymCryptWipeKnownSize( buf, SYMCRYPT_POLY1305_BLOCK_SIZE );
+
+    SymCryptPoly1305Result( pState, pbTag );
+}
+
+SYMCRYPT_NOINLINE
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptChaCha20Poly1305Encrypt(    // One-shot AEAD encryption: encrypts pbSrc into pbDst and writes the tag over auth data + ciphertext
+    _In_reads_( cbKey )             PCBYTE  pbKey,
+                                    SIZE_T  cbKey,
+    _In_reads_( cbNonce )           PCBYTE  pbNonce,
+                                    SIZE_T  cbNonce,
+    _In_reads_opt_( cbAuthData )    PCBYTE  pbAuthData,
+                                    SIZE_T  cbAuthData,
+    _In_reads_( cbData )            PCBYTE  pbSrc,
+    _Out_writes_( cbData )          PBYTE   pbDst,
+                                    SIZE_T  cbData,
+    _Out_writes_( cbTag )           PBYTE   pbTag,
+                                    SIZE_T  cbTag )
+{
+    SYMCRYPT_ERROR status = SYMCRYPT_NO_ERROR;
+    SYMCRYPT_CHACHA20_STATE ChaCha20State;
+    SYMCRYPT_POLY1305_STATE Poly1305State;
+    SYMCRYPT_ALIGN BYTE key[SYMCRYPT_POLY1305_KEY_SIZE];
+
+    if ( !fcbDataLteMaxDataSizeStatic && cbData > CHACHA20_POLY1305_MAX_DATA_SIZE )
+    {
+        status = SYMCRYPT_WRONG_DATA_SIZE;
+        goto cleanup;
+    }
+
+    if ( cbTag != SYMCRYPT_POLY1305_RESULT_SIZE )
+    {
+        status = SYMCRYPT_WRONG_TAG_SIZE;
+        goto cleanup;
+    }
+
+    status = SymCryptChaCha20Init( &ChaCha20State, pbKey, cbKey, pbNonce, cbNonce, 0 );   // rejects cbKey != 32 and cbNonce != 12
+    if ( status != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Generate the first 32 bytes of keystream.
+    SymCryptWipeKnownSize( key, sizeof( key ) );
+    SymCryptChaCha20Crypt( &ChaCha20State, key, key, sizeof ( key ) );
+
+    // Create the Poly1305 key using the first 32 bytes of the ChaCha20 keystream.
+    SymCryptPoly1305Init( &Poly1305State, key );
+    SymCryptWipeKnownSize( key, sizeof( key ) );
+
+    // Encrypt data if needed.
+    if ( cbData > 0 )
+    {
+        // Advance the keystream to counter 1 (offset 64) for data encryption.
+        SymCryptChaCha20SetOffset( &ChaCha20State, 64 );
+        SymCryptChaCha20Crypt( &ChaCha20State, pbSrc, pbDst, cbData );
+    }
+
+    // We read the ciphertext back, violating the general rule not to rely on I/O buffers
+    // as they can reside in a different security domain. For ChaCha20Poly1305, like GCM,
+    // this read-back of data is not a problem. An attacker with access to the buffer
+    // will get the ChaCha20 key stream plus the Poly1305 authenticator of a single value.
+    // As Poly1305 is strong even with attacker-controlled data, this is harmless.
+    SymCryptChaCha20Poly1305ComputeTag( &Poly1305State, pbAuthData, cbAuthData,
+                                        pbDst, cbData, pbTag );
+cleanup:
+
+    SymCryptWipeKnownSize( &ChaCha20State, sizeof( ChaCha20State ) );   // runs on every path, including the early error exits
+    SymCryptWipeKnownSize( &Poly1305State, sizeof( Poly1305State ) );
+
+    return status;
+}
+
+SYMCRYPT_NOINLINE
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptChaCha20Poly1305Decrypt(    // One-shot AEAD decryption: verifies the tag first and decrypts into pbDst only when it matches
+    _In_reads_( cbKey )             PCBYTE  pbKey,
+                                    SIZE_T  cbKey,
+    _In_reads_( cbNonce )           PCBYTE  pbNonce,
+                                    SIZE_T  cbNonce,
+    _In_reads_opt_( cbAuthData )    PCBYTE  pbAuthData,
+                                    SIZE_T  cbAuthData,
+    _In_reads_( cbData )            PCBYTE  pbSrc,
+    _Out_writes_( cbData )          PBYTE   pbDst,
+                                    SIZE_T  cbData,
+    _In_reads_( cbTag )             PCBYTE  pbTag,
+                                    SIZE_T  cbTag )
+{
+    SYMCRYPT_ERROR status = SYMCRYPT_NO_ERROR;
+    SYMCRYPT_CHACHA20_STATE ChaCha20State;
+    SYMCRYPT_POLY1305_STATE Poly1305State;
+    SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_POLY1305_RESULT_SIZE];    // locally computed tag; NOTE(review): not wiped at cleanup, unlike key and the two states
+    SYMCRYPT_ALIGN BYTE key[SYMCRYPT_POLY1305_KEY_SIZE];
+
+    if ( !fcbDataLteMaxDataSizeStatic && cbData > CHACHA20_POLY1305_MAX_DATA_SIZE )
+    {
+        status = SYMCRYPT_WRONG_DATA_SIZE;
+        goto cleanup;
+    }
+
+    if ( cbTag != SYMCRYPT_POLY1305_RESULT_SIZE )
+    {
+        status = SYMCRYPT_WRONG_TAG_SIZE;
+        goto cleanup;
+    }
+
+    status = SymCryptChaCha20Init( &ChaCha20State, pbKey, cbKey, pbNonce, cbNonce, 0 );   // rejects cbKey != 32 and cbNonce != 12
+    if ( status != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Generate the first 32 bytes of keystream.
+    SymCryptWipeKnownSize( key, sizeof( key ) );
+    SymCryptChaCha20Crypt( &ChaCha20State, key, key, sizeof( key ) );
+
+    // Create the Poly1305 key using the first 32 bytes of the ChaCha20 keystream.
+    SymCryptPoly1305Init( &Poly1305State, key );
+    SymCryptWipeKnownSize( key, sizeof( key ) );
+
+    // We read the ciphertext back, violating the general rule not to rely on I/O buffers
+    // as they can reside in a different security domain. For ChaCha20Poly1305, like GCM,
+    // this read-back of data is not a problem. An attacker with access to the buffer
+    // will get the ChaCha20 key stream plus the Poly1305 authenticator of a single value.
+    // As Poly1305 is strong even with attacker-controlled data, this is harmless.
+    SymCryptChaCha20Poly1305ComputeTag( &Poly1305State, pbAuthData, cbAuthData,
+                                        pbSrc, cbData, buf );
+
+    // Validate tag.
+    if (!SymCryptEqual(pbTag, buf, cbTag))
+    {
+        status = SYMCRYPT_AUTHENTICATION_FAILURE;
+        goto cleanup;   // pbDst has not been written at this point
+    }
+
+    // Decrypt data if needed.
+    if ( cbData > 0)
+    {
+        // Advance the keystream to counter 1 (offset 64) for data decryption.
+        SymCryptChaCha20SetOffset( &ChaCha20State, 64 );
+        SymCryptChaCha20Crypt( &ChaCha20State, pbSrc, pbDst, cbData );
+    }
+
+cleanup:
+
+    SymCryptWipeKnownSize( &ChaCha20State, sizeof( ChaCha20State ) );   // runs on every path, including the early error exits
+    SymCryptWipeKnownSize( &Poly1305State, sizeof( Poly1305State ) );
+
+    return status;
+}
+
+
+static const BYTE SymCryptChaCha20Poly1305Result[3 + SYMCRYPT_POLY1305_RESULT_SIZE] =   // expected 3 ciphertext bytes followed by the tag
+{
+    0x5d, 0xba, 0x7b,
+    0x80, 0x10, 0xd2, 0x05, 0x4a, 0xad, 0x53, 0x1f, 0xa2, 0xce, 0x83, 0xc1, 0x66, 0x12, 0x85, 0x21
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptChaCha20Poly1305Selftest(void)  // Known-answer test: encrypt, compare with the table above, decrypt, compare with the message
+{
+    BYTE buf[3 + SYMCRYPT_POLY1305_RESULT_SIZE];
+    SYMCRYPT_ERROR err;
+
+    if ( SymCryptChaCha20Poly1305Encrypt( SymCryptTestKey32, sizeof( SymCryptTestKey32 ),
+                                          SymCryptTestMsg16, 12,
+                                          SymCryptTestMsg16, sizeof( SymCryptTestMsg16 ),
+                                          &SymCryptTestMsg3[0], buf, 3,
+                                          &buf[3], SYMCRYPT_POLY1305_RESULT_SIZE ) != SYMCRYPT_NO_ERROR )
+    {
+        SymCryptFatal( 'ccp0' );
+    }
+
+    SymCryptInjectError( buf, sizeof( buf ) );
+    if ( memcmp( buf, SymCryptChaCha20Poly1305Result, sizeof( buf ) ) != 0 )
+    {
+        SymCryptFatal( 'ccp1' );
+    }
+
+    // Inject error into the ciphertext or tag.
+    SymCryptInjectError( buf, sizeof( buf ) );
+
+    err = SymCryptChaCha20Poly1305Decrypt( SymCryptTestKey32, sizeof( SymCryptTestKey32 ),
+                                           SymCryptTestMsg16, 12,
+                                           SymCryptTestMsg16, sizeof( SymCryptTestMsg16 ),
+                                           buf, buf, 3,
+                                           &buf[3], SYMCRYPT_POLY1305_RESULT_SIZE );
+    SymCryptInjectError( buf, 3 );
+
+    if ( err != SYMCRYPT_NO_ERROR || memcmp( buf, SymCryptTestMsg3, 3 ) != 0 )
+    {
+        SymCryptFatal( 'ccp2' );
+    }
+}
diff --git a/libs/symcrypt/lib/cpuid.c b/libs/symcrypt/lib/cpuid.c
new file mode 100644
index 00000000000..cf31adfd5ae
--- /dev/null
+++ b/libs/symcrypt/lib/cpuid.c
@@ -0,0 +1,419 @@
+//
+// cpuid.c   code for CPU feature detection based on CPUID
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+ #include "precomp.h"
+
+#if (SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64) & SYMCRYPT_MS_VC
+#include <excpt.h>
+#endif
+
+#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
+
+#ifdef __clang__
+#pragma clang attribute push (__attribute__((target("xsave"))), apply_to=function)
+#else
+#pragma GCC push_options
+#pragma GCC target("xsave")
+#endif
+
+//
+// RDRAND availability is signaled by CPUID.1.ecx[30]
+// PCLMULQDQ availability is signaled by CPUID.1.ecx[1]
+// AES_NI availability is signaled by CPUID.1.ecx[25]
+// SSSE3 availability is signaled by CPUID.1.ecx[9]
+// SSE3 availability is signaled by CPUID.1.ecx[0]
+// SSE2 availability is signaled by CPUID.1.edx[26]
+//
+
+#define CPUID_1_ECX_RDRAND_BIT      30
+#define CPUID_1_ECX_PCLMULQDQ_BIT   1
+#define CPUID_1_ECX_AESNI_BIT       25
+#define CPUID_1_ECX_SSSE3_BIT       9
+#define CPUID_1_ECX_SSE3_BIT        0
+#define CPUID_1_EDX_SSE2_BIT        26
+#define CPUID_1_EDX_SSE_BIT         25
+#define CPUID_1_ECX_AVX_BIT         28
+#define CPUID_1_ECX_CMPXCHG16B_BIT  13
+#define CPUID_70_EBX_AVX2_BIT       5
+#define CPUID_70_EBX_RDSEED_BIT     18
+#define CPUID_70_EBX_SHANI_BIT      29
+#define CPUID_70_EBX_ADX_BIT        19
+#define CPUID_70_EBX_BMI2_BIT       8
+#define CPUID_70_EBX_AVX512F_BIT    16
+#define CPUID_70_EBX_AVX512BW_BIT   30
+#define CPUID_70_EBX_AVX512DQ_BIT   17
+#define CPUID_70_EBX_AVX512VL_BIT   31
+#define CPUID_70_ECX_VAES_BIT       9
+#define CPUID_70_ECX_VPCLMULQDQ_BIT 10
+
+
+#define CPUID_1_ECX_OSXSAVE_BIT     27
+
+typedef struct _CPUID_BIT_INFO {
+    BYTE                    leaf;           // CPUID leaf to query
+    BYTE                    word;           // index into the 4-int CPUID result (WORD_EAX .. WORD_EDX)
+    BYTE                    bitno;          // bit number tested inside that word
+    SYMCRYPT_CPU_FEATURES   requiredBy;     // feature bits reported as not present when the tested bit is 0
+} CPUID_BIT_INFO;
+
+#define WORD_EAX    0
+#define WORD_EBX    1
+#define WORD_ECX    2
+#define WORD_EDX    3
+
+int g_SymCryptCpuid1[4];     // We cache the results of CPUID(1) to help diagnose CPU detection errors
+
+const
+CPUID_BIT_INFO  cpuidBitInfo[] = {      // rows are grouped by leaf so the detection loop re-issues CPUID only when the leaf changes
+    {1, WORD_ECX, CPUID_1_ECX_RDRAND_BIT,       SYMCRYPT_CPU_FEATURE_RDRAND },
+    {1, WORD_ECX, CPUID_1_ECX_PCLMULQDQ_BIT,    SYMCRYPT_CPU_FEATURE_PCLMULQDQ },
+    {1, WORD_ECX, CPUID_1_ECX_AESNI_BIT,        SYMCRYPT_CPU_FEATURE_AESNI },
+    {1, WORD_EDX, CPUID_1_EDX_SSE_BIT,          SYMCRYPT_CPU_FEATURE_SSE2 | SYMCRYPT_CPU_FEATURE_SSSE3 },
+    {1, WORD_EDX, CPUID_1_EDX_SSE2_BIT,         SYMCRYPT_CPU_FEATURE_SSE2 | SYMCRYPT_CPU_FEATURE_SSSE3 },
+    {1, WORD_ECX, CPUID_1_ECX_SSE3_BIT,         SYMCRYPT_CPU_FEATURE_SSSE3 },
+    {1, WORD_ECX, CPUID_1_ECX_SSSE3_BIT,        SYMCRYPT_CPU_FEATURE_SSSE3 },
+    {1, WORD_ECX, CPUID_1_ECX_AVX_BIT,          SYMCRYPT_CPU_FEATURE_AVX2 },
+    {1, WORD_ECX, CPUID_1_ECX_CMPXCHG16B_BIT,   SYMCRYPT_CPU_FEATURE_CMPXCHG16B },
+    {7, WORD_EBX, CPUID_70_EBX_AVX2_BIT,        SYMCRYPT_CPU_FEATURE_AVX2 },
+    {7, WORD_EBX, CPUID_70_EBX_RDSEED_BIT,      SYMCRYPT_CPU_FEATURE_RDSEED },
+    {7, WORD_EBX, CPUID_70_EBX_SHANI_BIT,       SYMCRYPT_CPU_FEATURE_SHANI },
+    {7, WORD_EBX, CPUID_70_EBX_ADX_BIT,         SYMCRYPT_CPU_FEATURE_ADX },
+    {7, WORD_EBX, CPUID_70_EBX_BMI2_BIT,        SYMCRYPT_CPU_FEATURE_BMI2 },
+    {7, WORD_EBX, CPUID_70_EBX_AVX512F_BIT,     SYMCRYPT_CPU_FEATURE_AVX512 },
+    {7, WORD_EBX, CPUID_70_EBX_AVX512VL_BIT,    SYMCRYPT_CPU_FEATURE_AVX512 },
+    {7, WORD_EBX, CPUID_70_EBX_AVX512BW_BIT,    SYMCRYPT_CPU_FEATURE_AVX512 },
+    {7, WORD_EBX, CPUID_70_EBX_AVX512DQ_BIT,    SYMCRYPT_CPU_FEATURE_AVX512 },
+    {7, WORD_ECX, CPUID_70_ECX_VAES_BIT,        SYMCRYPT_CPU_FEATURE_VAES },
+    {7, WORD_ECX, CPUID_70_ECX_VPCLMULQDQ_BIT,  SYMCRYPT_CPU_FEATURE_VAES },
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptDetectCpuFeaturesByCpuid( UINT32 flags )    // Computes the not-present feature mask and stores it in g_SymCryptCpuFeaturesNotPresent
+{
+    UINT32 result;
+    int CPUInfo[4];
+    int InfoType;
+    int maxInfoType;
+    int i;
+    BOOLEAN allowYmm, allowZmm;
+    INT64 xGetBvResult;
+
+    //
+    // Mark all features as present (the result bits indicate not-present, so set the features we know to 0).
+    //
+    result = ~ (UINT32)(
+        SYMCRYPT_CPU_FEATURE_SSE2           |
+        SYMCRYPT_CPU_FEATURE_SSSE3          |
+        SYMCRYPT_CPU_FEATURE_AESNI          |
+        SYMCRYPT_CPU_FEATURE_PCLMULQDQ      |
+        SYMCRYPT_CPU_FEATURE_AVX2           |
+        SYMCRYPT_CPU_FEATURE_SHANI          |
+        SYMCRYPT_CPU_FEATURE_BMI2           |
+        SYMCRYPT_CPU_FEATURE_ADX            |
+        SYMCRYPT_CPU_FEATURE_RDRAND         |
+        SYMCRYPT_CPU_FEATURE_RDSEED         |
+        SYMCRYPT_CPU_FEATURE_AVX512         |
+        SYMCRYPT_CPU_FEATURE_VAES           |
+        SYMCRYPT_CPU_FEATURE_CMPXCHG16B
+        );
+
+    // InfoType holds the function id of previous cpuid
+    // so we don't have to repeatedly invoke cpuid.
+    InfoType = 0;
+    SymCryptCpuidExFunc( CPUInfo, InfoType, 0 );
+    maxInfoType = CPUInfo[WORD_EAX];    // highest leaf as reported by leaf 0 in EAX
+
+    for( i=0; i< sizeof( cpuidBitInfo ) / sizeof( *cpuidBitInfo ); i++ )
+    {
+        if( cpuidBitInfo[i].leaf != InfoType )
+        {
+            InfoType = cpuidBitInfo[i].leaf;
+            SymCryptCpuidExFunc( CPUInfo, InfoType, 0 );
+        }
+        if( cpuidBitInfo[i].leaf > maxInfoType || (CPUInfo[ cpuidBitInfo[i].word ] & (1UL << cpuidBitInfo[i].bitno) ) == 0 )
+        {
+            result |= cpuidBitInfo[i].requiredBy;   // leaf unavailable or bit clear: mark the dependent feature(s) not present
+        }
+    }
+
+    if( (flags & SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM) != 0 )
+    {
+        //
+        // Check for OS support of the YMM registers.
+        // This detection is optional in any environment because some environments are single-threaded, and
+        // OS support is not required. (E.g. Boot library.)
+        //
+        // We use the following logic:
+        // Check that the OSXSAVE bit is 1, which means we can use XGETBV
+        // Use XGETBV and check that XCR0[2:1] = '11b' signaling that both XMM and YMM are enabled by OS
+        // Note that we only disable the AVX2 usage; AESNI & XMM registers are used independent of OS support, because
+        // all our (known) OSes have it.
+        //
+        allowYmm = FALSE;
+        allowZmm = FALSE;
+        SymCryptCpuidExFunc( CPUInfo, 1, 0 );
+
+        if( (CPUInfo[WORD_ECX] & (1 << CPUID_1_ECX_OSXSAVE_BIT)) != 0 )
+        {
+            // OSXSAVE bit is set, we can use XGETBV
+            xGetBvResult = _xgetbv( _XCR_XFEATURE_ENABLED_MASK );
+
+            // Check that bits 1 and 2 are set, corresponding to the XMM and YMM register state
+            if( (xGetBvResult & 0x6) == 0x6)
+            {
+                allowYmm = TRUE;
+
+                //
+                // For AVX-512, also check that bits 5, 6, and 7 are set, corresponding to the
+                // opmask, ZMM (0-15), and ZMM (16-31) register states
+                // This follows the recommendation in the Intel 64 and IA-32 Architectures Software
+                // Developer's Manual, Volume 1, 15.3 / 15.4.
+                //
+                // It seems plausible that on some system the OS would not support save/restore of
+                // AVX-512 state, but use of AVX-512VL instructions on Ymm or Xmm registers would be
+                // OK, however Intel explicitly suggests that we should only use AVX512-VL if the
+                // support is indicated by xgetbv, so we use the same logic as for AVX2 (our
+                // SymCrypt feature indicates both CPU support, and OS support for saving/restoring
+                // the extended state)
+                //
+                if( (xGetBvResult & 0xe0) == 0xe0)
+                {
+                    allowZmm = TRUE;
+                }
+            }
+        }
+
+        if( !allowYmm )
+        {
+            // Disallow the AVX2-dependent code because we don't have OS YMM support.
+            result |= SYMCRYPT_CPU_FEATURE_AVX2;
+        }
+
+        if( !allowZmm )
+        {
+            // Disallow any AVX512-dependent code because we don't have OS ZMM support.
+            // Note that not all AVX-512 dependent code will need to save/restore ZMM state, but we
+            // do not support AVX-512 instructions (even acting on YMM or XMM registers), unless the
+            // OS indicates support via XCR0
+            result |= SYMCRYPT_CPU_FEATURE_AVX512;
+        }
+    }
+
+
+    if( (result & SYMCRYPT_CPU_FEATURE_AESNI) == 0 ) // thus, if AES-NI is present according to CPUID
+    {
+        //
+        // In Win7 Beta we had an interesting crash bucket.
+        // It only occurred on the AsusTek A6K line of laptops which sometimes
+        // set the cpuid AES-NI bit (but not always). This leads to a crash as
+        // we start using AES instructions that don't exist on those machines.
+        //
+        // I found on-line reviews for the A6K line from december 2005 so it was launched around
+        // that time.
+        //
+        // These laptops all have AMD CPUs, so we fix it by locking out the particular AMD CPUs
+        // families that don't have AES-NI anyway.
+        //
+        // We really shouldn't need this logic, and it only slows things down.
+        // We should be able to remove it at some point in the future.
+        //
+        // At AMD's recommendation, we use the logic below.
+        // The AMD engineers reviewed this code to ensure we don't lock out future CPUs
+        // that will have AES-NI.
+        //
+        SymCryptCpuidExFunc( CPUInfo, 0, 0 );
+        if( CPUInfo[WORD_EBX] == 'htuA'
+            && CPUInfo[WORD_ECX] == 'DMAc'
+            && CPUInfo[WORD_EDX] == 'itne' )
+        {
+            //
+            // We have an AMD cpu, check the family.
+            //
+            UINT32 baseFamily;
+            UINT32 extFamily;
+            UINT32 family;
+
+            //
+            // Extract the base family and extended family values, and combine them to the full
+            // family value.
+            //
+            SymCryptCpuidExFunc( CPUInfo, 1, 0 );
+
+            baseFamily = (CPUInfo[WORD_EAX] >> 8) & 0xf;    // EAX bits 8..11
+
+            extFamily = (CPUInfo[WORD_EAX] >> 20) & 0xff;   // EAX bits 20..27
+
+            if( baseFamily < 0xf )
+            {
+                family = baseFamily;
+            } else {
+                family = baseFamily + extFamily;
+            }
+
+            //
+            // AMD will not implement the AES instruction set until family 0x15
+            //
+            if( family < 0x15 )
+            {
+                result |= SYMCRYPT_CPU_FEATURE_AESNI;
+            }
+        }
+    }
+
+    SymCryptCpuidExFunc( g_SymCryptCpuid1, 1, 0 );        // Keep cache of CPUID results for diagnosis
+
+    g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) result;
+}
+
+#ifdef __clang__
+#pragma clang attribute pop
+#else
+#pragma GCC pop_options
+#endif
+
+#elif SYMCRYPT_CPU_ARM && 0
+
+#define CP15_ISAR5             15, 0,  0,  2, 5         // Instruction Set Attribute Register 5
+
+#define READ_ARM_FEATURE(_FeatureRegister, _Index) \
+        (((ULONG)_MoveFromCoprocessor(_FeatureRegister) >> ((_Index) * 4)) & 0xF)
+
+#define ISAR5_AES                   1
+#define ISAR5_AES_AESE              1
+#define ISAR5_AES_PMULL             2
+
+#define ISAR5_SHA2                  3
+#define ISAR5_SHA2_SHA256H          1
+
+#define ISAR5_CRC32                 4
+#define ISAR5_CRC32_IMP             1
+
+VOID
+SYMCRYPT_CALL
+SymCryptDetectCpuFeaturesFromRegisters(void)
+{
+    UINT32 result;
+
+#if 0       // We currently do not use any neon crypto features on ARM code, so no detection needed.
+
+    //
+    // We start with a result that allows everything.
+    // This makes the code simpler when you have one CPU feature flag that disables multiple feature bits.
+    //
+    result = ~ (UINT32)(
+        SYMCRYPT_CPU_FEATURE_NEON           |
+        SYMCRYPT_CPU_FEATURE_NEON_AES       |
+        SYMCRYPT_CPU_FEATURE_NEON_PMULL     |
+        SYMCRYPT_CPU_FEATURE_NEON_SHA256
+        );
+
+    //
+    // Reading the status registers might fail, so we use a try block.
+    //
+    try {
+
+        if( READ_ARM_FEATURE(CP15_ISAR5, ISAR5_AES) < ISAR5_AES_AESE )
+        {
+            result |= SYMCRYPT_CPU_FEATURE_NEON_AES;
+        }
+
+        if( READ_ARM_FEATURE(CP15_ISAR5, ISAR5_AES) < ISAR5_AES_PMULL )
+        {
+            result |= SYMCRYPT_CPU_FEATURE_NEON_PMULL;
+        }
+
+        if( READ_ARM_FEATURE(CP15_ISAR5, ISAR5_SHA2) < ISAR5_SHA2_SHA256H )
+        {
+            result |= SYMCRYPT_CPU_FEATURE_NEON_SHA256;
+        }
+
+    } except(EXCEPTION_EXECUTE_HANDLER) {
+        //
+        // Something went wrong reading the registers; disable all the crypto extensions leaving only the standard NEON registers available.
+        //
+        result |= SYMCRYPT_CPU_FEATURE_NEON_AES | SYMCRYPT_CPU_FEATURE_NEON_PMULL | SYMCRYPT_CPU_FEATURE_NEON_SHA256;
+    }
+#endif
+    //
+    // For now we ignore the new instructions in ARM until we can get clarity on how to detect Arm32-on-Arm64.
+    //
+    result = ~(UINT32)SYMCRYPT_CPU_FEATURE_NEON;    // only plain NEON is reported as present
+
+    g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) result;
+}
+
+
+#elif SYMCRYPT_CPU_ARM64 && 0
+
+
+#define ARM64_SYSREG(op0, op1, crn, crm, op2) \
+        ( ((op0 & 1) << 14) | \
+          ((op1 & 7) << 11) | \
+          ((crn & 15) << 7) | \
+          ((crm & 15) << 3) | \
+          ((op2 & 7) << 0) )
+
+#define ARM64_ID_AA64ISAR0_EL1      ARM64_SYSREG(3,0, 0, 6,0)   // ISA Feature Register 0
+
+#define ISAR0_AES                   1
+#define ISAR0_AES_NI                0
+#define ISAR0_AES_INSTRUCTIONS      1
+#define ISAR0_AES_PLUS_PMULL64      2
+
+#define ISAR0_SHA2                  3
+#define ISAR0_SHA2_NI               0
+#define ISAR0_SHA2_INSTRUCTIONS     1
+
+#define ISAR0_CRC32                 4
+#define ISAR0_CRC32_NI              0
+#define ISAR0_CRC32_INSTRUCTIONS    1
+
+#define READ_ARM64_FEATURE(_FeatureRegister, _Index) \
+        (((ULONG64)_ReadStatusReg(_FeatureRegister) >> ((_Index) * 4)) & 0xF)
+
+VOID
+SYMCRYPT_CALL
+SymCryptDetectCpuFeaturesFromRegisters(void)
+{
+    UINT32 result;
+
+    result = ~ (UINT32)(
+        SYMCRYPT_CPU_FEATURE_NEON           |
+        SYMCRYPT_CPU_FEATURE_NEON_AES       |
+        SYMCRYPT_CPU_FEATURE_NEON_PMULL     |
+        SYMCRYPT_CPU_FEATURE_NEON_SHA256
+        );
+
+#if SYMCRYPT_MS_VC
+    __try {
+
+        if( READ_ARM64_FEATURE(ARM64_ID_AA64ISAR0_EL1, ISAR0_AES) <
ISAR0_AES_INSTRUCTIONS ) + { + result |= SYMCRYPT_CPU_FEATURE_NEON_AES; + } + + if( READ_ARM64_FEATURE(ARM64_ID_AA64ISAR0_EL1, ISAR0_AES) < ISAR0_AES_PLUS_PMULL64 ) + { + result |= SYMCRYPT_CPU_FEATURE_NEON_PMULL; + } + + if( READ_ARM64_FEATURE(ARM64_ID_AA64ISAR0_EL1, ISAR0_SHA2) < ISAR0_SHA2_INSTRUCTIONS ) + { + result |= SYMCRYPT_CPU_FEATURE_NEON_SHA256; + } + + g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) result; + + } __except(EXCEPTION_EXECUTE_HANDLER) { + ; //NOTHING; + } +#endif + +} + +#endif // CPU arch selection diff --git a/libs/symcrypt/lib/cpuid_um.c b/libs/symcrypt/lib/cpuid_um.c new file mode 100644 index 00000000000..64e7e7d4fbe --- /dev/null +++ b/libs/symcrypt/lib/cpuid_um.c @@ -0,0 +1,131 @@ +// +// cpuid_um.c code for CPU feature detection based on OS features available in user-mode +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This file contains the feature detection code that is only compiled for user-mode. +// The IsProcessorFeaturePresent API is only in UM, so linking any code out of +// a source file that contains a call to it doesn't work for KM code. +// By splitting it into a separate file, the code is ignored by KM callers because +// they never reference anything in this file. 
+//
+
+
+
+#include "precomp.h"
+
+#if SYMCRYPT_CPU_ARM64 && SYMCRYPT_PLATFORM_WINDOWS
+#undef UNREFERENCED_PARAMETER
+#include <processthreadsapi.h>
+
+// From winnt.h
+#define PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE 30
+
+//
+// Windows / Arm64 variant.
+// Publishes g_SymCryptCpuFeaturesNotPresent from IsProcessorFeaturePresent: NEON is
+// always recorded as present, and the AES, PMULL and SHA256 extensions are recorded
+// as present together when PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE is reported.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(void)
+{
+    // Collect the features that ARE present, then invert once at the end.
+    SYMCRYPT_CPU_FEATURES present = SYMCRYPT_CPU_FEATURE_NEON;
+
+    if( IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) )
+    {
+        present |= SYMCRYPT_CPU_FEATURE_NEON_AES |
+                   SYMCRYPT_CPU_FEATURE_NEON_PMULL |
+                   SYMCRYPT_CPU_FEATURE_NEON_SHA256;
+    }
+
+    g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~present;
+}
+
+#elif SYMCRYPT_CPU_ARM64 && SYMCRYPT_GNUC
+
+#if SYMCRYPT_PLATFORM_APPLE
+#include <sys/sysctl.h>
+
+//
+// Apple / Arm64 variant.
+// Starts from "everything except NEON is absent" and clears the not-present bit of
+// each crypto extension whose hw.optional.arm.FEAT_* sysctl can be read and is non-zero.
+// A sysctl that fails to read leaves the corresponding feature marked as absent.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(void)
+{
+    // Probed in this order: AES, PMULL, SHA256.
+    static const struct {
+        const char *            szSysctlName;
+        SYMCRYPT_CPU_FEATURES   feature;
+    } probes[] = {
+        { "hw.optional.arm.FEAT_AES",    SYMCRYPT_CPU_FEATURE_NEON_AES },
+        { "hw.optional.arm.FEAT_PMULL",  SYMCRYPT_CPU_FEATURE_NEON_PMULL },
+        { "hw.optional.arm.FEAT_SHA256", SYMCRYPT_CPU_FEATURE_NEON_SHA256 },
+    };
+
+    // Arm64 code relies on ASIMD being available everywhere, so NEON is never
+    // reported as missing.
+    SYMCRYPT_CPU_FEATURES notPresent = ~SYMCRYPT_CPU_FEATURE_NEON;
+    size_t i;
+
+    for( i = 0; i < sizeof(probes) / sizeof(probes[0]); i++ )
+    {
+        // Fresh value and length for every query.
+        uint32_t value = 0;
+        size_t cbValue = sizeof(value);
+
+        if( sysctlbyname(probes[i].szSysctlName, &value, &cbValue, NULL, 0) == 0 && value )
+        {
+            notPresent &= ~probes[i].feature;
+        }
+    }
+
+    g_SymCryptCpuFeaturesNotPresent = notPresent;
+}
+
+#else // Linux and other Unix platforms
+
+#include <sys/auxv.h>
+
+// #include <asm/hwcap.h>
+#define HWCAP_AES   (1 << 3)
+#define HWCAP_PMULL (1 << 4)
+#define HWCAP_SHA2  (1 << 6)
+
+//
+// Linux / other Unix Arm64 variant.
+// Derives the feature set from the AT_HWCAP auxiliary vector entry: NEON is always
+// recorded as present, and AES, PMULL and SHA256 are recorded as present only when
+// the matching HWCAP bit is set.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(void)
+{
+    const unsigned long hwcaps = getauxval( AT_HWCAP );
+
+    // Arm64 code relies on ASIMD being available everywhere, so NEON is
+    // unconditionally part of the present set.
+    SYMCRYPT_CPU_FEATURES present = SYMCRYPT_CPU_FEATURE_NEON;
+
+    if( hwcaps & HWCAP_AES )
+    {
+        present |= SYMCRYPT_CPU_FEATURE_NEON_AES;
+    }
+
+    if( hwcaps & HWCAP_PMULL )
+    {
+        present |= SYMCRYPT_CPU_FEATURE_NEON_PMULL;
+    }
+
+    if( hwcaps & HWCAP_SHA2 )
+    {
+        present |= SYMCRYPT_CPU_FEATURE_NEON_SHA256;
+    }
+
+    g_SymCryptCpuFeaturesNotPresent = ~present;
+}
+
+#endif // SYMCRYPT_PLATFORM_APPLE
+
+#endif
diff --git a/libs/symcrypt/lib/crt.c b/libs/symcrypt/lib/crt.c
new file mode 100644
index 00000000000..85148646b7f
--- /dev/null
+++ b/libs/symcrypt/lib/crt.c
@@ -0,0 +1,215 @@
+//
+// crt.c   Chinese Remainder Theorem Algorithms
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// SymCryptCrtGenerateForTwoCoprimes
+//
+// Computes the two CRT coefficients for a pair of moduli P and Q:
+// peInvQModP is written with the value named Q^{-1} mod P and peInvPModQ with
+// the value named P^{-1} mod Q (as produced by SymCryptIntExtendedGcd).
+//
+// The extended GCD requires a non-zero first operand and an odd second operand,
+// so the operands are ordered (Q, P) when P is odd and (P, Q) otherwise.
+//
+// Scratch: two integers of max(digits(P), digits(Q)) digits are carved from the
+// front of pbScratch; the remainder is handed to the GCD and conversion routines.
+// The required size is only checked with SYMCRYPT_ASSERT.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT when the operand requirements above cannot be
+// met (first operand zero, or second operand even), SYMCRYPT_NO_ERROR otherwise.
+// On error the output elements are not written.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptCrtGenerateForTwoCoprimes(
+    _In_                            PCSYMCRYPT_MODULUS      pmP,
+    _In_                            PCSYMCRYPT_MODULUS      pmQ,
+                                    UINT32                  flags,
+    _Out_                           PSYMCRYPT_MODELEMENT    peInvQModP,
+    _Out_                           PSYMCRYPT_MODELEMENT    peInvPModQ,
+    _Out_writes_bytes_( cbScratch )
+                                    PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PCSYMCRYPT_INT piSrc1 = NULL;
+    PCSYMCRYPT_INT piSrc2 = NULL;
+
+    PSYMCRYPT_INT piInvSrc1ModSrc2 = NULL;
+    PSYMCRYPT_INT piInvSrc2ModSrc1 = NULL;
+
+    UINT32 nDigits = 0;
+    UINT32 cbInt = 0;
+
+    BOOLEAN oddP = FALSE;
+
+    SYMCRYPT_ASSERT( pmP != NULL );
+    SYMCRYPT_ASSERT( pmQ != NULL );
+
+    nDigits = SYMCRYPT_MAX( SymCryptModulusDigitsizeOfObject( pmP ), SymCryptModulusDigitsizeOfObject( pmQ ));
+
+    // Create two temporary integers
+    cbInt = SymCryptSizeofIntFromDigits( nDigits );
+
+    SYMCRYPT_ASSERT( cbScratch >= 2*cbInt + SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD( nDigits ));
+
+    piInvSrc1ModSrc2 = SymCryptIntCreate( pbScratch, cbInt, nDigits ); pbScratch += cbInt; cbScratch -= cbInt;
+    piInvSrc2ModSrc1 = SymCryptIntCreate( pbScratch, cbInt, nDigits ); pbScratch += cbInt; cbScratch -= cbInt;
+
+    // Put an odd modulus in the second position whenever P allows it.
+    oddP = ((SymCryptIntGetValueLsbits32(SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmP )) & 1) == 1);
+    if (oddP)
+    {
+        piSrc1 = SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmQ );
+        piSrc2 = SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmP );
+    }
+    else
+    {
+        piSrc1 = SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmP );
+        piSrc2 = SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmQ );
+    }
+
+    // IntExtendedGcd requirements:
+    //      - First argument > 0
+    //      - Second argument odd
+    if( SymCryptIntIsEqualUint32(piSrc1, 0) ||
+        ((SymCryptIntGetValueLsbits32(piSrc2) & 1) != 1) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Extended GCD
+    SymCryptIntExtendedGcd( piSrc1, piSrc2, flags, NULL, NULL, piInvSrc1ModSrc2, piInvSrc2ModSrc1, pbScratch, cbScratch );
+
+    // Map the (Src1, Src2) results back onto the (P, Q) naming of the outputs.
+    if (oddP)
+    {
+        SymCryptIntToModElement( piInvSrc2ModSrc1, pmQ, peInvPModQ, pbScratch, cbScratch );
+        SymCryptIntToModElement( piInvSrc1ModSrc2, pmP, peInvQModP, pbScratch, cbScratch );
+    }
+    else
+    {
+        SymCryptIntToModElement( piInvSrc2ModSrc1, pmP, peInvQModP, pbScratch, cbScratch );
+        SymCryptIntToModElement( piInvSrc1ModSrc2, pmQ, peInvPModQ, pbScratch, cbScratch );
+    }
+
+cleanup:
+    return scError;
+}
+
+
+//
+// SymCryptCrtGenerateInverses
+//
+// Generates the CRT inverses for the moduli in ppmCoprimes. Only nCoprimes == 2 is
+// implemented; ppeCrtInverses[0] and ppeCrtInverses[1] are then filled by
+// SymCryptCrtGenerateForTwoCoprimes.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT for any other nCoprimes, and otherwise
+// propagates the result of SymCryptCrtGenerateForTwoCoprimes so that callers
+// learn when the inverses could not be computed.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptCrtGenerateInverses(
+                                    UINT32                  nCoprimes,
+    _In_reads_( nCoprimes )         PCSYMCRYPT_MODULUS *    ppmCoprimes,
+                                    UINT32                  flags,
+    _Out_writes_( nCoprimes )       PSYMCRYPT_MODELEMENT *  ppeCrtInverses,
+    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    if (nCoprimes != 2)
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Do not drop the helper's status: on failure the outputs are left unwritten.
+    scError = SymCryptCrtGenerateForTwoCoprimes(
+                    ppmCoprimes[0],
+                    ppmCoprimes[1],
+                    flags,
+                    ppeCrtInverses[0],
+                    ppeCrtInverses[1],
+                    pbScratch,
+                    cbScratch );
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    return scError;
+}
+
+//
+// SymCryptCrtSolve
+//
+// Reconstructs an integer from its remainders modulo two coprime moduli p and q
+// (ppmCoprimes[0] and ppmCoprimes[1]) as (q^{-1}(r0 - r1) mod p)*q + r1, using
+// ppeCrtInverses[0] as q^{-1} mod p. Only nCoprimes == 2 is implemented.
+// The flags parameter is unused.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT when nCoprimes != 2, when an integer of twice
+// the digit size cannot be represented, or when the final addition carries out of
+// piSolution; also propagates a failure of SymCryptIntCopyMixedSize.
+// The scratch size requirement is only checked with SYMCRYPT_ASSERT.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptCrtSolve(
+                                    UINT32                  nCoprimes,
+    _In_reads_( nCoprimes )         PCSYMCRYPT_MODULUS *    ppmCoprimes,
+    _In_reads_( nCoprimes )         PCSYMCRYPT_MODELEMENT * ppeCrtInverses,
+    _In_reads_( nCoprimes )         PCSYMCRYPT_MODELEMENT * ppeCrtRemainders,
+                                    UINT32                  flags,
+    _Out_                           PSYMCRYPT_INT           piSolution,
+    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PSYMCRYPT_INT piTmp = NULL;
+    PSYMCRYPT_MODELEMENT peTmp = NULL;
+
+    PSYMCRYPT_INT piDouble = NULL;
+
+    UINT32 nDigitsMax = 0;
+
+    UINT32 cbInt = 0;
+    UINT32 cbModElement = 0;
+    UINT32 cbDouble = 0;
+
+    UINT32 carry = 0;
+
+    UNREFERENCED_PARAMETER( flags );
+
+    SYMCRYPT_ASSERT( nCoprimes >= 2 );
+
+    // Only the two-coprime case is implemented. Reject everything else before
+    // ppmCoprimes[0] and ppmCoprimes[1] are read, so that a build without asserts
+    // never indexes past an array of fewer than two entries.
+    if (nCoprimes != 2)
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    nDigitsMax = SYMCRYPT_MAX( SymCryptModulusDigitsizeOfObject( ppmCoprimes[0] ), SymCryptModulusDigitsizeOfObject( ppmCoprimes[1] ) );
+
+    cbInt = SymCryptSizeofIntFromDigits( nDigitsMax );
+    cbModElement = SymCryptSizeofModElementFromModulus( ppmCoprimes[0] );
+    cbDouble = SymCryptSizeofIntFromDigits( 2*nDigitsMax );
+
+    if( cbDouble == 0 )
+    {
+        // It is possible that cbDouble would not fit within the maximum integer
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    SYMCRYPT_ASSERT( cbScratch >= cbInt + cbModElement + cbDouble +
+                                  SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsMax ),
+                                       SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( 2*nDigitsMax ) )
+                   );
+
+    // Create temporaries
+    piTmp = SymCryptIntCreate( pbScratch, cbInt, nDigitsMax ); pbScratch += cbInt; cbScratch -= cbInt;
+
+    peTmp = SymCryptModElementCreate( pbScratch, cbModElement, ppmCoprimes[0] ); pbScratch += cbModElement; cbScratch -= cbModElement;
+
+    piDouble = SymCryptIntCreate( pbScratch, cbDouble, 2*nDigitsMax ); pbScratch += cbDouble; cbScratch -= cbDouble;
+
+    //
+    // Let r0 and r1 be the two remainders modulo p and q respectively
+    // Then we calculate (q^{-1}(r0 - r1) mod p)*q + r1
+    //
+    SymCryptModElementToInt( ppmCoprimes[1], ppeCrtRemainders[1], piTmp, pbScratch, cbScratch );   // Convert r1 to Int
+    SymCryptIntToModElement( piTmp, ppmCoprimes[0], peTmp, pbScratch, cbScratch );                  // Convert it to r1 mod p
+
+    SymCryptModSub( ppmCoprimes[0], ppeCrtRemainders[0], peTmp, peTmp, pbScratch, cbScratch );      // (r0 - r1) mod p
+    SymCryptModMul( ppmCoprimes[0], ppeCrtInverses[0], peTmp, peTmp, pbScratch, cbScratch );        // q^{-1}*(r0 - r1) mod p
+    SymCryptModElementToInt( ppmCoprimes[0], peTmp, piTmp, pbScratch, cbScratch );                  // Convert it to integer
+
+    SymCryptIntMulMixedSize( piTmp, SymCryptIntFromModulus((PSYMCRYPT_MODULUS)ppmCoprimes[1]), piDouble, pbScratch, cbScratch );  // Multiply by q
+    scError = SymCryptIntCopyMixedSize( piDouble, piSolution );                                     // Copy it into the solution
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    SymCryptModElementToInt( ppmCoprimes[1], ppeCrtRemainders[1], piTmp, pbScratch, cbScratch );   // Convert r1 to integer
+
+    carry = SymCryptIntAddMixedSize( piTmp, piSolution, piSolution );                               // Add it to the solution
+
+    if (carry>0)
+    {
+        // The sum does not fit in piSolution.
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+cleanup:
+    return scError;
+}
diff --git a/libs/symcrypt/lib/cshake_pattern.c b/libs/symcrypt/lib/cshake_pattern.c
new file mode 100644
index 00000000000..89e346ef7dd
--- /dev/null
+++ b/libs/symcrypt/lib/cshake_pattern.c
@@ -0,0 +1,152 @@
+//
+// cshake_pattern.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#if 0
+#pragma makedep header
+#endif
+
+//
+// This source file implements cSHAKE128 and cSHAKE256
+//
+// See the symcrypt.h file for documentation on what the various functions do.
+//
+
+
+//
+// SymCryptCShake
+//
+// One-shot helper: initializes a local state with the function-name and
+// customization strings, appends pbData, and extracts cbResult bytes with the
+// wipe flag set.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_Xxx(
+    _In_reads_( cbFunctionNameString )  PCBYTE  pbFunctionNameString,
+                                        SIZE_T  cbFunctionNameString,
+    _In_reads_( cbCustomizationString ) PCBYTE  pbCustomizationString,
+                                        SIZE_T  cbCustomizationString,
+    _In_reads_( cbData )                PCBYTE  pbData,
+                                        SIZE_T  cbData,
+    _Out_writes_( cbResult )            PBYTE   pbResult,
+                                        SIZE_T  cbResult)
+{
+    SYMCRYPT_XXX_STATE state;
+
+    SYMCRYPT_XxxInit(&state,
+                        pbFunctionNameString, cbFunctionNameString,
+                        pbCustomizationString, cbCustomizationString);
+
+    SYMCRYPT_XxxAppend(&state, pbData, cbData);
+    SYMCRYPT_XxxExtract(&state, pbResult, cbResult, TRUE);
+}
+
+
+//
+// SymCryptCShakeInit
+//
+// Initializes the state as a SHAKE state; when either input string is non-empty
+// it additionally switches to the cSHAKE padding value and encodes both strings
+// into the state.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxInit(
+    _Out_                               PSYMCRYPT_XXX_STATE pState,
+    _In_reads_( cbFunctionNameString )  PCBYTE              pbFunctionNameString,
+                                        SIZE_T              cbFunctionNameString,
+    _In_reads_( cbCustomizationString ) PCBYTE              pbCustomizationString,
+                                        SIZE_T              cbCustomizationString)
+{
+    // The cast below relies on the two state types having the same size.
+    C_ASSERT( sizeof(SYMCRYPT_XXX_STATE) == sizeof(SYMCRYPT_SHAKEXXX_STATE) );
+
+    SYMCRYPT_SHAKEXXX_INIT( (SYMCRYPT_SHAKEXXX_STATE*)pState );
+
+    // Perform cSHAKE processing of input strings when any of the input strings is non-empty
+    if (cbFunctionNameString != 0 || cbCustomizationString != 0)
+    {
+        // cSHAKE and SHAKE have different paddings. pState->ks.paddingValue
+        // is set to SYMCRYPT_SHAKE_PADDING_VALUE in the SHAKE initialization above.
+        // We update the padding value here because at least one of the input strings
+        // is non-empty and cSHAKE will not default to SHAKE.
+        pState->ks.paddingValue = SYMCRYPT_CSHAKE_PADDING_VALUE;
+
+        SymCryptCShakeEncodeInputStrings(&pState->ks,
+                                        pbFunctionNameString, cbFunctionNameString,
+                                        pbCustomizationString, cbCustomizationString);
+    }
+
+    SYMCRYPT_SET_MAGIC(pState);
+}
+
+//
+// SymCryptCShakeAppend
+//
+// Appends cbData bytes to the state, first restoring the SHAKE padding value if
+// the state is currently in squeeze mode.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxAppend(
+    _Inout_                 PSYMCRYPT_XXX_STATE pState,
+    _In_reads_( cbData )    PCBYTE              pbData,
+                            SIZE_T              cbData )
+{
+    // Fixing of the padding value
+    //
+    // SymCryptKeccakAppend will reset the state, switch to absorb mode,
+    // and append data to the empty state if the state was in squeeze mode
+    // when Append is called. This behavior is equivalent to initializing
+    // cSHAKE with empty input strings, which makes cSHAKE a SHAKE instance.
+    //
+    // cSHAKE and SHAKE have different paddings, so we have to update the
+    // padding value in case it was cSHAKE padding before.
+    if (pState->ks.squeezeMode)
+    {
+        pState->ks.paddingValue = SYMCRYPT_SHAKE_PADDING_VALUE;
+    }
+
+    SymCryptKeccakAppend(&pState->ks, pbData, cbData);
+}
+
+//
+// SymCryptCShakeExtract
+//
+// Extracts cbResult bytes; when bWipe is set the padding value is reset to the
+// SHAKE padding value afterwards.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxExtract(
+    _Inout_                 PSYMCRYPT_XXX_STATE pState,
+    _Out_writes_(cbResult)  PBYTE               pbResult,
+                            SIZE_T              cbResult,
+                            BOOLEAN             bWipe)
+{
+    SymCryptKeccakExtract(&pState->ks, pbResult, cbResult, bWipe);
+
+    if (bWipe)
+    {
+        // If the state was wiped, set the state as if cSHAKE was initialized
+        // with empty strings, which is equivalent to empty SHAKE state.
+        // We have no way to store the Function Name string and Customization
+        // string information to go back to the initial cSHAKE state.
+        pState->ks.paddingValue = SYMCRYPT_SHAKE_PADDING_VALUE;
+    }
+}
+
+//
+// SymCryptCShakeResult
+//
+// Extracts SYMCRYPT_CSHAKEXXX_RESULT_SIZE bytes with the wipe flag set and resets
+// the padding value to the SHAKE padding value.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxResult(
+    _Inout_                                         PSYMCRYPT_XXX_STATE pState,
+    _Out_writes_( SYMCRYPT_CSHAKEXXX_RESULT_SIZE )  PBYTE               pbResult)
+{
+    SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_CSHAKEXXX_RESULT_SIZE, TRUE);
+
+    // Revert to cSHAKE initialized with empty strings state, i.e., empty SHAKE state
+    pState->ks.paddingValue = SYMCRYPT_SHAKE_PADDING_VALUE;
+}
+
+//
+// SymCryptCShakeStateCopy
+//
+// Checks the magic of the source state, copies it by value and sets the magic of
+// the destination.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxStateCopy(_In_ const SYMCRYPT_XXX_STATE* pSrc, _Out_ SYMCRYPT_XXX_STATE* pDst)
+{
+    SYMCRYPT_CHECK_MAGIC(pSrc);
+    *pDst = *pSrc;
+    SYMCRYPT_SET_MAGIC(pDst);
+}
diff --git a/libs/symcrypt/lib/desx.c b/libs/symcrypt/lib/desx.c
new file mode 100644
index 00000000000..9bfbb0d23f6
--- /dev/null
+++ b/libs/symcrypt/lib/desx.c
@@ -0,0 +1,131 @@
+//
+// DesX.c   DESX implementation
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+
+#include "precomp.h"
+
+
+// Block cipher description for DESX: only key expansion and the single-block
+// encrypt/decrypt routines are provided, every mode-specific entry is NULL.
+const SYMCRYPT_BLOCKCIPHER SymCryptDesxBlockCipher_default = {
+    SymCryptDesxExpandKey,  // PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY    expandKeyFunc;
+    SymCryptDesxEncrypt,    // PSYMCRYPT_BLOCKCIPHER_CRYPT         encryptFunc;
+    SymCryptDesxDecrypt,    // PSYMCRYPT_BLOCKCIPHER_CRYPT         decryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB     ecbEncryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB     ecbDecryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE    cbcEncryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE    cbcDecryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_MAC_MODE      cbcMacFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE    ctrMsbFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc;
+    8,                      // SIZE_T                              blockSize;
+    sizeof( SYMCRYPT_DESX_EXPANDED_KEY ),   // SIZE_T              expandedKeySize;    // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY )
+};
+
+const PCSYMCRYPT_BLOCKCIPHER SymCryptDesxBlockCipher = &SymCryptDesxBlockCipher_default;
+
+//
+// Expands a 24-byte DESX key: bytes 0..7 are expanded as the DES key, bytes 8..15
+// are stored as the input whitening value and bytes 16..23 as the output whitening
+// value. Any other key length yields SYMCRYPT_WRONG_KEY_SIZE and leaves
+// pExpandedKey untouched.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptDesxExpandKey(  _Out_               PSYMCRYPT_DESX_EXPANDED_KEY pExpandedKey,
+                        _In_reads_(cbKey)   PCBYTE                      pbKey,
+                                            SIZE_T                      cbKey )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_WRONG_KEY_SIZE;
+
+    if( cbKey == 24 )
+    {
+        SymCryptDesExpandKey( &pExpandedKey->desKey, &pbKey[0], 8 );
+        memcpy( pExpandedKey->inputWhitening, &pbKey[8], 8 );
+        memcpy( pExpandedKey->outputWhitening, &pbKey[16], 8 );
+
+        scError = SYMCRYPT_NO_ERROR;
+    }
+
+    return scError;
+}
+
+//
+// Encrypts one block: XOR with the input whitening value, DES-encrypt, XOR with
+// the output whitening value.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDesxEncrypt(
+    _In_                                    PCSYMCRYPT_DESX_EXPANDED_KEY    pExpandedKey,
+    _In_reads_( SYMCRYPT_DESX_BLOCK_SIZE )  PCBYTE                          pbSrc,
+    _Out_writes_( SYMCRYPT_DESX_BLOCK_SIZE ) PBYTE                          pbDst )
+{
+    SYMCRYPT_ALIGN BYTE block[8];
+
+    //
+    // All intermediate values live in the local block so that pbSrc is read once
+    // and pbDst is written once (read once/write once rule).
+    //
+    SymCryptXorBytes( pbSrc, pExpandedKey->inputWhitening, block, sizeof( block ) );
+    SymCryptDesEncrypt( &pExpandedKey->desKey, block, block );
+    SymCryptXorBytes( block, pExpandedKey->outputWhitening, pbDst, sizeof( block ) );
+
+    SymCryptWipeKnownSize( block, sizeof( block ) );
+}
+
+//
+// Decrypts one block: XOR with the output whitening value, DES-decrypt, XOR with
+// the input whitening value.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDesxDecrypt(
+    _In_                                    PCSYMCRYPT_DESX_EXPANDED_KEY    pExpandedKey,
+    _In_reads_( SYMCRYPT_DESX_BLOCK_SIZE )  PCBYTE                          pbSrc,
+    _Out_writes_( SYMCRYPT_DESX_BLOCK_SIZE ) PBYTE                          pbDst )
+{
+    SYMCRYPT_ALIGN BYTE block[8];
+
+    //
+    // All intermediate values live in the local block so that pbSrc is read once
+    // and pbDst is written once (read once/write once rule).
+    //
+    SymCryptXorBytes( pbSrc, pExpandedKey->outputWhitening, block, sizeof( block ) );
+    SymCryptDesDecrypt( &pExpandedKey->desKey, block, block );
+    SymCryptXorBytes( block, pExpandedKey->inputWhitening, pbDst, sizeof( block ) );
+
+    SymCryptWipeKnownSize( block, sizeof( block ) );
+}
+
+
+// Known-answer test vector used by SymCryptDesxSelftest.
+static const BYTE desxKnownKey[24] = {
+    0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+    0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
+    0x01, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18
+};
+
+static const BYTE desxKnownPlaintext[] = {
+    0xd9, 0xb6, 0xa1, 0x4e, 0xe6, 0x71, 0x4e, 0x17
+};
+
+static const BYTE desxKnownCiphertext[] = {
+    0x66, 0x77, 0x1f, 0x2a, 0x0c, 0x05, 0x01, 0xca
+};
+
+
+//
+// Known-answer self test: expands the known key, then checks one encryption and
+// one decryption against the known vectors. Each failure calls SymCryptFatal with
+// its own code ('desx' key expansion, 'desy' encryption, 'desz' decryption).
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDesxSelftest(void)
+{
+    SYMCRYPT_DESX_EXPANDED_KEY expandedKey;
+    BYTE block[SYMCRYPT_DESX_BLOCK_SIZE];
+    SYMCRYPT_ERROR scError;
+
+    scError = SymCryptDesxExpandKey( &expandedKey, desxKnownKey, sizeof( desxKnownKey ) );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        SymCryptFatal( 'desx' );
+    }
+
+    // Encryption direction
+    SymCryptDesxEncrypt( &expandedKey, desxKnownPlaintext, block );
+    SymCryptInjectError( block, SYMCRYPT_DESX_BLOCK_SIZE );
+    if( memcmp( block, desxKnownCiphertext, SYMCRYPT_DESX_BLOCK_SIZE ) != 0 )
+    {
+        SymCryptFatal( 'desy' );
+    }
+
+    // Decryption direction
+    SymCryptDesxDecrypt( &expandedKey, desxKnownCiphertext, block );
+    SymCryptInjectError( block, SYMCRYPT_DESX_BLOCK_SIZE );
+    if( memcmp( block, desxKnownPlaintext, SYMCRYPT_DESX_BLOCK_SIZE ) != 0 )
+    {
+        SymCryptFatal( 'desz' );
+    }
+}
diff --git a/libs/symcrypt/lib/dh.c b/libs/symcrypt/lib/dh.c
new
file mode 100644
index 00000000000..9f2a3796aa7
--- /dev/null
+++ b/libs/symcrypt/lib/dh.c
@@ -0,0 +1,141 @@
+//
+// dh.c   DH functions
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+//
+
+#include "precomp.h"
+
+//
+// SymCryptDhSecretAgreement
+//
+// Computes the Diffie-Hellman shared secret (public key of pkPublic raised to the
+// private key of pkPrivate, modulo the group prime P) and writes it to
+// pbAgreedSecret in the requested number format.
+//
+// Returns:
+//  - SYMCRYPT_INVALID_ARGUMENT if either key lacks SYMCRYPT_FLAG_DLKEY_DH, if flags
+//    is non-zero, if pkPrivate has no private key, or if the two keys are not in
+//    the same group.
+//  - SYMCRYPT_WRONG_BLOCK_SIZE if cbAgreedSecret differs from
+//    SymCryptDlkeySizeofPublicKey( pkPrivate ).
+//  - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if the scratch buffer cannot be allocated.
+//  - SYMCRYPT_INVALID_BLOB if the computed secret is zero.
+//  - Any error reported by SymCryptModElementGetValue.
+//  - SYMCRYPT_NO_ERROR otherwise.
+//
+// The scratch buffer is allocated internally and is wiped and freed on every path
+// that reaches cleanup after a successful allocation.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptDhSecretAgreement(
+    _In_                            PCSYMCRYPT_DLKEY        pkPrivate,
+    _In_                            PCSYMCRYPT_DLKEY        pkPublic,
+                                    SYMCRYPT_NUMBER_FORMAT  format,
+                                    UINT32                  flags,
+    _Out_writes_( cbAgreedSecret )  PBYTE                   pbAgreedSecret,
+                                    SIZE_T                  cbAgreedSecret )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    // pbScratch/cbScratch describe the whole allocation (needed for wipe and free);
+    // the *Internal pair tracks what is left after carving out peRes.
+    PBYTE pbScratch = NULL;
+    SIZE_T cbScratch = 0;
+    PBYTE pbScratchInternal = NULL;
+    SIZE_T cbScratchInternal = 0;
+
+    PCSYMCRYPT_DLGROUP pDlgroup = NULL;
+
+    PSYMCRYPT_MODELEMENT peRes = NULL;
+    UINT32 cbModelement = 0;
+
+    UINT32 nBitsOfExp = 0;
+
+    // Make sure that the keys may be used in DH
+    if ( ((pkPrivate->fAlgorithmInfo & SYMCRYPT_FLAG_DLKEY_DH) == 0) ||
+         ((pkPublic->fAlgorithmInfo & SYMCRYPT_FLAG_DLKEY_DH) == 0) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Make sure we only specify the correct flags and that
+    // there is a private key
+    if ( (flags != 0) || (!pkPrivate->fHasPrivateKey) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Check that the group is the same for both keys
+    if ( SymCryptDlgroupIsSame( pkPrivate->pDlgroup, pkPublic->pDlgroup ) )
+    {
+        pDlgroup = pkPrivate->pDlgroup;
+    }
+    else
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Check the output buffer has the correct size
+    if (cbAgreedSecret != SymCryptDlkeySizeofPublicKey( pkPrivate ))
+    {
+        scError = SYMCRYPT_WRONG_BLOCK_SIZE;
+        goto cleanup;
+    }
+
+    // Objects and scratch space size calculation
+    // (one mod element for the result plus the larger of the two operation scratch sizes)
+    cbModelement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP );
+    cbScratch = cbModelement +
+                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pDlgroup->nDigitsOfP ),
+                     SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pDlgroup->nDigitsOfP ));
+
+    // Scratch space allocation
+    pbScratch = SymCryptCallbackAlloc( cbScratch );
+    if ( pbScratch == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // Creating temporary
+    pbScratchInternal = pbScratch;
+    cbScratchInternal = cbScratch;
+    peRes = SymCryptModElementCreate( pbScratchInternal, cbModelement, pDlgroup->pmP );
+    pbScratchInternal += cbModelement;
+    cbScratchInternal -= cbModelement;
+
+    SYMCRYPT_ASSERT( peRes != NULL);
+
+    // Fix the bits of the exponent (the private key might be either mod Q, mod 2^nBitsPriv, or mod P)
+    if (pkPrivate->fPrivateModQ)
+    {
+        nBitsOfExp = pkPrivate->nBitsPriv;
+    }
+    else
+    {
+        nBitsOfExp = pDlgroup->nBitsOfP;
+    }
+
+    // Calculate the secret
+    SymCryptModExp(
+            pDlgroup->pmP,
+            pkPublic->pePublicKey,
+            pkPrivate->piPrivateKey,
+            nBitsOfExp,
+            0,      // SC safe
+            peRes,
+            pbScratchInternal,
+            cbScratchInternal );
+
+    // Check if the result is zero
+    if ( SymCryptModElementIsZero( pDlgroup->pmP, peRes ) )
+    {
+        scError = SYMCRYPT_INVALID_BLOB;
+        goto cleanup;
+    }
+
+    // Output the result
+    scError = SymCryptModElementGetValue(
+                    pDlgroup->pmP,
+                    peRes,
+                    pbAgreedSecret,
+                    cbAgreedSecret,
+                    format,
+                    pbScratchInternal,
+                    cbScratchInternal );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    // The scratch held the shared secret (peRes), so wipe it before freeing.
+    if ( pbScratch != NULL )
+    {
+        SymCryptWipe( pbScratch, cbScratch );
+        SymCryptCallbackFree( pbScratch );
+    }
+
+    return scError;
+}
diff --git a/libs/symcrypt/lib/dl_internal_groups.c b/libs/symcrypt/lib/dl_internal_groups.c
new file mode 100644
index 00000000000..3476407c5ca
--- /dev/null
+++ b/libs/symcrypt/lib/dl_internal_groups.c
@@ -0,0 +1,922 @@
+//
+// dl_internal_groups.c   Parameters for internally supported dl groups.
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+//
+
+#include "precomp.h"
+
+// Do not delete the following preprocessor directive.
+// It is used for folding the parameters.
+#if 1 + +/*********************************** + * * + * IKE GROUPS (RFC 3526) * + * * + ***********************************/ + +static const BYTE rgbIke3526Modp2048[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAC, 0xAA, 0x68, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbIke3526Modp3072[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 
0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, + 0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33, + 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64, + 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, + 0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D, + 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7, + 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, + 0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D, + 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B, + 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, + 0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64, + 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C, + 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, + 0x77, 0x09, 
0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2, + 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31, + 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, + 0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x3A, 0xD2, 0xCA, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbIke3526Modp4096[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, + 0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33, + 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64, + 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, + 0x8A, 
0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D, + 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7, + 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, + 0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D, + 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B, + 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, + 0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64, + 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C, + 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, + 0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2, + 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31, + 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, + 0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01, + 0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7, + 0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26, + 0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C, + 0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA, + 0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8, + 0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9, + 0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6, + 0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D, + 0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2, + 0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED, + 0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF, + 0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C, + 0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9, + 0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1, + 0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F, + 0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x06, 0x31, 0x99, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbIke3526Modp6144[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, + 0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33, + 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64, + 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, + 0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D, + 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7, + 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, + 0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D, + 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B, + 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, + 0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64, + 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C, + 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, + 0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2, + 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31, + 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, + 0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01, + 0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7, + 0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26, + 
0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C, + 0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA, + 0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8, + 0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9, + 0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6, + 0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D, + 0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2, + 0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED, + 0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF, + 0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C, + 0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9, + 0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1, + 0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F, + 0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92, + 0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26, + 0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE, + 0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD, + 0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E, + 0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE, + 0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31, + 0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18, + 0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED, + 0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B, + 0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B, + 0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42, + 0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF, + 0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC, + 0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03, + 0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6, + 0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82, + 0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E, + 0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3, + 0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE, + 0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5, + 0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA, + 0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8, + 0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0, + 0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28, + 0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76, + 0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0, + 
0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C, + 0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32, + 0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68, + 0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE, + 0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6, + 0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xCC, 0x40, 0x24, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const +BYTE rgbIke3526Modp8192[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, + 0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 
0x33, + 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64, + 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, + 0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D, + 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7, + 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, + 0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D, + 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B, + 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, + 0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64, + 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C, + 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, + 0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2, + 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31, + 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, + 0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01, + 0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7, + 0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26, + 0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C, + 0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA, + 0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8, + 0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9, + 0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6, + 0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D, + 0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2, + 0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED, + 0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF, + 0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C, + 0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9, + 0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1, + 0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F, + 0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92, + 0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26, + 0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE, + 0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD, + 0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E, + 0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE, + 0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31, + 0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18, + 0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED, + 0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 
0x1B, + 0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B, + 0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42, + 0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF, + 0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC, + 0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03, + 0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6, + 0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82, + 0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E, + 0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3, + 0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE, + 0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5, + 0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA, + 0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8, + 0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0, + 0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28, + 0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76, + 0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0, + 0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C, + 0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32, + 0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68, + 0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE, + 0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6, + 0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xBE, 0x11, 0x59, + 0x74, 0xA3, 0x92, 0x6F, 0x12, 0xFE, 0xE5, 0xE4, + 0x38, 0x77, 0x7C, 0xB6, 0xA9, 0x32, 0xDF, 0x8C, + 0xD8, 0xBE, 0xC4, 0xD0, 0x73, 0xB9, 0x31, 0xBA, + 0x3B, 0xC8, 0x32, 0xB6, 0x8D, 0x9D, 0xD3, 0x00, + 0x74, 0x1F, 0xA7, 0xBF, 0x8A, 0xFC, 0x47, 0xED, + 0x25, 0x76, 0xF6, 0x93, 0x6B, 0xA4, 0x24, 0x66, + 0x3A, 0xAB, 0x63, 0x9C, 0x5A, 0xE4, 0xF5, 0x68, + 0x34, 0x23, 0xB4, 0x74, 0x2B, 0xF1, 0xC9, 0x78, + 0x23, 0x8F, 0x16, 0xCB, 0xE3, 0x9D, 0x65, 0x2D, + 0xE3, 0xFD, 0xB8, 0xBE, 0xFC, 0x84, 0x8A, 0xD9, + 0x22, 0x22, 0x2E, 0x04, 0xA4, 0x03, 0x7C, 0x07, + 0x13, 0xEB, 0x57, 0xA8, 0x1A, 0x23, 0xF0, 0xC7, + 0x34, 0x73, 0xFC, 0x64, 0x6C, 0xEA, 0x30, 0x6B, + 0x4B, 0xCB, 0xC8, 0x86, 0x2F, 0x83, 0x85, 0xDD, + 0xFA, 0x9D, 0x4B, 0x7F, 0xA2, 0xC0, 0x87, 0xE8, + 0x79, 0x68, 0x33, 0x03, 0xED, 0x5B, 0xDD, 0x3A, + 0x06, 0x2B, 0x3C, 0xF5, 0xB3, 0xA2, 0x78, 
0xA6, + 0x6D, 0x2A, 0x13, 0xF8, 0x3F, 0x44, 0xF8, 0x2D, + 0xDF, 0x31, 0x0E, 0xE0, 0x74, 0xAB, 0x6A, 0x36, + 0x45, 0x97, 0xE8, 0x99, 0xA0, 0x25, 0x5D, 0xC1, + 0x64, 0xF3, 0x1C, 0xC5, 0x08, 0x46, 0x85, 0x1D, + 0xF9, 0xAB, 0x48, 0x19, 0x5D, 0xED, 0x7E, 0xA1, + 0xB1, 0xD5, 0x10, 0xBD, 0x7E, 0xE7, 0x4D, 0x73, + 0xFA, 0xF3, 0x6B, 0xC3, 0x1E, 0xCF, 0xA2, 0x68, + 0x35, 0x90, 0x46, 0xF4, 0xEB, 0x87, 0x9F, 0x92, + 0x40, 0x09, 0x43, 0x8B, 0x48, 0x1C, 0x6C, 0xD7, + 0x88, 0x9A, 0x00, 0x2E, 0xD5, 0xEE, 0x38, 0x2B, + 0xC9, 0x19, 0x0D, 0xA6, 0xFC, 0x02, 0x6E, 0x47, + 0x95, 0x58, 0xE4, 0x47, 0x56, 0x77, 0xE9, 0xAA, + 0x9E, 0x30, 0x50, 0xE2, 0x76, 0x56, 0x94, 0xDF, + 0xC8, 0x1F, 0x56, 0xE8, 0x80, 0xB9, 0x6E, 0x71, + 0x60, 0xC9, 0x80, 0xDD, 0x98, 0xED, 0xD3, 0xDF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +/******************************* +* * +* TLS GROUPS (RFC 7919) * +* * +********************************/ + +static const BYTE rgbTls7919ffdhe2048[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 
0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x28, 0x5C, 0x97, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbTls7919ffdhe3072[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 
0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0xC6, 0x2E, 0x37, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbTls7919ffdhe4096[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 
0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1, + 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, + 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, + 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18, + 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, + 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, + 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A, + 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, + 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, + 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38, + 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, + 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, + 0x2A, 0x4E, 
0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC, + 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, + 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, + 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1, + 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x65, 0x5F, 0x6A, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbTls7919ffdhe6144[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 
0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1, + 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, + 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, + 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18, + 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, + 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, + 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A, + 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, + 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, + 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38, + 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, + 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, + 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC, + 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, + 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, + 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1, + 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02, + 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, + 0x4E, 0x67, 0x7D, 0x2C, 0x38, 0x53, 0x2A, 0x3A, + 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6, + 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, + 0x91, 0x7B, 0xDD, 0x64, 0xB1, 0xC0, 0xFD, 0x4C, + 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A, + 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, + 0x9B, 0x1F, 0x5C, 0x3E, 0x4E, 0x46, 0x04, 0x1F, + 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77, + 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, + 0xB8, 
0x55, 0x32, 0x2E, 0xDB, 0x63, 0x40, 0xD8, + 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3, + 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, + 0x7F, 0xB2, 0x9F, 0x8C, 0x18, 0x30, 0x23, 0xC3, + 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4, + 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, + 0x94, 0xC6, 0x65, 0x1E, 0x77, 0xCA, 0xF9, 0x92, + 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6, + 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, + 0x0A, 0xE8, 0xDB, 0x58, 0x47, 0xA6, 0x7C, 0xBE, + 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C, + 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, + 0x62, 0x29, 0x2C, 0x31, 0x15, 0x62, 0xA8, 0x46, + 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A, + 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, + 0x8C, 0xCF, 0x2D, 0xD5, 0xCA, 0xCE, 0xF4, 0x03, + 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04, + 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, + 0x3F, 0xDD, 0x4A, 0x8E, 0x9A, 0xDB, 0x1E, 0x69, + 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1, + 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, + 0xA4, 0x0E, 0x32, 0x9C, 0xD0, 0xE4, 0x0E, 0x65, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbTls7919ffdhe8192[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 
0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1, + 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, + 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, + 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18, + 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, + 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, + 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A, + 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, + 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, + 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38, + 
0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, + 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, + 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC, + 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, + 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, + 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1, + 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02, + 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, + 0x4E, 0x67, 0x7D, 0x2C, 0x38, 0x53, 0x2A, 0x3A, + 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6, + 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, + 0x91, 0x7B, 0xDD, 0x64, 0xB1, 0xC0, 0xFD, 0x4C, + 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A, + 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, + 0x9B, 0x1F, 0x5C, 0x3E, 0x4E, 0x46, 0x04, 0x1F, + 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77, + 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, + 0xB8, 0x55, 0x32, 0x2E, 0xDB, 0x63, 0x40, 0xD8, + 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3, + 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, + 0x7F, 0xB2, 0x9F, 0x8C, 0x18, 0x30, 0x23, 0xC3, + 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4, + 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, + 0x94, 0xC6, 0x65, 0x1E, 0x77, 0xCA, 0xF9, 0x92, + 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6, + 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, + 0x0A, 0xE8, 0xDB, 0x58, 0x47, 0xA6, 0x7C, 0xBE, + 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C, + 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, + 0x62, 0x29, 0x2C, 0x31, 0x15, 0x62, 0xA8, 0x46, + 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A, + 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, + 0x8C, 0xCF, 0x2D, 0xD5, 0xCA, 0xCE, 0xF4, 0x03, + 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04, + 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, + 0x3F, 0xDD, 0x4A, 0x8E, 0x9A, 0xDB, 0x1E, 0x69, + 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1, + 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, + 0xA4, 0x0E, 0x32, 0x9C, 0xCF, 0xF4, 0x6A, 0xAA, + 0x36, 0xAD, 0x00, 0x4C, 0xF6, 0x00, 0xC8, 0x38, + 
0x1E, 0x42, 0x5A, 0x31, 0xD9, 0x51, 0xAE, 0x64, + 0xFD, 0xB2, 0x3F, 0xCE, 0xC9, 0x50, 0x9D, 0x43, + 0x68, 0x7F, 0xEB, 0x69, 0xED, 0xD1, 0xCC, 0x5E, + 0x0B, 0x8C, 0xC3, 0xBD, 0xF6, 0x4B, 0x10, 0xEF, + 0x86, 0xB6, 0x31, 0x42, 0xA3, 0xAB, 0x88, 0x29, + 0x55, 0x5B, 0x2F, 0x74, 0x7C, 0x93, 0x26, 0x65, + 0xCB, 0x2C, 0x0F, 0x1C, 0xC0, 0x1B, 0xD7, 0x02, + 0x29, 0x38, 0x88, 0x39, 0xD2, 0xAF, 0x05, 0xE4, + 0x54, 0x50, 0x4A, 0xC7, 0x8B, 0x75, 0x82, 0x82, + 0x28, 0x46, 0xC0, 0xBA, 0x35, 0xC3, 0x5F, 0x5C, + 0x59, 0x16, 0x0C, 0xC0, 0x46, 0xFD, 0x82, 0x51, + 0x54, 0x1F, 0xC6, 0x8C, 0x9C, 0x86, 0xB0, 0x22, + 0xBB, 0x70, 0x99, 0x87, 0x6A, 0x46, 0x0E, 0x74, + 0x51, 0xA8, 0xA9, 0x31, 0x09, 0x70, 0x3F, 0xEE, + 0x1C, 0x21, 0x7E, 0x6C, 0x38, 0x26, 0xE5, 0x2C, + 0x51, 0xAA, 0x69, 0x1E, 0x0E, 0x42, 0x3C, 0xFC, + 0x99, 0xE9, 0xE3, 0x16, 0x50, 0xC1, 0x21, 0x7B, + 0x62, 0x48, 0x16, 0xCD, 0xAD, 0x9A, 0x95, 0xF9, + 0xD5, 0xB8, 0x01, 0x94, 0x88, 0xD9, 0xC0, 0xA0, + 0xA1, 0xFE, 0x30, 0x75, 0xA5, 0x77, 0xE2, 0x31, + 0x83, 0xF8, 0x1D, 0x4A, 0x3F, 0x2F, 0xA4, 0x57, + 0x1E, 0xFC, 0x8C, 0xE0, 0xBA, 0x8A, 0x4F, 0xE8, + 0xB6, 0x85, 0x5D, 0xFE, 0x72, 0xB0, 0xA6, 0x6E, + 0xDE, 0xD2, 0xFB, 0xAB, 0xFB, 0xE5, 0x8A, 0x30, + 0xFA, 0xFA, 0xBE, 0x1C, 0x5D, 0x71, 0xA8, 0x7E, + 0x2F, 0x74, 0x1E, 0xF8, 0xC1, 0xFE, 0x86, 0xFE, + 0xA6, 0xBB, 0xFD, 0xE5, 0x30, 0x67, 0x7F, 0x0D, + 0x97, 0xD1, 0x1D, 0x49, 0xF7, 0xA8, 0x44, 0x3D, + 0x08, 0x22, 0xE5, 0x06, 0xA9, 0xF4, 0x61, 0x4E, + 0x01, 0x1E, 0x2A, 0x94, 0x83, 0x8F, 0xF8, 0x8C, + 0xD6, 0x8C, 0x8B, 0xB7, 0xC5, 0xC6, 0x42, 0x4C, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +#endif // 1 + +// Definitions +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsModp2048 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp2048, + .nBitsOfP = 2048, + .nMinBitsPriv = 224, // 2s = 2 * 112 + .nDefaultBitsPriv = 256 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS 
paramsModp3072 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp3072, + .nBitsOfP = 3072, + .nMinBitsPriv = 256, // 2s = 2 * 128 + .nDefaultBitsPriv = 256 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsModp4096 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp4096, + .nBitsOfP = 4096, + .nMinBitsPriv = 304, // 2s = 2 * 152 + .nDefaultBitsPriv = 384 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsModp6144 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp6144, + .nBitsOfP = 6144, + .nMinBitsPriv = 352, // 2s = 2 * 176 + .nDefaultBitsPriv = 384 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsModp8192 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp8192, + .nBitsOfP = 8192, + .nMinBitsPriv = 400, // 2s = 2 * 200 + .nDefaultBitsPriv = 512 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe2048 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe2048, + .nBitsOfP = 2048, + .nMinBitsPriv = 224, // 2s = 2 * 112 + .nDefaultBitsPriv = 256 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe3072 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe3072, + .nBitsOfP = 3072, + .nMinBitsPriv = 256, // 2s = 2 * 128 + .nDefaultBitsPriv = 256 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe4096 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe4096, + .nBitsOfP 
= 4096, + .nMinBitsPriv = 304, // 2s = 2 * 152 + .nDefaultBitsPriv = 384 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe6144 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe6144, + .nBitsOfP = 6144, + .nMinBitsPriv = 352, // 2s = 2 * 176 + .nDefaultBitsPriv = 384 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe8192 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe8192, + .nBitsOfP = 8192, + .nMinBitsPriv = 400, // 2s = 2 * 200 + .nDefaultBitsPriv = 512 // rounding nMinBitsPriv up to the nearest 128 +}; + +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp2048 = &paramsModp2048; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp3072 = &paramsModp3072; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp4096 = &paramsModp4096; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp6144 = &paramsModp6144; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp8192 = &paramsModp8192; + +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe2048 = &paramsffdhe2048; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe3072 = &paramsffdhe3072; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe4096 = &paramsffdhe4096; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe6144 = &paramsffdhe6144; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe8192 = &paramsffdhe8192; + +// Note, we rely on the ordering of the parameters from largest to smallest within each named set of +// safe-prime groups as we iterate through them assuming this order in SymCryptDlgroupSetValueSafePrime +const
PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptNamedSafePrimeGroups[SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT] = +{ + &paramsModp8192, + &paramsModp6144, + &paramsModp4096, + &paramsModp3072, + &paramsModp2048, + &paramsffdhe8192, + &paramsffdhe6144, + &paramsffdhe4096, + &paramsffdhe3072, + &paramsffdhe2048, +}; diff --git a/libs/symcrypt/lib/dlgroup.c b/libs/symcrypt/lib/dlgroup.c new file mode 100644 index 00000000000..021c0e0145e --- /dev/null +++ b/libs/symcrypt/lib/dlgroup.c @@ -0,0 +1,2016 @@ +// +// dlgroup.c Dlgroup functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// Miller-Rabin iterations for prime generation +#define DLGROUP_MR_ITERATIONS (64) + +// Default size for Q according to FIPS 186-3 +static const struct _DSA_NBITSOFQ_CUTOFFS { + UINT32 nBitsOfP; + UINT32 nBitsOfQ; +} g_nBitsOfQ_Cutoffs[] = { + { 1024, 160 }, + { 2048, 256 }, + { UINT32_MAX, 256 }, +}; + +// Const label for the generation of generator G according to FIPS 186-3 +static const BYTE ggen[] = { 'g', 'g', 'e', 'n' }; + +UINT32 +SYMCRYPT_CALL +SymCryptDlgroupCalculateBitsizeOfQ( UINT32 nBitsOfP ) +{ + UINT32 i = 0; + while ( (i<SYMCRYPT_ARRAY_SIZE(g_nBitsOfQ_Cutoffs) - 1) && + (g_nBitsOfQ_Cutoffs[i].nBitsOfP < nBitsOfP) ) + { + i++; + }; + + return g_nBitsOfQ_Cutoffs[i].nBitsOfQ; +} + +PSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlgroupAllocate( UINT32 nBitsOfP, UINT32 nBitsOfQ ) +{ + PVOID p; + SIZE_T cb; + PSYMCRYPT_DLGROUP res = NULL; + + // Invalid parameters + if ( (nBitsOfP < SYMCRYPT_DLGROUP_MIN_BITSIZE_P) || + ((nBitsOfQ > 0) && (nBitsOfQ < SYMCRYPT_DLGROUP_MIN_BITSIZE_Q)) || + (nBitsOfP < nBitsOfQ) ) + { + goto cleanup; + } + + cb = SymCryptSizeofDlgroupFromBitsizes( nBitsOfP, nBitsOfQ ); + + p = SymCryptCallbackAlloc( cb ); + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptDlgroupCreate( p, cb, nBitsOfP, nBitsOfQ ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptDlgroupFree( _Out_ PSYMCRYPT_DLGROUP pgObj ) +{ +
SYMCRYPT_CHECK_MAGIC( pgObj ); + SymCryptDlgroupWipe( pgObj ); + SymCryptCallbackFree( pgObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDlgroupFromBitsizes( UINT32 nBitsOfP, UINT32 nBitsOfQ ) +{ + UINT32 cbSeed = 0; + + if (nBitsOfQ == 0) + { + nBitsOfQ = nBitsOfP-1; // Default to the maximum possible size for Q + } + + // Invalid parameters + if ( (nBitsOfP < SYMCRYPT_DLGROUP_MIN_BITSIZE_P) || + (nBitsOfQ < SYMCRYPT_DLGROUP_MIN_BITSIZE_Q) || + (nBitsOfP < nBitsOfQ) ) + { + return 0; + } + + if ( nBitsOfP == nBitsOfQ ) + { + nBitsOfQ--; + } + + // Calculate the (tight) bytesize of the seed + cbSeed = (nBitsOfQ+7)/8; + + return sizeof(SYMCRYPT_DLGROUP) + + SYMCRYPT_SIZEOF_MODULUS_FROM_BITS( nBitsOfP ) + + SYMCRYPT_SIZEOF_MODULUS_FROM_BITS( nBitsOfQ ) + + SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( nBitsOfP ) + + ((cbSeed + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE)*SYMCRYPT_ASYM_ALIGN_VALUE; // Make sure that the entire structure is ASYM_ALIGNED. +} + +PSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlgroupCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nBitsOfP, + UINT32 nBitsOfQ ) +{ + PSYMCRYPT_DLGROUP pDlgroup = NULL; + + UINT32 cbModP; + UINT32 cbModQ; + UINT32 cbModElement; + + SYMCRYPT_ASSERT( cbBuffer >= SymCryptSizeofDlgroupFromBitsizes( nBitsOfP, nBitsOfQ ) ); + UNREFERENCED_PARAMETER( cbBuffer ); // only referenced in ASSERTs... 
+ SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + // Invalid parameters + if ( (nBitsOfP < SYMCRYPT_DLGROUP_MIN_BITSIZE_P) || + ((nBitsOfQ > 0) && (nBitsOfQ < SYMCRYPT_DLGROUP_MIN_BITSIZE_Q)) || + (nBitsOfP < nBitsOfQ) ) + { + goto cleanup; + } + + if ( nBitsOfP == nBitsOfQ ) + { + nBitsOfQ--; + } + + pDlgroup = (PSYMCRYPT_DLGROUP) pbBuffer; + + SYMCRYPT_ASSERT( cbBuffer > sizeof(SYMCRYPT_DLGROUP) ); + + // DLGROUP parameters + pDlgroup->cbTotalSize = SymCryptSizeofDlgroupFromBitsizes( nBitsOfP, nBitsOfQ ); + pDlgroup->fHasPrimeQ = FALSE; + + pDlgroup->nBitsOfP = nBitsOfP; + pDlgroup->cbPrimeP = (nBitsOfP+7)/8; + pDlgroup->nDigitsOfP = SymCryptDigitsFromBits( nBitsOfP ); + pDlgroup->nMaxBitsOfP = nBitsOfP; + + pDlgroup->nBitsOfQ = nBitsOfQ; // 0 value possible + pDlgroup->cbPrimeQ = (nBitsOfQ+7)/8; // 0 value possible + pDlgroup->nDigitsOfQ = (nBitsOfQ>0)?SymCryptDigitsFromBits( nBitsOfQ ):0; // 0 value possible + pDlgroup->nMaxBitsOfQ = (nBitsOfQ==0)?(nBitsOfP-1):nBitsOfQ; + + pDlgroup->isSafePrimeGroup = FALSE; + pDlgroup->nMinBitsPriv = 0; + pDlgroup->nDefaultBitsPriv = nBitsOfQ; // 0 value possible + + pDlgroup->nBitsOfSeed = nBitsOfQ; // 0 value possible + pDlgroup->cbSeed = (pDlgroup->nBitsOfSeed+7)/8; // 0 value possible + + pDlgroup->eFipsStandard = SYMCRYPT_DLGROUP_FIPS_NONE; // This will be set either on generate or import + pDlgroup->pHashAlgorithm = NULL; // Like-wise + pDlgroup->dwGenCounter = 0; // Like-wise + pDlgroup->bIndexGenG = 1; // Default: 1 + + // Create SymCrypt objects + pbBuffer += sizeof(SYMCRYPT_DLGROUP); + + cbModP = SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfP ); + SYMCRYPT_ASSERT( cbBuffer > sizeof(SYMCRYPT_DLGROUP) + cbModP ); + pDlgroup->pmP = SymCryptModulusCreate( pbBuffer, cbModP, pDlgroup->nDigitsOfP ); + pbBuffer += cbModP; + + // + // **** Always defer the creation of the Q modulus until the group generation or + // import of the modulus. 
This way it is always the fastest possible even when the caller + // specified nBitsOfQ = 0. + // + if (nBitsOfQ>0) + { + cbModQ = SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfQ ); + } + else + { + cbModQ = cbModP; + } + SYMCRYPT_ASSERT( cbBuffer > sizeof(SYMCRYPT_DLGROUP) + cbModP + cbModQ ); + pDlgroup->pbQ = pbBuffer; // Set the aligned buffer + pDlgroup->pmQ = NULL; + pbBuffer += cbModQ; + + cbModElement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + SYMCRYPT_ASSERT( cbBuffer > sizeof(SYMCRYPT_DLGROUP) + cbModP + cbModQ + cbModElement ); + pDlgroup->peG = SymCryptModElementCreate( pbBuffer, cbModElement, pDlgroup->pmP ); + pbBuffer += cbModElement; + + pDlgroup->pbSeed = pbBuffer; + + // Setting the magic + SYMCRYPT_SET_MAGIC( pDlgroup ); + +cleanup: + return pDlgroup; +} + +VOID +SYMCRYPT_CALL +SymCryptDlgroupWipe( _Out_ PSYMCRYPT_DLGROUP pgDst ) +{ + SymCryptWipe( (PBYTE) pgDst, pgDst->cbTotalSize ); +} + +VOID +SYMCRYPT_CALL +SymCryptDlgroupCopy( + _In_ PCSYMCRYPT_DLGROUP pgSrc, + _Out_ PSYMCRYPT_DLGROUP pgDst ) +{ + // + // in-place copy is somewhat common... 
+ // + if( pgSrc != pgDst ) + { + pgDst->cbTotalSize = pgSrc->cbTotalSize; + pgDst->fHasPrimeQ = pgSrc->fHasPrimeQ; + + pgDst->nBitsOfP = pgSrc->nBitsOfP; + pgDst->cbPrimeP = pgSrc->cbPrimeP; + pgDst->nDigitsOfP = pgSrc->nDigitsOfP; + pgDst->nMaxBitsOfP = pgSrc->nMaxBitsOfP; + + pgDst->nBitsOfQ = pgSrc->nBitsOfQ; + pgDst->cbPrimeQ = pgSrc->cbPrimeQ; + pgDst->nDigitsOfQ = pgSrc->nDigitsOfQ; + pgDst->nMaxBitsOfQ = pgSrc->nMaxBitsOfQ; + + pgDst->isSafePrimeGroup = pgSrc->isSafePrimeGroup; + pgDst->nMinBitsPriv = pgSrc->nMinBitsPriv; + pgDst->nDefaultBitsPriv = pgSrc->nDefaultBitsPriv; + + pgDst->nBitsOfSeed = pgSrc->nBitsOfSeed; + pgDst->cbSeed = pgSrc->cbSeed; + + pgDst->eFipsStandard = pgSrc->eFipsStandard; + pgDst->pHashAlgorithm = pgSrc->pHashAlgorithm; + pgDst->dwGenCounter = pgSrc->dwGenCounter; + pgDst->bIndexGenG = pgSrc->bIndexGenG; + pgDst->pbQ = pgSrc->pbQ; + + memcpy( (PBYTE)pgDst + sizeof(SYMCRYPT_DLGROUP), (PCBYTE)pgSrc + sizeof(SYMCRYPT_DLGROUP), pgSrc->cbTotalSize - sizeof(SYMCRYPT_DLGROUP) ); + } +} + + +// DLGROUP-specific functions + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGeneratePrimeQ_FIPS( + _In_ PSYMCRYPT_DLGROUP pDlgroup, + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT + pTrialDivisionContext, + _Out_ PUINT32 pfPrimeQFound, + _Out_ PSYMCRYPT_INT piQ, + _Out_ PSYMCRYPT_DIVISOR pdDivTwoQ, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_HASH hashAlgorithm = pDlgroup->pHashAlgorithm; + UINT32 nBitsOfQ = pDlgroup->nBitsOfQ; + UINT32 cbPrimeQ = pDlgroup->cbPrimeQ; + PBYTE pbSeed = pDlgroup->pbSeed; + UINT32 cbSeed = pDlgroup->cbSeed; + + PSYMCRYPT_INT piDivTwoQ = SymCryptIntFromDivisor(pdDivTwoQ); + + SIZE_T cbHash = SymCryptHashResultSize( hashAlgorithm ); + PBYTE pbTrHash = NULL; // Pointer to the truncated hash value + PBYTE pbHashExtra = NULL; // Needed as temp buffer for 186-2 + + UINT32 dwShiftBits = (8-nBitsOfQ%8)%8; // When nBitsOfQ is a multiple of 8 -> 
dwShiftBits = 0; + + UINT32 carry = 0; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR(SymCryptDigitsFromBits(nBitsOfQ+1)), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME(pDlgroup->nDigitsOfQ), + 2 * cbHash )) ); + SYMCRYPT_ASSERT( cbHash >= cbPrimeQ ); + + // Hash the seed according to the standard specified + if (pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) + { + SYMCRYPT_ASSERT( hashAlgorithm == SymCryptSha1Algorithm ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_MAX(2*cbHash, cbSeed) ); + + // Hash buffers + pbTrHash = pbScratch; + pbHashExtra = pbTrHash + cbHash; + + // Prepare an int for SEED + 1 + scError = SymCryptIntSetValue( pbSeed, cbSeed, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piDivTwoQ ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Add 1 + carry = SymCryptIntAddUint32( piDivTwoQ, 1, piDivTwoQ ); + if (carry > 0) + { + // This should never happen as the size of piDivTwoQ is at least one bit bigger than Q + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // (SEED+1) Mod 2^nBitsOfSeed + SymCryptIntModPow2( piDivTwoQ, nBitsOfQ, piDivTwoQ ); + + // Get the value into pbTrHash (Notice the cbSeed size) + scError = SymCryptIntGetValue( piDivTwoQ, pbTrHash, cbSeed, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Hash it into pbHashExtra + SymCryptHash( hashAlgorithm, pbTrHash, cbPrimeQ, pbHashExtra, cbHash ); + + // Hash the seed + SymCryptHash( hashAlgorithm, pbSeed, cbSeed, pbTrHash, cbHash ); + + // Xor the two + SymCryptXorBytes( pbTrHash, pbHashExtra, pbTrHash, cbHash ); + + } + else if (pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_3) + { + SYMCRYPT_ASSERT( cbScratch >= cbHash ); + pbTrHash = pbScratch; + SymCryptHash( hashAlgorithm, pbSeed, cbSeed, pbTrHash, cbHash ); + } + else + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // Convert it to (2^{N-1} + 
(Hash mod 2^{N-1})) | 1 + pbTrHash += (cbHash-cbPrimeQ); // Skip any leading zero bytes + pbTrHash[0] &= ((BYTE)0xff >> (dwShiftBits)); // Cut off top bits in the most significant byte + pbTrHash[0] |= ((BYTE)0x01 << (7 - dwShiftBits)); // Set the (N-1)-th bit + pbTrHash[cbPrimeQ-1] |= ((BYTE)0x01); // Make the entire number odd + + // Set the value + scError = SymCryptIntSetValue( pbTrHash, cbPrimeQ, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piQ ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Assume not a prime + *pfPrimeQFound = 0; + + // Fast compositeness check + if (SymCryptIntFindSmallDivisor( pTrialDivisionContext, piQ, NULL, 0 )) + { + goto cleanup; + } + + // IntMillerRabinPrimalityTest requirement: + // piQ > 3 since nBitsOfQ is bounded by SYMCRYPT_DLGROUP_MIN_BITSIZE_Q + *pfPrimeQFound = SymCryptIntMillerRabinPrimalityTest( + piQ, + nBitsOfQ, + DLGROUP_MR_ITERATIONS, + SYMCRYPT_FLAG_DATA_PUBLIC, // q and p will be public + pbScratch, + cbScratch ); + + // Set pdDivTwoQ + if (*pfPrimeQFound) + { + scError = SymCryptIntCopyMixedSize( piQ, piDivTwoQ ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + SymCryptIntMulPow2( piDivTwoQ, 1, piDivTwoQ ); + + // IntToDivisor requirement: + // Q is non-zero as prime --> 2*Q != 0 + SymCryptIntToDivisor( + piDivTwoQ, + pdDivTwoQ, + 4*pDlgroup->nBitsOfP, // 4*L + SYMCRYPT_FLAG_DATA_PUBLIC, + pbScratch, + cbScratch ); + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGeneratePrimeP_FIPS( + _In_ PSYMCRYPT_DLGROUP pDlgroup, + _In_ PSYMCRYPT_DIVISOR pdDivTwoQ, + _In_ UINT32 dwMaxCounter, // Maximum value of counter (used in validation) + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT + pTrialDivisionContext, + _Out_ PUINT32 pfPrimePFound, + _Out_ PSYMCRYPT_INT piP, + _Out_ PUINT32 pdwCounter, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_HASH hashAlgorithm = 
pDlgroup->pHashAlgorithm; + UINT32 nBitsOfP = pDlgroup->nBitsOfP; + PBYTE pbSeed = pDlgroup->pbSeed; + UINT32 cbSeed = pDlgroup->cbSeed; + UINT32 nBitsOfSeed = pDlgroup->nBitsOfSeed; + + SIZE_T cbHash = SymCryptHashResultSize( hashAlgorithm ); + + UINT32 counter = 0; + + UINT32 ndDivTwoQ = SymCryptDivisorDigitsizeOfObject( pdDivTwoQ ); + UINT32 cbIntTwoQ = SymCryptSizeofIntFromDigits( ndDivTwoQ ); + + PSYMCRYPT_INT piPersistent = NULL; + PSYMCRYPT_INT piRemainder = NULL; + + PBYTE pbHashOutput = NULL; + PBYTE pbTempSeed = NULL; + + PBYTE pbW = NULL; + UINT32 cbW = pDlgroup->cbPrimeP; + + PBYTE pbWCurr = NULL; + SIZE_T cbWBytesLeft = 0; + + UINT32 carry = 0; + + // We will use internal scratch space at the start of pbScratch + // because cbHash, cbSeed and cbW are not necessarily aligned according + // to SYMCRYPT_ASYM_ALIGN_VALUE + PBYTE pbScratchInternal = 0; + SIZE_T cbScratchInternal = 0; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2*cbIntTwoQ + cbHash + cbSeed + cbW + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( pDlgroup->nDigitsOfP, ndDivTwoQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( pDlgroup->nDigitsOfP )) ); + + // Create temporaries + pbScratchInternal = pbScratch; + cbScratchInternal = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( pDlgroup->nDigitsOfP, ndDivTwoQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( pDlgroup->nDigitsOfP ) ); + pbScratch += cbScratchInternal; + + piPersistent = SymCryptIntCreate( pbScratch, cbIntTwoQ, ndDivTwoQ ); + pbScratch += cbIntTwoQ; + + piRemainder = SymCryptIntCreate( pbScratch, cbIntTwoQ, ndDivTwoQ ); + pbScratch += cbIntTwoQ; + + pbHashOutput = pbScratch; + pbScratch += cbHash; + + pbTempSeed = pbScratch; + pbScratch += cbSeed; + + pbW = pbScratch; + + // Set the value for the expression "domain_parameter_seed + offset + j" + scError = SymCryptIntSetValue( pbSeed, cbSeed, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piPersistent ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto 
cleanup; + } + + // If the standard is 186-2 add 1 since the starting offset is 2 + if (pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) + { + carry = SymCryptIntAddUint32( piPersistent, 1, piPersistent ); + if (carry!=0) + { + // This should never happen as piPersistent has at least one more bit than + // seedLen == nBitsOfQ + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // Mod 2^seedlen + SymCryptIntModPow2( piPersistent, nBitsOfSeed, piPersistent ); + } + + *pfPrimePFound = 0; + + for (counter = 0; counter < dwMaxCounter+1; counter++) + { + cbWBytesLeft = cbW; // Bytes left to write + pbWCurr = pbW + cbW - SYMCRYPT_MIN(cbW,cbHash); // Position of the first hash chunk to write (if cbW < cbHash then we write only 1 chunk) + + while (cbWBytesLeft > 0) + { + // Add 1 to piPersistent + // This can never generate a carry as piPersistent has at least one more bit than + // seedLen == nBitsOfQ and in the next step we always do mod 2^seedlen. + carry = SymCryptIntAddUint32( piPersistent, 1, piPersistent ); + if (carry!=0) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // Mod 2^seedlen + SymCryptIntModPow2( piPersistent, nBitsOfSeed, piPersistent ); + + // Extract piPersistent into a byte array (this will always be equal to domain_parameter_seed + offset + j) + scError = SymCryptIntGetValue( piPersistent, pbTempSeed, cbSeed, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Hash it + SymCryptHash( hashAlgorithm, pbTempSeed, cbSeed, pbHashOutput, cbHash ); + + if (cbWBytesLeft >= cbHash) + { + // Move the entire hash output to the correct location in the pbW buffer + memcpy(pbWCurr, pbHashOutput, cbHash ); + } + else + { + // Move only the last bytes of the hash output + memcpy(pbWCurr, pbHashOutput + cbHash - cbWBytesLeft, cbWBytesLeft ); + } + + // Update the positions on the W buffer + cbWBytesLeft -= SYMCRYPT_MIN(cbHash,cbWBytesLeft); + pbWCurr -= SYMCRYPT_MIN(cbHash,cbWBytesLeft); 
+ } + + // Import the W buffer into P + scError = SymCryptIntSetValue( pbW, cbW, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piP ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Zero-out the top bits of the integer + SymCryptIntModPow2( piP, nBitsOfP, piP ); + + // Set the most significant bit + SymCryptIntSetBits( piP, 1, nBitsOfP-1, 1); + + // At this point piP = X = W + 2^{L-1} + + // Calculate c = X mod 2Q + SymCryptIntDivMod( piP, pdDivTwoQ, NULL, piRemainder, pbScratchInternal, cbScratchInternal ); + + if (SymCryptIntIsEqualUint32(piRemainder, 0)) + { + // Just add one to X + // We can never get a carry here because the remainder X mod 2Q + // is 0. Therefore X is even. + carry = SymCryptIntAddUint32( piP, 1, piP ); + SYMCRYPT_ASSERT( carry==0 ); + } + else + { + // Subtract 1 from c + // We can never get a borrow here because the remainder is not 0. + carry = SymCryptIntSubUint32( piRemainder, 1, piRemainder ); + SYMCRYPT_ASSERT( carry==0 ); + + // X-(c-1) + // We can never get a borrow here because c is smaller + // or equal to X. 
+ carry = SymCryptIntSubMixedSize( piP, piRemainder, piP ); + SYMCRYPT_ASSERT( carry==0 ); + } + + // Check if smaller than 2^{L-1} by checking the L-1 bit + if (SymCryptIntGetBit( piP, nBitsOfP-1 ) == 0) + { + continue; + } + + // Fast compositeness check + if (SymCryptIntFindSmallDivisor( pTrialDivisionContext, piP, NULL, 0 )) + { + continue; + } + + // IntMillerRabinPrimalityTest requirement: + // piP > 3 since nBitsOfP is bounded by SYMCRYPT_DLGROUP_MIN_BITSIZE_P + *pfPrimePFound = SymCryptIntMillerRabinPrimalityTest( + piP, + nBitsOfP, + DLGROUP_MR_ITERATIONS, + SYMCRYPT_FLAG_DATA_PUBLIC, // q and p will be public + pbScratchInternal, + cbScratchInternal ); + + if (*pfPrimePFound) + { + *pdwCounter = counter; + break; + } + } +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGenerateGenG_FIPS( + _In_ PSYMCRYPT_DLGROUP pDlgroup, + _Out_ PSYMCRYPT_MODELEMENT peG, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_HASH hashAlgorithm = pDlgroup->pHashAlgorithm; + PCSYMCRYPT_MODULUS pmP = pDlgroup->pmP; + UINT32 nDigitsOfP = pDlgroup->nDigitsOfP; + UINT32 nBitsOfP = pDlgroup->nBitsOfP; + PCSYMCRYPT_MODULUS pmQ = pDlgroup->pmQ; + UINT32 nDigitsOfQ = pDlgroup->nDigitsOfQ; + PBYTE pbSeed = pDlgroup->pbSeed; + UINT32 cbSeed = pDlgroup->cbSeed; + BYTE bIndexGenG = pDlgroup->bIndexGenG; + + SIZE_T cbHash = SymCryptHashResultSize( hashAlgorithm ); + SYMCRYPT_ASSERT( cbHash == hashAlgorithm->resultSize ); + SIZE_T cbState = SymCryptHashStateSize( hashAlgorithm ); + SYMCRYPT_ASSERT( cbState == hashAlgorithm->stateSize ); + + UINT16 count = 0; + BYTE bTmp = 0; + + PSYMCRYPT_INT piExp = NULL; + PSYMCRYPT_INT piRem = NULL; + PSYMCRYPT_MODELEMENT peOne = NULL; + PBYTE pbState = NULL; + PBYTE pbW = NULL; + + UINT32 cbExp = SymCryptSizeofIntFromDigits( nDigitsOfP ); + UINT32 cbRem = SymCryptSizeofIntFromDigits( nDigitsOfQ ); + UINT32 cbModElement = 
SymCryptSizeofModElementFromModulus( pmP ); + + UINT32 borrow = 0; + + // We will use internal scratch space at the start of pbScratch + // because cbHash is not necessarily aligned according + // to SYMCRYPT_ASYM_ALIGN_VALUE + PBYTE pbScratchInternal = 0; + SIZE_T cbScratchInternal = 0; + + UNREFERENCED_PARAMETER( cbScratch ); + UNREFERENCED_PARAMETER( nDigitsOfQ ); + + // Create temporaries + pbScratchInternal = pbScratch; + cbScratchInternal = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigitsOfP ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( nDigitsOfP, nDigitsOfQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfP ) )); + SYMCRYPT_ASSERT( cbScratch >= cbScratchInternal + cbExp + cbRem ); + SYMCRYPT_ASSERT( cbScratch >= cbScratchInternal + cbExp + cbModElement + cbHash + cbState ); + pbScratch += cbScratchInternal; + + piExp = SymCryptIntCreate( pbScratch, cbExp, nDigitsOfP ); + pbScratch += cbExp; + + piRem = SymCryptIntCreate( pbScratch, cbRem, nDigitsOfQ ); + + // Calculate the exponent e = (p-1)/q + borrow = SymCryptIntSubUint32( SymCryptIntFromModulus((PSYMCRYPT_MODULUS)pmP), 1, piExp ); + if (borrow!=0) + { + // The only way to get a borrow here is if the imported prime P + // is zero and we generate a G from P and Q. + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptIntDivMod( + piExp, + SymCryptDivisorFromModulus( (PSYMCRYPT_MODULUS)pmQ ), + piExp, + piRem, + pbScratchInternal, + cbScratchInternal ); + + if ( !SymCryptIntIsEqualUint32(piRem, 0) ) + { + // The only way to get a non-zero remainder is if Q does not divide P-1 + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // To reach here we have guaranteed that P and Q are odd, with bitlength >= 32b, and Q divides P-1. + // It follows that piExp >= 2, as it must be even and non-zero. 
+ + peOne = SymCryptModElementCreate( pbScratch, cbModElement, pmP); + pbScratch += cbModElement; + + pbState = pbScratch; + pbScratch += cbState; + + pbW = pbScratch; + + // Initialize the hash state + SymCryptHashInit( hashAlgorithm, pbState ); + + // Set the modelement equal to one + SymCryptModElementSetValueUint32( 1, pmP, peOne, pbScratchInternal, cbScratchInternal ); + + do + { + count += 1; + + if (count == 0) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // Hash the seed + SymCryptHashAppend( hashAlgorithm, pbState, pbSeed, cbSeed ); + + // Hash the "ggen" string + SymCryptHashAppend( hashAlgorithm, pbState, ggen, sizeof(ggen) ); + + // Hash the index + SymCryptHashAppend( hashAlgorithm, pbState, &bIndexGenG, sizeof(bIndexGenG) ); + + // Hash the count (in MSB) + bTmp = (BYTE)(count >> 8); + SymCryptHashAppend( hashAlgorithm, pbState, &bTmp, sizeof(bTmp) ); + bTmp = (BYTE)count; + SymCryptHashAppend( hashAlgorithm, pbState, &bTmp, sizeof(bTmp) ); + + // Result into W + SymCryptHashResult( hashAlgorithm, pbState, pbW, cbHash ); + + // Set this into G + scError = SymCryptModElementSetValue( + pbW, + cbHash, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + pmP, + peG, + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // ModExp G in place + SymCryptModExp( + pmP, + peG, + piExp, + nBitsOfP, + SYMCRYPT_FLAG_DATA_PUBLIC, + peG, + pbScratchInternal, + cbScratchInternal ); + + } while (SymCryptModElementIsZero( pmP, peG ) || SymCryptModElementIsEqual( pmP, peG, peOne )); + +cleanup: + return scError; +} + +// Scratch space requirements for the entire FIPS standards generation of P,Q,G +UINT32 +SYMCRYPT_CALL +SymCryptDlgroupScratchSpace_FIPS( UINT32 nBitsOfP, UINT32 nBitsOfQ, PCSYMCRYPT_HASH pHashAlgorithm ) +{ + UINT32 nDigitsOfP = SymCryptDigitsFromBits( nBitsOfP ); + UINT32 nDigitsOfQ = SymCryptDigitsFromBits( nBitsOfQ ); + UINT32 ndDivTwoQ = SymCryptDigitsFromBits(nBitsOfQ + 1); + + UINT32 cbPrimeP 
= (nBitsOfP+7)/8; // Note: The upper bound for nBitsOfP is enforced by SymCryptDigitsFromBits + UINT32 cbDivTwoQ = SymCryptSizeofDivisorFromDigits(ndDivTwoQ); + UINT32 cbIntTwoQ = SymCryptSizeofIntFromDigits( ndDivTwoQ ); + UINT32 cbSeed = (nBitsOfQ+7)/8; // Note: The upper bound for nBitsOfP is enforced by SymCryptDigitsFromBits + + UINT32 cbExp = SymCryptSizeofIntFromDigits( nDigitsOfP ); + UINT32 cbRem = SymCryptSizeofIntFromDigits( nDigitsOfQ ); + UINT32 cbModElement = SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( nBitsOfP ); + + UINT32 cbHash = (UINT32)SymCryptHashResultSize( pHashAlgorithm ); + UINT32 cbState = (UINT32) SymCryptHashStateSize( pHashAlgorithm ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow the result and is bounded by 2^28. + // + return SYMCRYPT_MAX( cbDivTwoQ + SYMCRYPT_MAX( + // Generate Q + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( ndDivTwoQ ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( nDigitsOfQ ), + 2 * cbHash)), + // Generate P + 2*cbIntTwoQ + cbHash + cbSeed + cbPrimeP + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( nDigitsOfP, ndDivTwoQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( nDigitsOfP )) ), + SYMCRYPT_MAX( + // Convert P and Q to moduli + SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( nDigitsOfP ), + // Generate GenG + cbExp + SYMCRYPT_MAX(cbRem, cbModElement + cbState + cbHash) + + SYMCRYPT_MAX(SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigitsOfP ), + SYMCRYPT_MAX(SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( nDigitsOfP, nDigitsOfQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfP ) )) )); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGenerate( + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_ SYMCRYPT_DLGROUP_FIPS fipsStandard, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ) +{ + SYMCRYPT_ERROR scError = 
SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PBYTE pbScratchInternal = NULL; + SIZE_T cbScratchInternal = 0; + + UINT32 fPrimeQFound = 0; + UINT32 fPrimePFound = 0; + + // A divisor equal to 2*Q will be needed for the generation of P + PSYMCRYPT_DIVISOR pdDivTwoQ = NULL; + UINT32 cbDivTwoQ = 0; + UINT32 ndDivTwoQ = 0; + + UINT32 nBitsOfP = 0; + UINT32 nDigitsOfP = 0; + UINT32 nBitsOfQ = 0; + UINT32 nDigitsOfQ = 0; + + PCSYMCRYPT_TRIALDIVISION_CONTEXT pTrialDivisionContext = NULL; + + if (fipsStandard == SYMCRYPT_DLGROUP_FIPS_NONE) + { + fipsStandard = SYMCRYPT_DLGROUP_FIPS_LATEST; + } + + // Numbered comments refer to the steps in the FIPS standard + // 1. Check that L,N is in the list of acceptable pairs + // => Skipped as SymCrypt supports more sizes + + // 2. Check that seedlen >= N + // => Skipped as we always have seedlen == N (see below) + + + // Make sure that a hash algorithm is passed (if needed) + // and set the FIPS standard + if (fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) + { + if (hashAlgorithm != NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pDlgroup->eFipsStandard = fipsStandard; + hashAlgorithm = SymCryptSha1Algorithm; + } + else + { + if (hashAlgorithm == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pDlgroup->eFipsStandard = fipsStandard; + } + + // If during allocation the caller didn't know the size of Q + // and set it to 0, pick the default bitsize here + // and fix all the zero parameters. 
+ if (pDlgroup->nBitsOfQ == 0) + { + pDlgroup->nBitsOfQ = SymCryptDlgroupCalculateBitsizeOfQ(pDlgroup->nBitsOfP); + + if (pDlgroup->nBitsOfQ > pDlgroup->nMaxBitsOfQ) + { + scError = SYMCRYPT_FIPS_FAILURE; // This hits when nMaxBitsOfQ = (nBitsOfP-1) <= 160 + goto cleanup; + } + + pDlgroup->cbPrimeQ = (pDlgroup->nBitsOfQ + 7)/8; + pDlgroup->nDigitsOfQ = SymCryptDigitsFromBits( pDlgroup->nBitsOfQ ); + pDlgroup->nDefaultBitsPriv = pDlgroup->nBitsOfQ; + pDlgroup->nBitsOfSeed = pDlgroup->nBitsOfQ; + pDlgroup->cbSeed = (pDlgroup->nBitsOfSeed+7)/8; + } + + // Helper variables + nBitsOfP = pDlgroup->nBitsOfP; + nDigitsOfP = pDlgroup->nDigitsOfP; + nBitsOfQ = pDlgroup->nBitsOfQ; + nDigitsOfQ = pDlgroup->nDigitsOfQ; + + // Create the modulus Q + pDlgroup->pmQ = SymCryptModulusCreate( pDlgroup->pbQ, SymCryptSizeofModulusFromDigits( nDigitsOfQ ), nDigitsOfQ ); + + // Conditions on the hash function output size + // The second condition is needed for generation of G in SymCrypt + // since it allows even very small sizes of P. + if ( (8*((UINT32)SymCryptHashResultSize( hashAlgorithm )) < nBitsOfQ) || + (8*((UINT32)SymCryptHashResultSize( hashAlgorithm )) > nBitsOfP) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Set the group's hash algorithm + pDlgroup->pHashAlgorithm = hashAlgorithm; + + // Calculate sizes for the 2*Q divisor + ndDivTwoQ = SymCryptDigitsFromBits(nBitsOfQ + 1); + cbDivTwoQ = SymCryptSizeofDivisorFromDigits(ndDivTwoQ); + + // Scratch space + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - SymCryptDlgroupScratchSpace_FIPS is bounded by 2^28. + // + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = SymCryptDlgroupScratchSpace_FIPS( nBitsOfP, nBitsOfQ, hashAlgorithm ); + pbScratch = SymCryptCallbackAlloc(cbScratch); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Create a divisor 2*Q (needed for the generation of P) + pdDivTwoQ = SymCryptDivisorCreate( pbScratch, cbDivTwoQ, ndDivTwoQ ); + pbScratchInternal = pbScratch + cbDivTwoQ; + cbScratchInternal = cbScratch - cbDivTwoQ; + + // Create a trial division context for both P and Q + pTrialDivisionContext = SymCryptCreateTrialDivisionContext( pDlgroup->nDigitsOfP ); + if (pTrialDivisionContext == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + do + { + do + { + // Fill the seed buffer in the DLGroup with seedlen bits + scError = SymCryptCallbackRandom( pDlgroup->pbSeed, pDlgroup->cbSeed ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Zero-out the top bits if needed + if ((pDlgroup->nBitsOfSeed)%8 != 0) + { + pDlgroup->pbSeed[0] &= ((BYTE)0xff >> (8 - (pDlgroup->nBitsOfSeed)%8)); + } + + scError = SymCryptDlgroupGeneratePrimeQ_FIPS( + pDlgroup, + pTrialDivisionContext, + &fPrimeQFound, + SymCryptIntFromModulus(pDlgroup->pmQ), + pdDivTwoQ, + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + while (fPrimeQFound == 0); + + scError = SymCryptDlgroupGeneratePrimeP_FIPS( + pDlgroup, + pdDivTwoQ, + 4*nBitsOfP - 1, + pTrialDivisionContext, + &fPrimePFound, + SymCryptIntFromModulus(pDlgroup->pmP), + &(pDlgroup->dwGenCounter), + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + while (fPrimePFound == 0); + + // Specify that we have a Q + pDlgroup->fHasPrimeQ = TRUE; + + // Convert both of P and Q to moduli + // IntToModulus requirement: + // Both P,Q > 0 since they are primes + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmP ), + pDlgroup->pmP, + 1000*nBitsOfP, // 
Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmQ ), + pDlgroup->pmQ, + 1000*nBitsOfP, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + // Generate G + scError = SymCryptDlgroupGenerateGenG_FIPS( pDlgroup, pDlgroup->peG, pbScratch, cbScratch ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + if (pTrialDivisionContext!=NULL) + { + SymCryptFreeTrialDivisionContext( pTrialDivisionContext ); + } + + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupSetValueSafePrime( + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE dhSafePrimeType, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS safePrimeParams = NULL; + + UINT32 i; + UINT32 nBitsOfQ; + + // Given we know nBitsOfP = nBitsOfQ+1 for all safe-prime groups, this specifies a tight bound when selecting a group + UINT32 nMaxBitsOfP = SYMCRYPT_MIN(pDlgroup->nMaxBitsOfP, pDlgroup->nMaxBitsOfQ+1); + UINT32 nMaxDigitsOfP; + + if ( dhSafePrimeType == SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_NONE ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Iterate through all named safe-prime groups until we find one which fits the requested parameters + // We can definitely do something smarter here, but we have only 10 values to check so do the dumb thing for now + // Relies on the fact the SymCryptNamedSafePrimeGroups is ordered from largest to smallest + for ( i=0; i<SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT; i++ ) + { + if ( SymCryptNamedSafePrimeGroups[i]->eDhSafePrimeType == dhSafePrimeType && + SymCryptNamedSafePrimeGroups[i]->nBitsOfP <= nMaxBitsOfP ) + { + safePrimeParams = 
SymCryptNamedSafePrimeGroups[i]; + break; + } + } + + if (safePrimeParams == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + nMaxDigitsOfP = SymCryptDigitsFromBits(safePrimeParams->nBitsOfP); + + // Scratch space + // + // From symcrypt_internal.h we have: + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // + // Thus the following calculation does not overflow cbScratch. + // + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(nMaxDigitsOfP), + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nMaxDigitsOfP) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Set fields marking the Dlgroup as being a named safe-prime group + pDlgroup->isSafePrimeGroup = TRUE; + pDlgroup->eFipsStandard = SYMCRYPT_DLGROUP_FIPS_NONE; + pDlgroup->nMinBitsPriv = safePrimeParams->nMinBitsPriv; + pDlgroup->nDefaultBitsPriv = safePrimeParams->nDefaultBitsPriv; + + // Ensure that fields which don't apply to named safe-prime groups are cleared + pDlgroup->pHashAlgorithm = NULL; + pDlgroup->dwGenCounter = 0; + + pDlgroup->nBitsOfSeed = 0; + pDlgroup->pbSeed = NULL; + pDlgroup->cbSeed = 0; + + // Set the bitsize and bytesize of P + pDlgroup->nBitsOfP = safePrimeParams->nBitsOfP; + pDlgroup->cbPrimeP = (safePrimeParams->nBitsOfP + 7)/ 8; + pDlgroup->nDigitsOfP = SymCryptDigitsFromBits(safePrimeParams->nBitsOfP); + + // Set the bitsize and bytesize of Q + nBitsOfQ = pDlgroup->nBitsOfP - 1; + pDlgroup->nBitsOfQ = nBitsOfQ; + pDlgroup->cbPrimeQ = (nBitsOfQ + 7)/8; + pDlgroup->nDigitsOfQ = SymCryptDigitsFromBits(nBitsOfQ); + pDlgroup->fHasPrimeQ = TRUE; + + // + // Prime P + // + + // Recreate the modulus P + // (this will set nDigits in the modulus object appropriately, which is necessary for use of SymCryptIntShr1 below) + pDlgroup->pmP = SymCryptModulusCreate( (PBYTE) pDlgroup->pmP, 
SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfP ), pDlgroup->nDigitsOfP ); + + scError = SymCryptIntSetValue( safePrimeParams->pcbPrimeP, pDlgroup->cbPrimeP, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, SymCryptIntFromModulus(pDlgroup->pmP) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // IntToModulus requirement: + // nBitsOfP >= SYMCRYPT_DLGROUP_MIN_BITSIZE_P --> P > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmP ), + pDlgroup->pmP, + 1000*pDlgroup->nBitsOfP, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + // + // Prime Q + // + + // Create the modulus Q + pDlgroup->pmQ = SymCryptModulusCreate( pDlgroup->pbQ, SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfQ ), pDlgroup->nDigitsOfQ ); + + // Q = floor( P / 2 ) + SymCryptIntShr1( 0, SymCryptIntFromModulus(pDlgroup->pmP), SymCryptIntFromModulus(pDlgroup->pmQ) ); + + // IntToModulus requirement: + // nBitsOfQ >= SYMCRYPT_DLGROUP_MIN_BITSIZE_Q --> Q > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmQ ), + pDlgroup->pmQ, + 1000*nBitsOfQ, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + // + // Generator G + // + + // G to 2 + SymCryptModElementSetValueUint32( 2, pDlgroup->pmP, pDlgroup->peG, pbScratch, cbScratch ); + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +BOOLEAN +SYMCRYPT_CALL +SymCryptDlgroupIsSame( + _In_ PCSYMCRYPT_DLGROUP pDlgroup1, + _In_ PCSYMCRYPT_DLGROUP pDlgroup2 ) +{ + BOOLEAN fIsSameGroup = FALSE; + + if ( pDlgroup1 == pDlgroup2 ) + { + fIsSameGroup = TRUE; + goto cleanup; + } + + if ( (pDlgroup1->nBitsOfP != pDlgroup2->nBitsOfP) || + (pDlgroup1->nDigitsOfP != pDlgroup2->nDigitsOfP) || + !SymCryptIntIsEqual ( SymCryptIntFromModulus(pDlgroup1->pmP), SymCryptIntFromModulus(pDlgroup2->pmP) ) || + !SymCryptModElementIsEqual ( 
pDlgroup1->pmP, pDlgroup1->peG, pDlgroup2->peG )) + { + goto cleanup; + } + + fIsSameGroup = TRUE; + +cleanup: + return fIsSameGroup; +} + +VOID +SYMCRYPT_CALL +SymCryptDlgroupGetSizes( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _Out_ SIZE_T* pcbPrimeP, + _Out_ SIZE_T* pcbPrimeQ, + _Out_ SIZE_T* pcbGenG, + _Out_ SIZE_T* pcbSeed ) +{ + if (pcbPrimeP!=NULL) + { + *pcbPrimeP = pDlgroup->cbPrimeP; + } + + if (pcbPrimeQ!=NULL) + { + *pcbPrimeQ = pDlgroup->cbPrimeQ; // This returns 0 if the group does not have a prime Q + } + + if (pcbGenG!=NULL) + { + *pcbGenG = pDlgroup->cbPrimeP; + } + + if (pcbSeed!=NULL) + { + *pcbSeed = pDlgroup->cbSeed; // This returns 0 if the group does not have a prime Q + } +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupAutoCompleteNamedSafePrimeGroup( + _Inout_ PSYMCRYPT_DLGROUP pDlgroup, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratchInternal; + SIZE_T cbScratchInternal; + + PSYMCRYPT_INT piTemp = NULL; + UINT32 cbTemp; + UINT32 i; + UINT32 nBitsOfQ; + PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS safePrimeParams = NULL; + + // Check whether bottom 64b of P all 1 - as first cheap check + if ( SymCryptIntGetValueLsbits64( SymCryptIntFromModulus(pDlgroup->pmP) ) != ((UINT64) -1) ) + { + goto cleanup; // Not a named safe-prime group + } + + cbTemp = SymCryptSizeofIntFromDigits( pDlgroup->nDigitsOfP ); + SYMCRYPT_ASSERT( cbScratch >= cbTemp ); + + // Create an integer piTemp + piTemp = SymCryptIntCreate( pbScratch, cbTemp, pDlgroup->nDigitsOfP ); + pbScratchInternal = pbScratch + cbTemp; + cbScratchInternal = cbScratch - cbTemp; + + // Set piTemp to the generator G (this will fail if the number cannot fit in the object) + SymCryptModElementToInt( pDlgroup->pmP, pDlgroup->peG, piTemp, pbScratchInternal, cbScratchInternal ); + + // Generator must be 2 mod P + if ( !SymCryptIntIsEqualUint32( piTemp, 2 ) ) + { + goto cleanup; // Not a named safe-prime group + } + + 
// Iterate through all named safe-prime groups and check whether any of them have matching Prime P + // We can definitely do something smarter here, but we have only 10 values to check so do the dumb thing for now + for ( i=0; i<SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT; i++ ) + { + if ( SymCryptNamedSafePrimeGroups[i]->nBitsOfP == pDlgroup->nBitsOfP ) + { + // Set piTemp to the named safe-prime group's P (this will fail if the number cannot fit in the object) + SymCryptIntSetValue( SymCryptNamedSafePrimeGroups[i]->pcbPrimeP, pDlgroup->cbPrimeP, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTemp ); + + if ( SymCryptIntIsEqual( piTemp, SymCryptIntFromModulus(pDlgroup->pmP) ) ) + { + safePrimeParams = SymCryptNamedSafePrimeGroups[i]; + break; + } + } + } + + // If we found a match in the previous loop, auto-populate appropriate fields in pDlGroup + if (safePrimeParams != NULL) + { + if ( pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2 || + pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_3 ) + { + // Inappropriate use of named safe-prime groups + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Set fields marking the Dlgroup as being a named safe-prime group + pDlgroup->isSafePrimeGroup = TRUE; + pDlgroup->eFipsStandard = SYMCRYPT_DLGROUP_FIPS_NONE; + pDlgroup->nMinBitsPriv = safePrimeParams->nMinBitsPriv; + pDlgroup->nDefaultBitsPriv = safePrimeParams->nDefaultBitsPriv; + + // Ensure that fields which don't apply to named safe-prime groups are cleared + pDlgroup->pHashAlgorithm = NULL; + pDlgroup->dwGenCounter = 0; + + pDlgroup->nBitsOfSeed = 0; + pDlgroup->pbSeed = NULL; + pDlgroup->cbSeed = 0; + + // Set the bitsize and bytesize of Q + nBitsOfQ = pDlgroup->nBitsOfP - 1; + pDlgroup->nBitsOfQ = nBitsOfQ; + pDlgroup->cbPrimeQ = (nBitsOfQ + 7)/8; + pDlgroup->nDigitsOfQ = SymCryptDigitsFromBits(nBitsOfQ); + + // Create the modulus Q + pDlgroup->pmQ = SymCryptModulusCreate( pDlgroup->pbQ, SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfQ ), 
pDlgroup->nDigitsOfQ ); + + // piTemp still has the value of P, and Q = floor( P / 2 ) + SymCryptIntShr1( 0, piTemp, piTemp ); + + // Set the prime Q + scError = SymCryptIntCopyMixedSize( piTemp, SymCryptIntFromModulus(pDlgroup->pmQ) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // IntToModulus requirement: + // nBitsOfQ >= SYMCRYPT_DLGROUP_MIN_BITSIZE_Q --> Q > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmQ ), + pDlgroup->pmQ, + 1000*nBitsOfQ, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + pDlgroup->fHasPrimeQ = TRUE; + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupSetValue( + _In_reads_bytes_( cbPrimeP ) PCBYTE pbPrimeP, + SIZE_T cbPrimeP, + _In_reads_bytes_( cbPrimeQ ) PCBYTE pbPrimeQ, + SIZE_T cbPrimeQ, + _In_reads_bytes_( cbGenG ) PCBYTE pbGenG, + SIZE_T cbGenG, + SYMCRYPT_NUMBER_FORMAT numFormat, + _In_opt_ PCSYMCRYPT_HASH pHashAlgorithm, + _In_reads_bytes_( cbSeed ) PCBYTE pbSeed, + SIZE_T cbSeed, + UINT32 genCounter, + SYMCRYPT_DLGROUP_FIPS fipsStandard, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + SIZE_T cbScratchVerify = 0; + + PSYMCRYPT_INT piTemp = NULL; + + UINT32 nBitsOfP = 0; + UINT32 nBitsOfQ = 0; + + UINT32 nMaxDigitsOfP = SymCryptDigitsFromBits(pDlgroup->nMaxBitsOfP); + UINT32 nMaxDigitsOfQ = SymCryptDigitsFromBits(pDlgroup->nMaxBitsOfQ); + + PCSYMCRYPT_TRIALDIVISION_CONTEXT pTrialDivisionContext = NULL; + + // Make sure that the inputs make sense + if ( (pbPrimeP==NULL) || (cbPrimeP==0) || // Prime P is needed + ((pbGenG==NULL)&&(cbGenG>0)) || + ((pbPrimeQ==NULL)&&(cbPrimeQ>0)) || + ((pbGenG==NULL)&&(pbPrimeQ==NULL)) || // We can't have both Q and G missing + ((pbSeed==NULL)&&(cbSeed>0)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // FIPS 186-4 verification is needed + if (fipsStandard 
!= SYMCRYPT_DLGROUP_FIPS_NONE) + { + // Make sure we have what we need + if ((pbPrimeQ == NULL)|| + (cbPrimeQ == 0) || + (pbSeed == NULL) || + (cbSeed == 0) || + ((fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) && (pHashAlgorithm != NULL)) || + ((fipsStandard != SYMCRYPT_DLGROUP_FIPS_186_2) && (pHashAlgorithm == NULL)) ) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + } + + // Set the hashAlgorithm + if ( (fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) || + ((pHashAlgorithm==NULL) && (pbGenG == NULL)) ) + { + // This hits either when: + // - The FIPS standard is 186-2 + // - When we don't specify an algorithm or generator G (thus we need a hash algorithm to generate it + // ourselves) + pDlgroup->pHashAlgorithm = SymCryptSha1Algorithm; + } + else + { + pDlgroup->pHashAlgorithm = pHashAlgorithm; + } + + if ( (fipsStandard != SYMCRYPT_DLGROUP_FIPS_NONE) || (pbGenG == NULL)) + { + // The following is the scratch space for generation / verification + // Notice that we take the maximum size possible so it can get relatively big. + // Also, we will need some additional space for the computed parameters: + // computedP, computedQ, and computedG. + cbScratchVerify = SymCryptDlgroupScratchSpace_FIPS( pDlgroup->nMaxBitsOfP, pDlgroup->nMaxBitsOfQ, pDlgroup->pHashAlgorithm ) + + SYMCRYPT_MAX( SymCryptSizeofIntFromDigits(nMaxDigitsOfP), + SYMCRYPT_MAX( SymCryptSizeofIntFromDigits(nMaxDigitsOfQ), + 2*SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS(nMaxDigitsOfP))); + } + + // Scratch space + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - SymCryptDlgroupScratchSpace_FIPS is bounded by 2^28. + // + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nMaxDigitsOfP) + + SYMCRYPT_MAX( SymCryptSizeofIntFromDigits(nMaxDigitsOfQ), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(nMaxDigitsOfP) ), + cbScratchVerify ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // + // Prime P + // + + // Set the prime P (this will fail if the number cannot fit in the object) + scError = SymCryptIntSetValue( pbPrimeP, cbPrimeP, numFormat, SymCryptIntFromModulus(pDlgroup->pmP) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Check the bitsize of value + nBitsOfP = SymCryptIntBitsizeOfValue(SymCryptIntFromModulus(pDlgroup->pmP)); + if ( nBitsOfP > pDlgroup->nMaxBitsOfP) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (nBitsOfP < SYMCRYPT_DLGROUP_MIN_BITSIZE_P) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + // FIPS 186-4 verification is needed + // Check genCounter is not too big + if (fipsStandard != SYMCRYPT_DLGROUP_FIPS_NONE && + genCounter > 4*nBitsOfP-1 ) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + if( (SymCryptIntGetValueLsbits32( SymCryptIntFromModulus( pDlgroup->pmP ) ) & 1) == 0 ) + { + // P is even, when it should be a prime of at least 32 bits + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Set the bitsize and bytesize of the value + pDlgroup->nBitsOfP = nBitsOfP; + pDlgroup->cbPrimeP = (nBitsOfP + 7)/8; + + // IntToModulus requirement: + // nBitsOfP >= SYMCRYPT_DLGROUP_MIN_BITSIZE_P --> P > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmP ), + pDlgroup->pmP, + 1000*nBitsOfP, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + // + // Prime Q + // + + // Wiping of previous (optional) parameters related to Q + if (pDlgroup->pmQ != NULL) + { + SymCryptModulusWipe( 
pDlgroup->pmQ ); + } + if (pDlgroup->cbSeed != 0) + { + SymCryptWipe( pDlgroup->pbSeed, pDlgroup->cbSeed); + } + + if (pbPrimeQ != NULL) + { + // Create an integer piTemp + piTemp = SymCryptIntCreate( pbScratch, cbScratch, nMaxDigitsOfQ ); + + // Set the prime Q (this will fail if the number cannot fit in the object) + scError = SymCryptIntSetValue( pbPrimeQ, cbPrimeQ, numFormat, piTemp ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Check the bitsize of value + nBitsOfQ = SymCryptIntBitsizeOfValue(piTemp); + if ( nBitsOfQ > pDlgroup->nMaxBitsOfQ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (nBitsOfQ < SYMCRYPT_DLGROUP_MIN_BITSIZE_Q) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + if( (SymCryptIntGetValueLsbits32( piTemp ) & 1) == 0 ) + { + // Some of our modinv algorithms require odd inputs, and Q should be odd as it + // claims to be a prime. + // (Q can't be 2 as it must be at least 32 bits long.) + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Set the bitsize and bytesize of the value + pDlgroup->nBitsOfQ = nBitsOfQ; + pDlgroup->cbPrimeQ = (nBitsOfQ + 7)/8; + pDlgroup->nDigitsOfQ = SymCryptDigitsFromBits(nBitsOfQ); + pDlgroup->nDefaultBitsPriv = nBitsOfQ; + pDlgroup->nBitsOfSeed = nBitsOfQ; + pDlgroup->cbSeed = (nBitsOfQ+7)/8; + + // Create the modulus Q + pDlgroup->pmQ = SymCryptModulusCreate( pDlgroup->pbQ, SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfQ ), pDlgroup->nDigitsOfQ ); + + // Set the prime Q + scError = SymCryptIntCopyMixedSize( piTemp, SymCryptIntFromModulus(pDlgroup->pmQ) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // piTemp is not needed any more so we are free to re-use the scratch space + + // IntToModulus requirement: + // nBitsOfQ >= SYMCRYPT_DLGROUP_MIN_BITSIZE_Q --> Q > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmQ ), + pDlgroup->pmQ, + 1000*nBitsOfP, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | 
SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + pDlgroup->fHasPrimeQ = TRUE; + } + else + { + // Clear all info about Q + pDlgroup->cbPrimeQ = 0; + pDlgroup->nBitsOfQ = 0; + pDlgroup->nDigitsOfQ = 0; + + pDlgroup->nDefaultBitsPriv = 0; + pDlgroup->nBitsOfSeed = 0; + pDlgroup->cbSeed = 0; + + pDlgroup->pmQ = NULL; + pDlgroup->fHasPrimeQ = FALSE; + } + + pDlgroup->isSafePrimeGroup = FALSE; + pDlgroup->nMinBitsPriv = 0; + + // + // Provided Generator G + // + if (pbGenG != NULL) + { + // Set the generator G (this will fail if the number cannot fit in the object) + scError = SymCryptModElementSetValue( pbGenG, cbGenG, numFormat, pDlgroup->pmP, pDlgroup->peG, pbScratch, cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptDlgroupAutoCompleteNamedSafePrimeGroup( pDlgroup, pbScratch, cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Successfully detected, validated and autocompleted named safe-prime group + if (pDlgroup->isSafePrimeGroup) + { + goto cleanup; + } + } + + // + // Verification data (this has to be done before possibly generating G) + // + + // Set the FIPS standard + pDlgroup->eFipsStandard = fipsStandard; + + // Set the seed + if (pbSeed != NULL) + { + if (cbSeed != pDlgroup->cbSeed) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + memcpy( pDlgroup->pbSeed, pbSeed, cbSeed ); + } + + // Set the genCounter + pDlgroup->dwGenCounter = genCounter; + + // + // Generator G + // + + if (pbGenG == NULL) + { + // Let's generate G here since none was given + + // // We need Q (check at the beginning) + // if (pbPrimeQ==NULL) + // { + // scError = SYMCRYPT_INVALID_ARGUMENT; + // goto cleanup; + // } + + // If no seed was given let's generate our own + if (pbSeed==NULL) + { + SymCryptCallbackRandom(pDlgroup->pbSeed, pDlgroup->cbSeed); + } + + scError = SymCryptDlgroupGenerateGenG_FIPS( + pDlgroup, + pDlgroup->peG, + pbScratch, + cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) 
+ { + goto cleanup; + } + } + + + // Verification + if (fipsStandard != SYMCRYPT_DLGROUP_FIPS_NONE) + { + // Verification + PBYTE pbScratchInternal = pbScratch; + SIZE_T cbScratchInternal = cbScratch; + + UINT32 ndDivTwoQ = 0; + UINT32 cbDivTwoQ = 0; + PSYMCRYPT_DIVISOR pdDivTwoQ = NULL; + + UINT32 cbComputed = 0; + PSYMCRYPT_INT piComputed = NULL; + UINT32 fPrimeComputed = 0; + UINT32 dwComputedCounter = 0; + + PSYMCRYPT_MODELEMENT peComputed = NULL; + PSYMCRYPT_MODELEMENT peOne = NULL; + + // Step 3: Acceptable pairs of L,N => skipped + + // Step 6: nBitsOfSeed < nBitsOfQ => skipped + + // Create the divisor object + ndDivTwoQ = SymCryptDigitsFromBits(pDlgroup->nBitsOfQ + 1); + cbDivTwoQ = SymCryptSizeofDivisorFromDigits( ndDivTwoQ ); + pdDivTwoQ = SymCryptDivisorCreate( pbScratchInternal, cbDivTwoQ, ndDivTwoQ ); + pbScratchInternal += cbDivTwoQ; + cbScratchInternal -= cbDivTwoQ; + + // Create the temporary integer of size Q + cbComputed = SymCryptSizeofIntFromDigits( pDlgroup->nDigitsOfQ ); + piComputed = SymCryptIntCreate( pbScratchInternal, cbComputed, pDlgroup->nDigitsOfQ ); + pbScratchInternal += cbComputed; + cbScratchInternal -= cbComputed; + + // Create a trial division context for both P and Q + pTrialDivisionContext = SymCryptCreateTrialDivisionContext( pDlgroup->nDigitsOfP ); + if (pTrialDivisionContext == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Steps 8,9: Check if computed_q is prime and equal to q + scError = SymCryptDlgroupGeneratePrimeQ_FIPS( + pDlgroup, + pTrialDivisionContext, + &fPrimeComputed, + piComputed, + pdDivTwoQ, + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; // Overwrite any possible error + goto cleanup; + } + + if ((!fPrimeComputed)||(!SymCryptIntIsEqual( piComputed, SymCryptIntFromModulus(pDlgroup->pmQ)))) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + // Create the temporary integer 
of size P + pbScratchInternal -= cbComputed; + cbScratchInternal += cbComputed; + cbComputed = SymCryptSizeofIntFromDigits( pDlgroup->nDigitsOfP ); + piComputed = SymCryptIntCreate( pbScratchInternal, cbComputed, pDlgroup->nDigitsOfP ); + pbScratchInternal += cbComputed; + cbScratchInternal -= cbComputed; + + // Steps 10-14: Check if computed_p is prime and equal to p + scError = SymCryptDlgroupGeneratePrimeP_FIPS( + pDlgroup, + pdDivTwoQ, + pDlgroup->dwGenCounter, // Go up to this + pTrialDivisionContext, + &fPrimeComputed, + piComputed, + &dwComputedCounter, + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; // Overwrite any possible error + goto cleanup; + } + + if ((!fPrimeComputed)||(dwComputedCounter!=pDlgroup->dwGenCounter)||(!SymCryptIntIsEqual( piComputed, SymCryptIntFromModulus(pDlgroup->pmP)))) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + // Validation of G + + // Create the temporary modelement mod P + pbScratchInternal -= cbComputed; + cbScratchInternal += cbComputed; + cbComputed = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + peOne = SymCryptModElementCreate( pbScratchInternal, cbComputed, pDlgroup->pmP ); + pbScratchInternal += cbComputed; + cbScratchInternal -= cbComputed; + peComputed = SymCryptModElementCreate( pbScratchInternal, cbComputed, pDlgroup->pmP ); + pbScratchInternal += cbComputed; + cbScratchInternal -= cbComputed; + + // Step 2: Verify that 2<= G <= p-1 + SymCryptModElementSetValueUint32( 1, pDlgroup->pmP, peOne, pbScratchInternal, cbScratchInternal ); // Set the temporary to 1 + + if ((SymCryptModElementIsZero(pDlgroup->pmP, pDlgroup->peG)) || (SymCryptModElementIsEqual(pDlgroup->pmP, pDlgroup->peG, peOne))) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + // Step 3: Verify that G^Q == 1 + SymCryptModExp( + pDlgroup->pmP, + pDlgroup->peG, + SymCryptIntFromModulus(pDlgroup->pmQ), + nBitsOfQ, + 
SYMCRYPT_FLAG_DATA_PUBLIC, + peComputed, + pbScratchInternal, + cbScratchInternal ); + + if (!SymCryptModElementIsEqual(pDlgroup->pmP, peComputed, peOne)) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + } + +cleanup: + if (pTrialDivisionContext!=NULL) + { + SymCryptFreeTrialDivisionContext( pTrialDivisionContext ); + } + + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGetValue( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _Out_writes_bytes_( cbPrimeP ) PBYTE pbPrimeP, + SIZE_T cbPrimeP, + _Out_writes_bytes_( cbPrimeQ ) PBYTE pbPrimeQ, + SIZE_T cbPrimeQ, + _Out_writes_bytes_( cbGenG ) PBYTE pbGenG, + SIZE_T cbGenG, + SYMCRYPT_NUMBER_FORMAT numFormat, + _Out_ PCSYMCRYPT_HASH * ppHashAlgorithm, + _Out_writes_bytes_( cbSeed ) PBYTE pbSeed, + SIZE_T cbSeed, + _Out_ PUINT32 pGenCounter ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + if ( ((pbPrimeP==NULL)&&(cbPrimeP>0)) || + ((pbPrimeQ==NULL)&&(cbPrimeQ>0)) || + ((pbGenG==NULL)&&(cbGenG>0)) || + ((pbSeed==NULL)&&(cbSeed>0)) || + ((pbSeed!=NULL)&&(cbSeed!=pDlgroup->cbSeed)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ((pbPrimeQ!=NULL) && (!pDlgroup->fHasPrimeQ)) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + if (pbPrimeP!=NULL) + { + scError = SymCryptIntGetValue( + SymCryptIntFromModulus(pDlgroup->pmP), + pbPrimeP, + cbPrimeP, + numFormat ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (pbPrimeQ!=NULL) + { + scError = SymCryptIntGetValue( + SymCryptIntFromModulus(pDlgroup->pmQ), + pbPrimeQ, + cbPrimeQ, + numFormat ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (pbGenG!=NULL) + { + // Scratch space is needed + cbScratch = SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pDlgroup->nDigitsOfP); + pbScratch = 
SymCryptCallbackAlloc( cbScratch ); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptModElementGetValue( + pDlgroup->pmP, + pDlgroup->peG, + pbGenG, + cbGenG, + numFormat, + pbScratch, + cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (ppHashAlgorithm!=NULL) + { + if (pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) + { + *ppHashAlgorithm = NULL; + } + else + { + *ppHashAlgorithm = pDlgroup->pHashAlgorithm; + } + } + + if (pbSeed!=NULL && pDlgroup->pbSeed!=NULL) + { + memcpy( pbSeed, pDlgroup->pbSeed, pDlgroup->cbSeed); + } + + if (pGenCounter!=NULL) + { + *pGenCounter = pDlgroup->dwGenCounter; + } + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} diff --git a/libs/symcrypt/lib/dlkey.c b/libs/symcrypt/lib/dlkey.c new file mode 100644 index 00000000000..7df34d6125d --- /dev/null +++ b/libs/symcrypt/lib/dlkey.c @@ -0,0 +1,921 @@ +// +// dlkey.c Dlkey functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +PSYMCRYPT_DLKEY +SYMCRYPT_CALL +SymCryptDlkeyAllocate( _In_ PCSYMCRYPT_DLGROUP pDlgroup ) +{ + PVOID p; + SIZE_T cb; + PSYMCRYPT_DLKEY res = NULL; + + cb = SymCryptSizeofDlkeyFromDlgroup( pDlgroup ); + + p = SymCryptCallbackAlloc( cb ); + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptDlkeyCreate( p, cb, pDlgroup ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptDlkeyFree( _Out_ PSYMCRYPT_DLKEY pkObj ) +{ + SYMCRYPT_CHECK_MAGIC( pkObj ); + SymCryptDlkeyWipe( pkObj ); + SymCryptCallbackFree( pkObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDlkeyFromDlgroup( _In_ PCSYMCRYPT_DLGROUP pDlgroup ) +{ + // Always allocate memory for large private keys + return sizeof(SYMCRYPT_DLKEY) + SymCryptSizeofModElementFromModulus( pDlgroup->pmP ) + SymCryptSizeofIntFromDigits( pDlgroup->nDigitsOfP ); +} + +PSYMCRYPT_DLKEY +SYMCRYPT_CALL +SymCryptDlkeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_DLGROUP pDlgroup ) +{ + PSYMCRYPT_DLKEY pkRes = NULL; + UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + + SYMCRYPT_ASSERT( cbBuffer >= SymCryptSizeofDlkeyFromDlgroup( pDlgroup ) ); + SYMCRYPT_ASSERT( cbBuffer >= sizeof(SYMCRYPT_DLKEY) + cbModElement ); + UNREFERENCED_PARAMETER( cbBuffer ); // only referenced in above ASSERTs... 
+ SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + pkRes = (PSYMCRYPT_DLKEY) pbBuffer; + + // DLKEY parameters + pkRes->fAlgorithmInfo = 0; + pkRes->pDlgroup = pDlgroup; + pkRes->fHasPrivateKey = FALSE; + pkRes->fPrivateModQ = FALSE; // This will be properly set during generate or setvalue + pkRes->nBitsPriv = pDlgroup->nDefaultBitsPriv; + + // Create SymCrypt objects + pbBuffer += sizeof(SYMCRYPT_DLKEY); + + pkRes->pePublicKey = SymCryptModElementCreate( pbBuffer, cbModElement, pDlgroup->pmP ); + if (pkRes->pePublicKey == NULL) + { + goto cleanup; + } + pbBuffer += cbModElement; + + // + // **** Always defer the creation of the private key until the key generation or + // set value. + // + // In place of the pbPrivate pointer store the pointer to the allocated buffer. + // + pkRes->pbPrivate = pbBuffer; + pkRes->piPrivateKey = NULL; + + // Setting the magic + SYMCRYPT_SET_MAGIC( pkRes ); + +cleanup: + return pkRes; +} + +VOID +SYMCRYPT_CALL +SymCryptDlkeyWipe( _Out_ PSYMCRYPT_DLKEY pkDst ) +{ + SymCryptWipe( (PBYTE) pkDst, SymCryptSizeofDlkeyFromDlgroup(pkDst->pDlgroup) ); +} + +VOID +SYMCRYPT_CALL +SymCryptDlkeyCopy( + _In_ PCSYMCRYPT_DLKEY pkSrc, + _Out_ PSYMCRYPT_DLKEY pkDst ) +{ + PCSYMCRYPT_DLGROUP pDlgroup = pkSrc->pDlgroup; + + // + // in-place copy is somewhat common... 
+ // + if( pkSrc != pkDst ) + { + pkDst->fAlgorithmInfo = pkSrc->fAlgorithmInfo; + pkDst->fHasPrivateKey = pkSrc->fHasPrivateKey; + pkDst->fPrivateModQ = pkSrc->fPrivateModQ; + pkDst->nBitsPriv = pkSrc->nBitsPriv; + + // Copy the public key + SymCryptModElementCopy( pDlgroup->pmP, pkSrc->pePublicKey, pkDst->pePublicKey ); + + // Copy the private key + SymCryptIntCopy( pkSrc->piPrivateKey, pkDst->piPrivateKey ); + } +} + + +// DLKEY specific functions + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeySetPrivateKeyLength( _Inout_ PSYMCRYPT_DLKEY pkDlkey, UINT32 nBitsPriv, UINT32 flags ) +{ + if( nBitsPriv > pkDlkey->pDlgroup->nBitsOfQ || + nBitsPriv < pkDlkey->pDlgroup->nMinBitsPriv || + flags != 0 ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + pkDlkey->nBitsPriv = nBitsPriv; + return SYMCRYPT_NO_ERROR; +} + +PCSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlkeyGetGroup( _In_ PCSYMCRYPT_DLKEY pkDlkey ) +{ + return pkDlkey->pDlgroup; +} + +UINT32 +SYMCRYPT_CALL +SymCryptDlkeySizeofPublicKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ) +{ + return pkDlkey->pDlgroup->cbPrimeP; +} + +UINT32 +SYMCRYPT_CALL +SymCryptDlkeySizeofPrivateKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ) +{ + PCSYMCRYPT_DLGROUP pDlgroup = pkDlkey->pDlgroup; + + if (pkDlkey->fPrivateModQ) + { + if (pDlgroup->fHasPrimeQ) + { + if (pkDlkey->nBitsPriv != pDlgroup->nBitsOfQ) + { + return (pkDlkey->nBitsPriv + 7) / 8; + } + else + { + return pDlgroup->cbPrimeQ; + } + } + else + { + return pDlgroup->cbPrimeP; // Somehow the group has no prime Q but the key was set with prime Q, return the safe option + } + } + else + { + return pDlgroup->cbPrimeP; + } +} + +BOOLEAN +SYMCRYPT_CALL +SymCryptDlkeyHasPrivateKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ) +{ + return pkDlkey->fHasPrivateKey; +} + +#define SYMCRYPT_FLAG_DLKEY_PUBLIC_KEY_ORDER_VALIDATION (0x1) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyPerformPublicKeyValidation( + _In_ PCSYMCRYPT_DLKEY pkDlkey, + _In_ UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T 
cbScratch ) +{ + PCSYMCRYPT_DLGROUP pDlgroup = pkDlkey->pDlgroup; + + PSYMCRYPT_MODELEMENT peTmp = NULL; + PSYMCRYPT_MODELEMENT peTmpPublicKeyExpQ = NULL; + UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + + SYMCRYPT_ASSERT( cbScratch >= (2 * cbModElement) + + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(pDlgroup->nDigitsOfP) ); + + // Check if Public key is 0 + if ( SymCryptModElementIsZero( pDlgroup->pmP, pkDlkey->pePublicKey ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + peTmp = SymCryptModElementCreate( pbScratch, cbModElement, pDlgroup->pmP); + pbScratch += cbModElement; + cbScratch -= cbModElement; + + // Check if Public key is P-1 + SymCryptModElementSetValueNegUint32( 1, pDlgroup->pmP, peTmp, pbScratch, cbScratch ); + if ( SymCryptModElementIsEqual( pDlgroup->pmP, pkDlkey->pePublicKey, peTmp ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // Check if Public key is 1 (do this check second as we may reuse 1 element in next check) + SymCryptModElementSetValueUint32( 1, pDlgroup->pmP, peTmp, pbScratch, cbScratch ); + if ( SymCryptModElementIsEqual( pDlgroup->pmP, pkDlkey->pePublicKey, peTmp ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // Perform validation that Public key is in a subgroup of order Q. 
+ if ( (flags & SYMCRYPT_FLAG_DLKEY_PUBLIC_KEY_ORDER_VALIDATION) != 0 ) + { + peTmpPublicKeyExpQ = SymCryptModElementCreate( pbScratch, cbModElement, pDlgroup->pmP); + pbScratch += cbModElement; + cbScratch -= cbModElement; + + // Ensure that Q is specified in the Dlgroup + if ( !pDlgroup->fHasPrimeQ ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // Calculate peTmpPublicKeyExpQ = (Public key)^Q + SymCryptModExp( + pDlgroup->pmP, + pkDlkey->pePublicKey, + SymCryptIntFromModulus( pDlgroup->pmQ ), + pDlgroup->nBitsOfQ, + SYMCRYPT_FLAG_DATA_PUBLIC, // No need for side-channel safety for public key validation + peTmpPublicKeyExpQ, + pbScratch, + cbScratch ); + + // Ensure (Public key)^Q == 1 mod P + // (peTmp still holds the value 1 set for the range check above) + if ( !SymCryptModElementIsEqual( pDlgroup->pmP, peTmpPublicKeyExpQ, peTmp ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + } + + return SYMCRYPT_NO_ERROR; +} + +// Upper bound on the number of random draws SymCryptDlkeyGenerate makes when it +// picks an nBitsPriv-bit private key directly; if every draw is zero the +// function calls SymCryptFatal( 'rndl' ). +#define DLKEY_GEN_RANDOM_GENERIC_LIMIT (1000) + +// Generates a private key for pkDlkey and computes the public key G^priv mod P. +// flags must contain at least one of SYMCRYPT_FLAG_DLKEY_DSA / SYMCRYPT_FLAG_DLKEY_DH +// and may additionally contain only SYMCRYPT_FLAG_DLKEY_GEN_MODP and +// SYMCRYPT_FLAG_KEY_NO_FIPS. +// Without SYMCRYPT_FLAG_KEY_NO_FIPS the call is rejected when a safe-prime group +// is combined with the DSA flag, when a non-safe-prime group is combined with +// the DH flag, or when the private key would not be generated mod Q; in that +// mode the generated public key is also validated and the selftests / pairwise +// consistency checks are run. +// Returns SYMCRYPT_INVALID_ARGUMENT for rejected flag/group combinations, +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE when scratch space cannot be allocated, +// or the error of a failed internal step. On success fHasPrivateKey is set and +// flags is stored in fAlgorithmInfo. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyGenerate( + _In_ UINT32 flags, + _Inout_ PSYMCRYPT_DLKEY pkDlkey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PBYTE pbScratchInternal = NULL; + SIZE_T cbScratchInternal = 0; + + PCSYMCRYPT_DLGROUP pDlgroup = pkDlkey->pDlgroup; + + PSYMCRYPT_MODELEMENT pePrivateKey = NULL; + UINT32 cbPrivateKey = 0; + + PSYMCRYPT_MODULUS pmPriv = NULL; + UINT32 nDigitsPriv = 0; + UINT32 nBitsPriv = 0; + UINT32 fFlagsForModSetRandom = 0; + + BOOLEAN useModSetRandom = TRUE; + UINT32 nBytesPriv = 0; + UINT32 dwShiftBits; + BYTE privMask; + UINT32 cntr; + + PSYMCRYPT_MODELEMENT peTmp = NULL; + UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_DLKEY_DSA | SYMCRYPT_FLAG_DLKEY_DH; + // Make sure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_DLKEY_GEN_MODP | SYMCRYPT_FLAG_KEY_NO_FIPS | algorithmFlags; + + if ( ( ( flags & 
~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Extra sanity checks when running with FIPS + // Either Dlgroup is named SafePrime group and key is for DH, + // or Dlgroup is not named SafePrime group and key is for DSA + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( (pDlgroup->isSafePrimeGroup && (flags & SYMCRYPT_FLAG_DLKEY_DSA)) || + (!(pDlgroup->isSafePrimeGroup) && (flags & SYMCRYPT_FLAG_DLKEY_DH)) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pkDlkey->fPrivateModQ = (((flags & SYMCRYPT_FLAG_DLKEY_GEN_MODP)==0) && (pDlgroup->fHasPrimeQ)); + + if (pkDlkey->fPrivateModQ) + { + pmPriv = pDlgroup->pmQ; + nDigitsPriv = pDlgroup->nDigitsOfQ; + nBitsPriv = pDlgroup->nBitsOfQ; + fFlagsForModSetRandom = SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE | SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE; // 1 to Q-1 + + if ( pDlgroup->isSafePrimeGroup && (pkDlkey->nBitsPriv != pDlgroup->nBitsOfQ) ) + { + useModSetRandom = FALSE; + SYMCRYPT_ASSERT( pkDlkey->nBitsPriv < pDlgroup->nBitsOfQ ); // 2^nBitsPriv < Q + + nBitsPriv = pkDlkey->nBitsPriv; // 1 to (2^nBitsPriv)-1 + nBytesPriv = (pkDlkey->nBitsPriv + 7) / 8; + } + } + else + { + // We perform Private key range validation by construction + // The Private key is constructed in the range [1,min(2^nBitsPriv,Q)-1] precisely when pkDlkey->fPrivateModQ + if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pmPriv = pDlgroup->pmP; + nDigitsPriv = pDlgroup->nDigitsOfP; + nBitsPriv = pDlgroup->nBitsOfP; + fFlagsForModSetRandom = SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE; // 1 to P-2 + } + + cbPrivateKey = SymCryptSizeofModElementFromModulus( pmPriv ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not 
overflow cbScratch. + // + cbScratch = SYMCRYPT_MAX( cbPrivateKey + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nDigitsPriv), + (2 * cbModElement) + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(pDlgroup->nDigitsOfP)); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Create the private key integer + pkDlkey->piPrivateKey = SymCryptIntCreate( pkDlkey->pbPrivate, SymCryptSizeofIntFromDigits(nDigitsPriv), nDigitsPriv ); + + if (useModSetRandom) + { + // Create the private key modelement + pePrivateKey = SymCryptModElementCreate( pbScratch, cbPrivateKey, pmPriv ); + pbScratchInternal = pbScratch + cbPrivateKey; + cbScratchInternal = cbScratch - cbPrivateKey; + + // Set a modelement from 1 to q-1 (or 1 to p-2) + SymCryptModSetRandom( + pmPriv, + pePrivateKey, + fFlagsForModSetRandom, + pbScratchInternal, + cbScratchInternal ); + + // Set the private key + SymCryptModElementToInt( + pmPriv, + pePrivateKey, + pkDlkey->piPrivateKey, + pbScratchInternal, + cbScratchInternal ); + } + else + { + // Set private key from 1 to (2^nBitsPriv)-1 + // Wipe any bytes we won't fill with random + SymCryptWipe( pbScratch + nBytesPriv, (nDigitsPriv * SYMCRYPT_FDEF_DIGIT_SIZE) - nBytesPriv ); + + dwShiftBits = (0u-nBitsPriv) & 7; + privMask = (BYTE)(0xff >> dwShiftBits); + + for(cntr=0; cntr<DLKEY_GEN_RANDOM_GENERIC_LIMIT; cntr++) + { + // Try random values until we get one we like + SymCryptCallbackRandom( pbScratch, nBytesPriv ); + + pbScratch[nBytesPriv-1] &= privMask; + + // If non-zero we have a value in range [1, (2^nBitsPriv)-1] + if( !SymCryptFdefRawIsEqualUint32( (PCUINT32)pbScratch, nDigitsPriv, 0 ) ) + { + break; + } + } + + if (cntr >= DLKEY_GEN_RANDOM_GENERIC_LIMIT) + { + SymCryptFatal( 'rndl' ); + } + + scError = SymCryptIntSetValue( pbScratch, nBytesPriv, SYMCRYPT_NUMBER_FORMAT_LSB_FIRST, pkDlkey->piPrivateKey ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } 
+ + // Calculate the public key + SymCryptModExp( + pDlgroup->pmP, + pDlgroup->peG, + pkDlkey->piPrivateKey, + nBitsPriv, + 0, // Side-channel safe + pkDlkey->pePublicKey, + pbScratch, // We can overwrite pePrivateKey now + cbScratch ); + + // Perform range validation on generated Public key. + if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) + { + // Perform Public key validation. + // Always perform range validation, and validation that Public key is in subgroup of order Q + scError = SymCryptDlkeyPerformPublicKeyValidation( + pkDlkey, + SYMCRYPT_FLAG_DLKEY_PUBLIC_KEY_ORDER_VALIDATION, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + // Set the fHasPrivateKey flag + pkDlkey->fHasPrivateKey = TRUE; + + pkDlkey->fAlgorithmInfo = flags; // We want to track all of the flags in the Dlkey + + if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) + { + if( ( flags & SYMCRYPT_FLAG_DLKEY_DSA ) != 0 ) + { + // Ensure DSA algorithm selftest is run before first use of DSA algorithm + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptDsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_DSA ); + + // Run PCT eagerly as the key can only be used for DSA - there is no value in deferring + SYMCRYPT_RUN_KEY_GEN_PCT( + SymCryptDsaPct, + pkDlkey, + SYMCRYPT_PCT_DSA ); + } + + if( ( flags & SYMCRYPT_FLAG_DLKEY_DH ) != 0 ) + { + // Ensure we have run the algorithm selftest at least once. 
+ SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptDhSecretAgreementSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_DH ); + + // Run PCT eagerly as the key can only be used for DH + + // DH PCT per SP80056a-rev3 5.6.2.1.4 b) + // Recompute the public key from the private key + // Option a) appears to be explicitly overruled by 140-3 IG + + // Calculate the public key from the private key in scratch + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + peTmp = SymCryptModElementCreate( pbScratchInternal, cbModElement, pDlgroup->pmP ); + pbScratchInternal += cbModElement; + cbScratchInternal -= cbModElement; + + SymCryptModExp( + pDlgroup->pmP, + pDlgroup->peG, + pkDlkey->piPrivateKey, + nBitsPriv, // This is either bits of P, Q, or some caller-defined value i.e. public values + 0, // Side-channel safe + peTmp, + pbScratchInternal, + cbScratchInternal ); + + SYMCRYPT_FIPS_ASSERT( SymCryptModElementIsEqual(pDlgroup->pmP, peTmp, pkDlkey->pePublicKey) ); + } + } + +cleanup: + // Scratch held private key material, so it is wiped before being freed + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +// Imports a private key and/or a public key into pkDlkey. +// At least one of pbPrivateKey / pbPublicKey must be non-NULL, and a NULL buffer +// must come with a size of 0. +// flags must contain at least one of SYMCRYPT_FLAG_DLKEY_DSA / SYMCRYPT_FLAG_DLKEY_DH +// and may additionally contain only SYMCRYPT_FLAG_KEY_NO_FIPS and +// SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION (the latter only together with NO_FIPS). +// A private key equal to 0 is always rejected. When no public key is supplied it +// is computed from the private key. When both are supplied and NO_FIPS is not +// set, the public key is recomputed and a mismatch returns +// SYMCRYPT_AUTHENTICATION_FAILURE. +// Other failures return SYMCRYPT_INVALID_ARGUMENT, +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE, or the error of the failing internal call. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeySetValue( + _In_reads_bytes_( cbPrivateKey ) PCBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _In_reads_bytes_( cbPublicKey ) PCBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_DLKEY pkDlkey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE pbScratchInternal = NULL; + UINT32 cbScratchInternal = 0; + + PCSYMCRYPT_DLGROUP pDlgroup = pkDlkey->pDlgroup; + + UINT32 nDigitsPriv = 0; + UINT32 nBitsPriv = 0; + + PSYMCRYPT_MODELEMENT peTmp = NULL; + UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + UINT32 fValidatePublicKeyOrder = SYMCRYPT_FLAG_DLKEY_PUBLIC_KEY_ORDER_VALIDATION; + + if ( ((pbPrivateKey==NULL) && (cbPrivateKey!=0)) || + ((pbPublicKey==NULL) && (cbPublicKey!=0)) || + 
((pbPrivateKey==NULL) && (pbPublicKey==NULL)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_DLKEY_DSA | SYMCRYPT_FLAG_DLKEY_DH; + // Make sure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION | algorithmFlags; + + if ( ( ( flags & ~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Extra sanity checks when running with FIPS + // Either Dlgroup is named SafePrime group and key is for DH, + // or Dlgroup is not named SafePrime group and key is for DSA + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( (pDlgroup->isSafePrimeGroup && (flags & SYMCRYPT_FLAG_DLKEY_DSA)) || + (!(pDlgroup->isSafePrimeGroup) && (flags & SYMCRYPT_FLAG_DLKEY_DH)) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check that minimal validation flag only specified with no fips + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) != 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) != 0 ) + { + fValidatePublicKeyOrder = 0; + } + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = SYMCRYPT_MAX( cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(pDlgroup->nDigitsOfP), + (2 * cbModElement) + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(pDlgroup->nDigitsOfP) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + if ( pbPrivateKey != NULL ) + { + // + // Check the size of the imported private key to detect if it is mod P or mod Q + // If the group does not have a Q assume that the imported key is modulo P as + // it wouldn't help us assume otherwise (the bitsize of the private key should be kept + // secret from SC attacks). + // If the private key has had some non-default value set for nBitsPriv then the caller + // has explicitly opted in to more stringent range checking. + // + pkDlkey->fPrivateModQ = ( (pDlgroup->fHasPrimeQ) && + ((cbPrivateKey < pDlgroup->cbPrimeQ) || + ((cbPrivateKey == pDlgroup->cbPrimeQ) && (pDlgroup->cbPrimeQ < pDlgroup->cbPrimeP)) || + (pkDlkey->nBitsPriv != pDlgroup->nDefaultBitsPriv)) ); + + if ( pkDlkey->fPrivateModQ ) + { + nDigitsPriv = pDlgroup->nDigitsOfQ; + nBitsPriv = pDlgroup->nBitsOfQ; + + if ( pDlgroup->isSafePrimeGroup ) + { + nBitsPriv = pkDlkey->nBitsPriv; + } + } + else + { + nDigitsPriv = pDlgroup->nDigitsOfP; + nBitsPriv = pDlgroup->nBitsOfP; + } + + pkDlkey->piPrivateKey = SymCryptIntCreate( pkDlkey->pbPrivate, SymCryptSizeofIntFromDigits(nDigitsPriv), nDigitsPriv ); + + scError = SymCryptIntSetValue( + pbPrivateKey, + cbPrivateKey, + numFormat, + pkDlkey->piPrivateKey ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Perform range validation on imported Private key. 
+ // Check if Private key is 0 - perform unconditionally as it is cheap + // and it never makes sense for private key to be 0 intentionally + if ( SymCryptIntIsEqualUint32( pkDlkey->piPrivateKey, 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Continue range validation on imported Private key. + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // Ensure that Q is specified in the Dlgroup + if ( !pDlgroup->fHasPrimeQ ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // If nBitsPriv is specified, check if Private key is greater than or equal to 2^nBitsPriv + // Otherwise, check if Private key is greater than or equal to Q + if ( ( ( (nBitsPriv < pDlgroup->nBitsOfQ) && + SymCryptIntBitsizeOfValue( pkDlkey->piPrivateKey ) > nBitsPriv ) ) || + ( (nBitsPriv >= pDlgroup->nBitsOfQ) && + !SymCryptIntIsLessThan( pkDlkey->piPrivateKey, SymCryptIntFromModulus( pDlgroup->pmQ ) ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + pkDlkey->fHasPrivateKey = TRUE; + } + + if ( pbPublicKey != NULL ) + { + scError = SymCryptModElementSetValue( + pbPublicKey, + cbPublicKey, + numFormat, + pDlgroup->pmP, + pkDlkey->pePublicKey, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Perform range validation on imported Public key. + if ( (flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION) == 0 ) + { + // Perform Public key validation. 
+ // Always perform range validation + // May also perform validation that Public key is in subgroup of order Q, depending on flags + scError = SymCryptDlkeyPerformPublicKeyValidation( + pkDlkey, + fValidatePublicKeyOrder, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + // Calculating the public key if no key was provided + // or if needed for keypair regeneration validation + if ( (pbPublicKey==NULL) || + ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + (pbPrivateKey!=NULL) && (pbPublicKey!=NULL) ) ) + { + // Calculate the public key from the private key + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + // By default calculate the public key directly where it will be persisted + peTmp = pkDlkey->pePublicKey; + + if ( pbPublicKey != NULL ) + { + // If doing regeneration validation calculate the public key in scratch + peTmp = SymCryptModElementCreate( pbScratchInternal, cbModElement, pDlgroup->pmP); + pbScratchInternal += cbModElement; + cbScratchInternal -= cbModElement; + } + + SymCryptModExp( + pDlgroup->pmP, + pDlgroup->peG, + pkDlkey->piPrivateKey, + nBitsPriv, // This is either bits of P, Q, or some caller-defined value i.e. public values + 0, // Side-channel safe + peTmp, + pbScratchInternal, + cbScratchInternal ); + + if ( pbPublicKey != NULL ) + { + if ( !SymCryptModElementIsEqual(pDlgroup->pmP, peTmp, pkDlkey->pePublicKey) ) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + } + else if ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) == 0 ) + { + // Perform Public key validation on generated public key. 
+ // Always perform range validation + // May also perform validation that Public key is in subgroup of order Q, depending on flags + scError = SymCryptDlkeyPerformPublicKeyValidation( + pkDlkey, + fValidatePublicKeyOrder, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + pkDlkey->fAlgorithmInfo = flags; // We want to track all of the flags in the Dlkey + + if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) + { + if( ( flags & SYMCRYPT_FLAG_DLKEY_DSA ) != 0 ) + { + // Ensure DSA algorithm selftest is run before first use of DSA algorithm + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptDsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_DSA ); + + // PCT does not need to be run on import - mark it as done + pkDlkey->fAlgorithmInfo |= SYMCRYPT_PCT_DSA; + } + + if( ( flags & SYMCRYPT_FLAG_DLKEY_DH ) != 0 ) + { + // Ensure DH algorithm selftest is run before first use of DH algorithm + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptDhSecretAgreementSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_DH ); + } + } + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + + +// Exports the private key and/or the public key of pkDlkey in numFormat. +// A NULL output buffer skips that part of the key. +// Returns SYMCRYPT_INVALID_ARGUMENT when both buffers are NULL, when a NULL +// buffer comes with a non-zero size, or when the private key is requested but +// fHasPrivateKey is not set. Scratch space is only allocated for the public key +// export; allocation failure returns SYMCRYPT_MEMORY_ALLOCATION_FAILURE. +// flags is not used. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyGetValue( + _In_ PCSYMCRYPT_DLKEY pkDlkey, + _Out_writes_bytes_( cbPrivateKey ) + PBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _Out_writes_bytes_( cbPublicKey ) + PBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + + PCSYMCRYPT_DLGROUP pDlgroup = pkDlkey->pDlgroup; + + UNREFERENCED_PARAMETER( flags ); + + if ( ((pbPrivateKey==NULL) && (cbPrivateKey!=0)) || + ((pbPublicKey==NULL) && (cbPublicKey!=0)) || + ((pbPrivateKey==NULL) && (pbPublicKey==NULL)) || + ((pbPrivateKey!=NULL) && !pkDlkey->fHasPrivateKey) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (pbPrivateKey != NULL) + { + scError = SymCryptIntGetValue( + pkDlkey->piPrivateKey, + pbPrivateKey, + cbPrivateKey, + 
numFormat ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (pbPublicKey != NULL) + { + cbScratch = SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(pDlgroup->nDigitsOfP); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptModElementGetValue( + pDlgroup->pmP, + pkDlkey->pePublicKey, + pbPublicKey, + cbPublicKey, + numFormat, + pbScratch, + cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +/* ORs additional algorithm flags into pkDlkey->fAlgorithmInfo. */ +/* flags must be a non-empty subset of SYMCRYPT_FLAG_DLKEY_DSA | SYMCRYPT_FLAG_DLKEY_DH; anything else returns SYMCRYPT_INVALID_ARGUMENT and leaves the key unchanged. */ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyExtendKeyUsage( + _Inout_ PSYMCRYPT_DLKEY pkDlkey, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_DLKEY_DSA | SYMCRYPT_FLAG_DLKEY_DH; + + if ( ( ( flags & ~algorithmFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pkDlkey->fAlgorithmInfo |= flags; + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/dsa.c b/libs/symcrypt/lib/dsa.c new file mode 100644 index 00000000000..068d413c8a4 --- /dev/null +++ b/libs/symcrypt/lib/dsa.c @@ -0,0 +1,695 @@ +// +// dsa.c DSA functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +// Truncating function according to the FIPS 186-4 standard +// +// Loads the hash value MSB-first into piIntLarge; if its bit size exceeds +// nBitsOfQ it is shifted right until only nBitsOfQ bits remain. The result is +// copied into piIntQ and converted to the modelement peMsghash modulo Q. +// piIntLarge and piIntQ are caller-provided work objects that are overwritten. +// flags is not used. Returns the error of SymCryptIntSetValue or +// SymCryptIntCopyMixedSize if either fails, SYMCRYPT_NO_ERROR otherwise. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaTruncateHash( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peMsghash, + _Out_ PSYMCRYPT_INT piIntLarge, + _Out_ PSYMCRYPT_INT piIntQ, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + UNREFERENCED_PARAMETER( flags ); + + // Get the value of msghash into piIntLarge + scError = SymCryptIntSetValue( pbHashValue, cbHashValue, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piIntLarge ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Truncate the rightmost bits if the value exceeds the size of the modulus Q + if (SymCryptIntBitsizeOfValue(piIntLarge) > pDlgroup->nBitsOfQ) + { + SymCryptIntDivPow2( piIntLarge, SymCryptIntBitsizeOfValue(piIntLarge) - pDlgroup->nBitsOfQ, piIntLarge ); + } + + scError = SymCryptIntCopyMixedSize( piIntLarge, piIntQ ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + // This should never fail here as we truncated the IntLarge + goto cleanup; + } + + // Now we can call IntToModElement as they have the same digit size + SymCryptIntToModElement( piIntQ, pDlgroup->pmQ, peMsghash, pbScratch, cbScratch ); // msghash mod Q + +cleanup: + return scError; +} + +// Maximum number of signing attempts with a random k before SymCryptDsaSignEx +// gives up and returns SYMCRYPT_INVALID_ARGUMENT. +#define SYMCRYPT_MAX_DSA_SIGNATURE_COUNT (100) + +// Computes the DSA signature (R, S) of a hash value. +// The key must carry SYMCRYPT_FLAG_DLKEY_DSA in fAlgorithmInfo and a private key +// modulo Q, and its group must have a prime Q and must not be a named safe-prime +// group; otherwise SYMCRYPT_INVALID_ARGUMENT is returned. +// piK is optional: when NULL a random k in [1, Q-1] is drawn and the loop +// retries while R or S is zero; when non-NULL it is reduced mod Q, must be +// non-zero, and a zero R or S is reported as SYMCRYPT_INVALID_ARGUMENT. +// R is written to the first cbSignature / 2 bytes of pbSignature and S to the +// following cbSignature / 2 bytes, both in the given format. +// On any error the whole signature buffer is wiped. flags is not used. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaSignEx( + _In_ PCSYMCRYPT_DLKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_opt_ PCSYMCRYPT_INT piK, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PBYTE pbScratchInternal = NULL; + SIZE_T cbScratchInternal = 0; + + SIZE_T cbScratchInputK = 0; // Extra scratch 
space needed if the caller specified a K + + PCSYMCRYPT_DLGROUP pDlgroup = pKey->pDlgroup; + UINT32 nDigitsOfP = pDlgroup->nDigitsOfP; + UINT32 nDigitsOfQ = pDlgroup->nDigitsOfQ; + + UINT32 ndIntLarge = 0; + + UINT32 cbIntLarge = 0; + UINT32 cbIntQ = 0; + UINT32 cbIntP = 0; + + UINT32 cbModelementP = 0; + UINT32 cbModelementQ = 0; + + // Helper Integers + PSYMCRYPT_INT piIntLarge = NULL; // Safe size for all caller specified sizes + PSYMCRYPT_INT piIntP = NULL; // Same number of digits as P + PSYMCRYPT_INT piIntQ = NULL; // Same number of digits as Q + + // Elements modulo P + PSYMCRYPT_MODELEMENT peRmodP = NULL; + + // Elements modulo Q + PSYMCRYPT_MODELEMENT peMsghash = NULL; + PSYMCRYPT_MODELEMENT peRmodQ = NULL; + PSYMCRYPT_MODELEMENT peK = NULL; + PSYMCRYPT_MODELEMENT peS = NULL; + + UINT32 signatureCount = 0; + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key may be used in DSA + if ( ((pKey->fAlgorithmInfo & SYMCRYPT_FLAG_DLKEY_DSA) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the group and the key have all the + // information for dsa, i.e. 
prime q and private key + // modulo q, and we are not using a named DH safe-prime + // group + if ((!pDlgroup->fHasPrimeQ) || + (!pKey->fHasPrivateKey) || + (!pKey->fPrivateModQ) || + (pDlgroup->isSafePrimeGroup)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Calculate the digit size for the HashValue + ndIntLarge = SymCryptDigitsFromBits( (UINT32)cbHashValue * 8 ); + + // Calculate the sizes of temp objects + cbIntLarge = SymCryptSizeofIntFromDigits(ndIntLarge); + cbIntQ = SymCryptSizeofIntFromDigits(nDigitsOfQ); + cbIntP = SymCryptSizeofIntFromDigits(nDigitsOfP); + + cbModelementP = SymCryptSizeofModElementFromModulus( pDlgroup-> pmP ); + cbModelementQ = SymCryptSizeofModElementFromModulus( pDlgroup-> pmQ ); + + // Allocate scratch space + cbScratchInputK = (piK==NULL)?0:SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD(SymCryptIntDigitsizeOfObject(piK),nDigitsOfQ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = cbIntLarge + cbIntQ + cbIntP + cbModelementP + 4*cbModelementQ + + SYMCRYPT_MAX( cbScratchInputK, + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfQ ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfP ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigitsOfP ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigitsOfQ ) )))); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Create the objects + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + piIntLarge = SymCryptIntCreate(pbScratchInternal, cbIntLarge, ndIntLarge); pbScratchInternal += cbIntLarge; cbScratchInternal -= cbIntLarge; + piIntQ = SymCryptIntCreate(pbScratchInternal, cbIntQ, nDigitsOfQ); pbScratchInternal += cbIntQ; cbScratchInternal -= cbIntQ; + piIntP = SymCryptIntCreate(pbScratchInternal, cbIntP, nDigitsOfP); pbScratchInternal += cbIntP; cbScratchInternal -= cbIntP; + + peRmodP = SymCryptModElementCreate(pbScratchInternal, cbModelementP, pDlgroup->pmP); pbScratchInternal += cbModelementP; cbScratchInternal -= cbModelementP; + + peMsghash = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + peRmodQ = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + peK = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + peS = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + + // Get the message into a modelement + scError = SymCryptDsaTruncateHash( + pDlgroup, + pbHashValue, + cbHashValue, + flags, + peMsghash, + piIntLarge, + piIntQ, + 
pbScratchInternal, + cbScratchInternal ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // + // Main loop: Stop when both R and S are not zero (unless a specific k is provided) + // + while( TRUE ) + { + if (piK==NULL) + { + SymCryptModSetRandom( + pDlgroup->pmQ, + peK, + SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE|SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE, + pbScratchInternal, + cbScratchInternal ); + + SymCryptModElementToInt( + pDlgroup->pmQ, + peK, + piIntQ, + pbScratchInternal, + cbScratchInternal ); + } + else + { + SymCryptIntDivMod( + piK, + SymCryptDivisorFromModulus( pDlgroup->pmQ ), + NULL, + piIntQ, + pbScratchInternal, + cbScratchInternal ); + + SymCryptIntToModElement( + piIntQ, + pDlgroup->pmQ, + peK, + pbScratchInternal, + cbScratchInternal ); + + // Make sure that the K passed in is not zero + if (SymCryptModElementIsZero(pDlgroup->pmQ, peK)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Here piIntQ and peK hold the random exponent K + + // G^K mod P + SymCryptModExp( + pDlgroup->pmP, + pDlgroup->peG, + piIntQ, + pDlgroup->nBitsOfQ, + 0, // Side-channel safe + peRmodP, + pbScratchInternal, + cbScratchInternal ); + + // Convert to integer + SymCryptModElementToInt( + pDlgroup->pmP, + peRmodP, + piIntP, + pbScratchInternal, + cbScratchInternal ); + + // Convert to mod Q + SymCryptIntDivMod( + piIntP, + SymCryptDivisorFromModulus( pDlgroup->pmQ ), + NULL, + piIntQ, + pbScratchInternal, + cbScratchInternal ); + + // Convert to modelement + SymCryptIntToModElement( + piIntQ, + pDlgroup->pmQ, + peRmodQ, + pbScratchInternal, + cbScratchInternal ); + + // Invert k mod q + scError = SymCryptModInv( + pDlgroup->pmQ, + peK, + peK, // In place + 0, + pbScratchInternal, + cbScratchInternal ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the private key X to modelement + // *** We are sure here that the digit + // size of it is nDigitsOfQ + SymCryptIntToModElement( + pKey->piPrivateKey, + pDlgroup->pmQ, + 
peS, + pbScratchInternal, + cbScratchInternal ); + + // X*R + SymCryptModMul( + pDlgroup->pmQ, + peS, + peRmodQ, + peS, + pbScratchInternal, + cbScratchInternal ); + + // H(m)+X*R + SymCryptModAdd( + pDlgroup->pmQ, + peS, + peMsghash, + peS, + pbScratchInternal, + cbScratchInternal ); + + // S = k^{-1}*(H(m)+X*R) + SymCryptModMul( + pDlgroup->pmQ, + peK, + peS, + peS, + pbScratchInternal, + cbScratchInternal ); + + // Done once neither R nor S is zero + if ( !( SymCryptModElementIsZero( pDlgroup->pmQ, peRmodQ ) | + SymCryptModElementIsZero( pDlgroup->pmQ, peS ) ) ) + { + break; + } + + if (piK != NULL) + { + // piK resulted in 0 signature + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + signatureCount++; + if ( signatureCount >= SYMCRYPT_MAX_DSA_SIGNATURE_COUNT ) + { + // We have not generated a non-zero signature after SYMCRYPT_MAX_DSA_SIGNATURE_COUNT attempts; + // Something is wrong with the group setup + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Output R + scError = SymCryptModElementGetValue( + pDlgroup->pmQ, + peRmodQ, + pbSignature, + cbSignature / 2, + format, + pbScratchInternal, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Output S + scError = SymCryptModElementGetValue( + pDlgroup->pmQ, + peS, + pbSignature + cbSignature / 2, + cbSignature / 2, + format, + pbScratchInternal, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + // Scratch held k and private key derived values, so it is wiped before being freed + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + // Never leave a partial signature in the caller's buffer on failure + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipe( pbSignature, cbSignature ); + } + + return scError; +} + +// DSA signing with a randomly generated k: forwards all arguments to +// SymCryptDsaSignEx with piK == NULL and returns its result. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaSign( + _In_ PCSYMCRYPT_DLKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + return SymCryptDsaSignEx( pKey, 
pbHashValue, cbHashValue, NULL, format, flags, pbSignature, cbSignature );
+}
+
+
+//
+// SymCryptDsaVerify
+//
+// Verifies the DSA signature (R,S) in pbSignature over pbHashValue using the public
+// key of pKey. R is read from the first cbSignature/2 bytes and S from the following
+// cbSignature/2 bytes, both decoded according to format.
+//
+// Returns SYMCRYPT_NO_ERROR only when the recomputed value equals R.
+// NOTE: the cleanup path overwrites scError whenever fValidSignature is FALSE, so every
+// other exit (key or group rejected, allocation failure, R or S out of range, a helper
+// returning an error) is reported as SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptDsaVerify(
+    _In_ PCSYMCRYPT_DLKEY pKey,
+    _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue,
+    SIZE_T cbHashValue,
+    _In_reads_bytes_( cbSignature ) PCBYTE pbSignature,
+    SIZE_T cbSignature,
+    SYMCRYPT_NUMBER_FORMAT format,
+    UINT32 flags )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    BOOLEAN fValidSignature = FALSE;
+
+    PBYTE pbScratch = NULL;
+    SIZE_T cbScratch = 0;
+    PBYTE pbScratchInternal = NULL;
+    SIZE_T cbScratchInternal = 0;
+
+    PCSYMCRYPT_DLGROUP pDlgroup = pKey->pDlgroup;
+    UINT32 nDigitsOfP = pDlgroup->nDigitsOfP;
+    UINT32 nDigitsOfQ = pDlgroup->nDigitsOfQ;
+
+    UINT32 ndIntLarge = 0;
+
+    UINT32 cbIntLarge = 0;
+    UINT32 cbIntQ = 0;
+    UINT32 cbIntP = 0;
+
+    UINT32 cbModelementP = 0;
+    UINT32 cbModelementQ = 0;
+
+    PSYMCRYPT_MODELEMENT peBases[2] = { NULL, NULL }; // Array with pointers to base points
+
+    // Helper Integers
+    PSYMCRYPT_INT piIntLarge = NULL; // Safe size for all caller specified sizes
+    PSYMCRYPT_INT piIntP = NULL; // Same number of digits as P
+    PSYMCRYPT_INT piIntQ[2] = { NULL, NULL }; // Same number of digits as Q
+
+    // Elements modulo P
+    PSYMCRYPT_MODELEMENT peResP = NULL;
+
+    // Elements modulo Q
+    PSYMCRYPT_MODELEMENT peR = NULL;
+    PSYMCRYPT_MODELEMENT peS = NULL;
+    PSYMCRYPT_MODELEMENT peT = NULL; // Temp
+
+    // NOTE(review): flags is nevertheless forwarded to SymCryptDsaTruncateHash further down.
+    UNREFERENCED_PARAMETER( flags );
+
+    // Make sure that the key may be used in DSA
+    if ( ((pKey->fAlgorithmInfo & SYMCRYPT_FLAG_DLKEY_DSA) == 0) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Make sure that the group has a prime q, and we are not using a named DH safe-prime group
+    if (!pDlgroup->fHasPrimeQ || pDlgroup->isSafePrimeGroup)
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Calculate the digit sizes
+    // piIntLarge must be able to hold the whole hash (cbHashValue bytes) as well as
+    // one signature half (cbSignature/2 bytes == cbSignature*4 bits).
+    ndIntLarge = SymCryptDigitsFromBits( (UINT32)cbHashValue * 8 );
+    ndIntLarge = SYMCRYPT_MAX( ndIntLarge, SymCryptDigitsFromBits( (UINT32)cbSignature * 4 ) ); // pbSignature contains (R,S)
+
+    // Calculate the sizes of temp objects
+    cbIntLarge = SymCryptSizeofIntFromDigits(ndIntLarge);
+    cbIntQ = SymCryptSizeofIntFromDigits(nDigitsOfQ);
+    cbIntP = SymCryptSizeofIntFromDigits(nDigitsOfP);
+
+    cbModelementP = SymCryptSizeofModElementFromModulus( pDlgroup-> pmP );
+    cbModelementQ = SymCryptSizeofModElementFromModulus( pDlgroup-> pmQ );
+
+    //
+    // From symcrypt_internal.h we have:
+    //      - sizeof results are upper bounded by 2^19
+    //      - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE)
+    // Thus the following calculation does not overflow cbScratch.
+    //
+    // The first line sums the temporaries created below; the nested SYMCRYPT_MAX is the
+    // largest scratch requirement among the operations used in this function.
+    cbScratch = cbIntLarge + cbIntP + 2*cbIntQ + cbModelementP + 3*cbModelementQ +
+                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD(nDigitsOfP,nDigitsOfQ),
+                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfQ ),
+                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfP ),
+                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODMULTIEXP( SymCryptModulusDigitsizeOfObject(pDlgroup->pmP), 2, pDlgroup->nBitsOfQ ),
+                     SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigitsOfQ ) ))));
+    pbScratch = SymCryptCallbackAlloc( cbScratch );
+    if (pbScratch==NULL)
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // Create the objects
+    // Each temporary is carved from the front of the allocation; what is left over
+    // (pbScratchInternal, cbScratchInternal) is the scratch space handed to the callees.
+    pbScratchInternal = pbScratch;
+    cbScratchInternal = cbScratch;
+
+    piIntLarge = SymCryptIntCreate(pbScratchInternal, cbIntLarge, ndIntLarge); pbScratchInternal += cbIntLarge; cbScratchInternal -= cbIntLarge;
+    piIntP = SymCryptIntCreate(pbScratchInternal, cbIntP, nDigitsOfP); pbScratchInternal += cbIntP; cbScratchInternal -= cbIntP;
+    piIntQ[0] = SymCryptIntCreate(pbScratchInternal, cbIntQ, nDigitsOfQ); pbScratchInternal += cbIntQ; cbScratchInternal -= cbIntQ;
+    piIntQ[1] = SymCryptIntCreate(pbScratchInternal, cbIntQ, nDigitsOfQ); pbScratchInternal += cbIntQ; cbScratchInternal -= cbIntQ;
+
+    peResP = SymCryptModElementCreate(pbScratchInternal, cbModelementP, pDlgroup->pmP); pbScratchInternal += cbModelementP; cbScratchInternal -= cbModelementP;
+
+    peR = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ;
+    peS = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ;
+    peT = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ;
+
+    // Get R
+    scError = SymCryptIntSetValue( pbSignature, cbSignature / 2, format, piIntLarge );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Check if R is less than Q
+    // (scError is still SYMCRYPT_NO_ERROR on this exit; the failure is signalled through
+    // fValidSignature at cleanup)
+    if ( !SymCryptIntIsLessThan( piIntLarge, SymCryptIntFromModulus( pDlgroup->pmQ ) ) )
+    {
+        goto cleanup;
+    }
+
+    // R mod Q (use piIntQ[0] as temp space)
+    scError = SymCryptIntCopyMixedSize( piIntLarge, piIntQ[0] );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        // This should never fail here as we verified that IntLarge is less than Q
+        goto cleanup;
+    }
+    SymCryptIntToModElement( piIntQ[0], pDlgroup->pmQ, peR, pbScratchInternal, cbScratchInternal );
+
+    // Check if R is zero
+    if (SymCryptModElementIsZero( pDlgroup->pmQ, peR ))
+    {
+        goto cleanup;
+    }
+
+    // Get S
+    scError = SymCryptIntSetValue( pbSignature + cbSignature / 2, cbSignature / 2, format, piIntLarge );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Check if S is less than Q
+    if ( !SymCryptIntIsLessThan( piIntLarge, SymCryptIntFromModulus( pDlgroup->pmQ ) ) )
+    {
+        goto cleanup;
+    }
+
+    // S mod Q (use piIntQ[0] as temp space)
+    scError = SymCryptIntCopyMixedSize( piIntLarge, piIntQ[0] );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        // This should never fail here as we verified that IntLarge is less than Q
+        goto cleanup;
+    }
+    SymCryptIntToModElement( piIntQ[0], pDlgroup->pmQ, peS, pbScratchInternal, cbScratchInternal );
+
+    // Check if S is zero
+    if (SymCryptModElementIsZero( pDlgroup->pmQ, peS ))
+    {
+        goto cleanup;
+    }
+
+    // Calculate 1/S mod Q
+    // S is part of the signature and therefore not a secret.
+    // We mark it public to avoid the use of random blinding, which would require a source of randomness
+    // just to verify a DSA signature.
+    // From here on peS holds 1/S (the inversion is done in place).
+    scError = SymCryptModInv( pDlgroup->pmQ, peS, peS, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratchInternal, cbScratchInternal );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Get the message into a modelement
+    // (piIntLarge and piIntQ[0] are free again at this point and are passed as temporaries)
+    scError = SymCryptDsaTruncateHash(
+                pDlgroup,
+                pbHashValue,
+                cbHashValue,
+                flags,
+                peT,
+                piIntLarge,
+                piIntQ[0],
+                pbScratchInternal,
+                cbScratchInternal );
+    if (scError!=SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    // Calculate U1 = Hash(M)/S modQ
+    SymCryptModMul(
+        pDlgroup->pmQ,
+        peT,
+        peS,
+        peT,
+        pbScratchInternal,
+        cbScratchInternal );
+
+    // Convert U1 to integer
+    SymCryptModElementToInt(
+        pDlgroup->pmQ,
+        peT,
+        piIntQ[0],
+        pbScratchInternal,
+        cbScratchInternal );
+
+    // Calculate U2 = R/S modQ
+    SymCryptModMul(
+        pDlgroup->pmQ,
+        peR,
+        peS,
+        peT,
+        pbScratchInternal,
+        cbScratchInternal );
+
+    // Convert U2 to integer
+    SymCryptModElementToInt(
+        pDlgroup->pmQ,
+        peT,
+        piIntQ[1],
+        pbScratchInternal,
+        cbScratchInternal );
+
+    // Arrange the pointers for v = G^U1 * Y^U2
+    peBases[0] = pDlgroup->peG;
+    peBases[1] = pKey->pePublicKey;
+
+    // v = G^U1 * Y^U2
+    // piIntQ[0] / piIntQ[1] are the exponents U1 / U2 matching peBases[0] / peBases[1]
+    scError = SymCryptModMultiExp(
+                pDlgroup->pmP,
+                peBases,
+                piIntQ,
+                2,
+                pDlgroup->nBitsOfQ,
+                SYMCRYPT_FLAG_DATA_PUBLIC,
+                peResP,
+                pbScratchInternal,
+                cbScratchInternal );
+    if (scError!=SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    // Convert V to a modelement modulo Q
+    // (integer value of V mod P, then its remainder modulo Q, then back to a modelement)
+    SymCryptModElementToInt(
+        pDlgroup->pmP,
+        peResP,
+        piIntP,
+        pbScratchInternal,
+        cbScratchInternal );
+    SymCryptIntDivMod(
+        piIntP,
+        SymCryptDivisorFromModulus( pDlgroup->pmQ ),
+        NULL,
+        piIntQ[0],
+        pbScratchInternal,
+        cbScratchInternal );
+    SymCryptIntToModElement(
+        piIntQ[0],
+        pDlgroup->pmQ,
+        peT,
+        pbScratchInternal,
+        cbScratchInternal );
+
+    // Comparison V = R
+    if (SymCryptModElementIsEqual( pDlgroup->pmQ, peT, peR ))
+    {
+        fValidSignature = TRUE;
+    }
+
+
+cleanup:
+
+    // Any path that did not reach the successful comparison above is reported as a
+    // verification failure, replacing whatever scError held.
+    if (!fValidSignature)
+    {
+        scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
+    }
+
+    if ( pbScratch != NULL )
+    {
+        SymCryptWipe( pbScratch, cbScratch );
+        SymCryptCallbackFree( pbScratch );
+    }
+
+    return scError;
+}
diff --git a/libs/symcrypt/lib/ec_dh.c b/libs/symcrypt/lib/ec_dh.c
new file mode 100644
index 00000000000..e7b84078645
--- /dev/null
+++ b/libs/symcrypt/lib/ec_dh.c
@@ -0,0 +1,157 @@
+//
+// ec_dh.c   ECDH function
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+//
+
+#include "precomp.h"
+
+//
+// SymCryptEcDhSecretAgreement
+//
+// Multiplies the public point of pkPublic by the private scalar of pkPrivate and writes
+// the X coordinate of the result to pbAgreedSecret, encoded according to format.
+//
+// Returns:
+//  SYMCRYPT_INVALID_ARGUMENT           a key lacks SYMCRYPT_FLAG_ECKEY_ECDH, flags != 0, pkPrivate
+//                                      has no private key, the keys are on different curves, or
+//                                      the public key is the zero point
+//  SYMCRYPT_WRONG_BLOCK_SIZE           cbAgreedSecret != SymCryptEcurveSizeofFieldElement( pCurve )
+//  SYMCRYPT_MEMORY_ALLOCATION_FAILURE  the scratch allocation failed
+//  SYMCRYPT_INVALID_BLOB               the computed point is the zero point
+//  or the error returned by SymCryptEcpointScalarMul / SymCryptEcpointGetValue.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptEcDhSecretAgreement(
+    _In_ PCSYMCRYPT_ECKEY pkPrivate,
+    _In_ PCSYMCRYPT_ECKEY pkPublic,
+    SYMCRYPT_NUMBER_FORMAT format,
+    UINT32 flags,
+    _Out_writes_( cbAgreedSecret ) PBYTE pbAgreedSecret,
+    SIZE_T cbAgreedSecret )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PBYTE pbScratch = NULL;
+    SIZE_T cbScratch = 0;
+    SIZE_T cbScratchInternal = 0;
+    PBYTE pCurr = NULL;
+
+    PCSYMCRYPT_ECURVE pCurve = NULL;
+
+    PSYMCRYPT_ECPOINT poQ = NULL;
+    PBYTE pbX = NULL;
+
+    UINT32 cbQ = 0;
+    UINT32 cbX = 0;
+
+    // Make sure that the keys may be used in ECDH
+    if ( ((pkPrivate->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDH) == 0) ||
+         ((pkPublic->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDH) == 0) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Make sure we only specify the correct flags and that
+    // there is a private key
+    if ( (flags != 0) ||
+         (!pkPrivate->hasPrivateKey) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Check that the curve is the same for both keys
+    if ( SymCryptEcurveIsSame( pkPrivate->pCurve, pkPublic->pCurve ) )
+    {
+        pCurve = pkPrivate->pCurve;
+    }
+    else
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Objects and scratch space size calculation
+    cbQ = SymCryptSizeofEcpointFromCurve( pCurve );
+    cbX = SymCryptEcurveSizeofFieldElement( pCurve );
+
+    // Check the output buffer has the correct size
+    if (cbAgreedSecret != cbX)
+    {
+        scError = SYMCRYPT_WRONG_BLOCK_SIZE;
+        goto cleanup;
+    }
+
+    cbScratchInternal = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS(pCurve),
+                        SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ),
+                            SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ));
+
+    //
+    // From symcrypt_internal.h we have:
+    //      - sizeof results are upper bounded by 2^19
+    //      - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE)
+    // Thus the following calculation does not overflow cbScratch.
+    //
+    cbScratch = cbScratchInternal + cbQ + cbX;
+
+    // Scratch space allocation
+    pbScratch = SymCryptCallbackAlloc( cbScratch );
+    if ( pbScratch == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // Creating temporaries
+    // Layout of the allocation: [0, cbScratchInternal) is the scratch handed to callees,
+    // followed by poQ (cbQ bytes) and then pbX (cbX bytes).
+    pCurr = pbScratch + cbScratchInternal;
+    poQ = SymCryptEcpointCreate( pCurr, cbQ, pCurve );
+    pCurr += cbQ;
+    pbX = pCurr;
+
+    SYMCRYPT_ASSERT( poQ != NULL);
+
+    // Make sure that the public key is not the zero point
+    // No need to check that the point is on the curve; that check is done when the
+    // public key is created.
+    if (SymCryptEcpointIsZero(pCurve, pkPublic->poPublicKey, pbScratch, cbScratchInternal))
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Calculate the secret
+    // Always do low order clearing by multiplying by the cofactor.
+    // Note: the internal format of piPrivateKey is "DivH", so we
+    // get the correct result.
+    scError = SymCryptEcpointScalarMul(
+                pCurve,
+                pkPrivate->piPrivateKey,
+                pkPublic->poPublicKey,
+                SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL,
+                poQ,
+                pbScratch,
+                cbScratchInternal );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Check if the result is the identity point
+    if ( SymCryptEcpointIsZero(
+            pCurve,
+            poQ,
+            pbScratch,
+            cbScratchInternal ) )
+    {
+        scError = SYMCRYPT_INVALID_BLOB;
+        goto cleanup;
+    }
+
+    // Get the x from poQ
+    // (written to the temporary pbX first; the caller's buffer is only touched on success)
+    scError = SymCryptEcpointGetValue( pCurve, poQ, format, SYMCRYPT_ECPOINT_FORMAT_X, pbX, cbX, 0, pbScratch, cbScratchInternal);
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Store it in the destination
+    memcpy( pbAgreedSecret, pbX, cbX);
+
+cleanup:
+    // The whole allocation (scratch, poQ and the copy of the secret in pbX) is wiped before release
+    if ( pbScratch != NULL )
+    {
+        SymCryptWipe( pbScratch, cbScratch );
+        SymCryptCallbackFree( pbScratch );
+    }
+
+    return scError;
+}
diff --git a/libs/symcrypt/lib/ec_dispatch.c b/libs/symcrypt/lib/ec_dispatch.c
new file mode 100644
index 00000000000..02229ac5efc
--- /dev/null
+++ b/libs/symcrypt/lib/ec_dispatch.c
@@ -0,0 +1,300 @@
+//
+// ec_dispatch.c   Dispatch file for elliptic curve crypto functions
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+//
+
+#include "precomp.h"
+
+// Table with all the pointers to SYMCRYPT_ECURVE_FUNCTIONS
+//
+// One row per curve type, in the order labelled below; SYMCRYPT_ECURVE_CALL selects the
+// row from the curve's type field. The slot comments in the first row name the operation
+// each position stands for.
+// NOTE(review): several rows contain NULL slots and the wrappers in this file call through
+// the table without a NULL check, so presumably callers must only request operations that
+// the curve's type implements - confirm against the callers.
+const SYMCRYPT_ECURVE_FUNCTIONS SymCryptEcurveDispatchTable[] =
+{
+    // NULL Type
+    {
+        NULL, // SymCryptEcpointSetZeroNotImplemented,
+        NULL, // SymCryptEcpointSetDistinguishedPointNotImplemented,
+        NULL, // SymCryptEcpointSetRandomNotImplemented,
+        NULL, // SymCryptEcpointIsEqualNotImplemented,
+        NULL, // SymCryptEcpointIsZeroNotImplemented,
+        NULL, // SymCryptEcpointOnCurveNotImplemented,
+        NULL, // SymCryptEcpointAddNotImplemented,
+        NULL, // SymCryptEcpointAddDiffNonZeroNotImplemented,
+        NULL, // SymCryptEcpointDoubleNotImplemented,
+        NULL, // SymCryptEcpointNegateNotImplemented,
+        NULL, // SymCryptEcpointScalarMulNotImplemented,
+        NULL, // SymCryptEcpointMultiScalarMulNotImplemented,
+        NULL, // SymCryptEcurveFillScratchSpacesNotImplemented,
+    },
+    // Short Weierstrass
+    {
+        SymCryptShortWeierstrassSetZero,
+        SymCryptShortWeierstrassSetDistinguished,
+        SymCryptEcpointGenericSetRandom,
+        SymCryptShortWeierstrassIsEqual,
+        SymCryptShortWeierstrassIsZero,
+        SymCryptShortWeierstrassOnCurve,
+        SymCryptShortWeierstrassAdd,
+        SymCryptShortWeierstrassAddDiffNonZero,
+        SymCryptShortWeierstrassDouble,
+        SymCryptShortWeierstrassNegate,
+        SymCryptEcpointScalarMulFixedWindow,
+        SymCryptEcpointMultiScalarMulWnafWithInterleaving,
+        SymCryptShortWeierstrassFillScratchSpaces,
+    },
+    // Twisted Edwards
+    {
+        SymCryptTwistedEdwardsSetZero,
+        SymCryptTwistedEdwardsSetDistinguished,
+        SymCryptEcpointGenericSetRandom,
+        SymCryptTwistedEdwardsIsEqual,
+        SymCryptTwistedEdwardsIsZero,
+        SymCryptTwistedEdwardsOnCurve,
+        SymCryptTwistedEdwardsAdd,
+        SymCryptTwistedEdwardsAddDiffNonZero,
+        SymCryptTwistedEdwardsDouble,
+        SymCryptTwistedEdwardsNegate,
+        SymCryptEcpointScalarMulFixedWindow,
+        SymCryptEcpointMultiScalarMulWnafWithInterleaving,
+        SymCryptTwistedEdwardsFillScratchSpaces,
+    },
+    // Montgomery
+    {
+        NULL, // SymCryptEcpointSetZeroNotImplemented,
+        SymCryptMontgomerySetDistinguished,
+        SymCryptEcpointGenericSetRandom,
+        SymCryptMontgomeryIsEqual,
+        SymCryptMontgomeryIsZero,
+        NULL, // SymCryptEcpointOnCurveNotImplemented,
+        NULL, // SymCryptEcpointAddNotImplemented,
+        NULL, // SymCryptEcpointAddDiffNonZeroNotImplemented,
+        NULL, // SymCryptEcpointDoubleNotImplemented,
+        NULL, // SymCryptEcpointNegateNotImplemented,
+        SymCryptMontgomeryPointScalarMul,
+        NULL, // SymCryptEcpointMultiScalarMulNotImplemented,
+        SymCryptMontgomeryFillScratchSpaces,
+    },
+    // Short Weierstrass with A==-3
+    // (identical to the Short Weierstrass row except for the specialized doubling routine)
+    {
+        SymCryptShortWeierstrassSetZero,
+        SymCryptShortWeierstrassSetDistinguished,
+        SymCryptEcpointGenericSetRandom,
+        SymCryptShortWeierstrassIsEqual,
+        SymCryptShortWeierstrassIsZero,
+        SymCryptShortWeierstrassOnCurve,
+        SymCryptShortWeierstrassAdd,
+        SymCryptShortWeierstrassAddDiffNonZero,
+        SymCryptShortWeierstrassDoubleSpecializedAm3,
+        SymCryptShortWeierstrassNegate,
+        SymCryptEcpointScalarMulFixedWindow,
+        SymCryptEcpointMultiScalarMulWnafWithInterleaving,
+        SymCryptShortWeierstrassFillScratchSpaces,
+    },
+    // Slack to make dispatch table size a power of 2
+    {NULL,},
+    {NULL,},
+    {NULL,},
+};
+
+// Size of the whole table in bytes
+#define SYMCRYPT_ECURVE_DISPATCH_TABLE_SIZE (sizeof( SymCryptEcurveDispatchTable ))
+
+// Ensure the table size is a power of 2
+C_ASSERT( (SYMCRYPT_ECURVE_DISPATCH_TABLE_SIZE & (SYMCRYPT_ECURVE_DISPATCH_TABLE_SIZE - 1)) == 0 );
+
+// For now the ECurve type encodes the index into this dispatch table, so we just mask by the size of the table
+//
+// We could instead encode the absolute offset into the table in the type field (similar to the Modulus dispatch table),
+// and this mask would be multiplied by SYMCRYPT_ECURVE_FUNCTIONS_SIZE
+#define SYMCRYPT_ECURVE_DISPATCH_TABLE_MASK ((SYMCRYPT_ECURVE_DISPATCH_TABLE_SIZE / SYMCRYPT_ECURVE_FUNCTIONS_SIZE)-1)
+
+// We mask to constrain the unpredictable behaviour in the case of memory corruption; we do not want to interpret some data
+// beyond the end of the dispatch table as function pointers
+//
+// Note the trailing '.': the macro expands to the selected table row followed by a member
+// access, so it must be followed directly by the name of the function-pointer field to call.
+#define SYMCRYPT_ECURVE_CALL(v) (SymCryptEcurveDispatchTable[SYMCRYPT_FORCE_READ32(&(v)->type) & SYMCRYPT_ECURVE_DISPATCH_TABLE_MASK]).
+
+// We read the curve's internal type with a 32b read so it must be 4 bytes large
+C_ASSERT(sizeof(((PCSYMCRYPT_ECURVE)0)->type) == 4);
+
+// Main functions
+//
+// Each function below forwards all of its arguments unchanged to one function pointer of
+// the dispatch-table row selected by pCurve, and returns that function's result (if any).
+
+// Forwards to setZeroFunc of the row selected by pCurve.
+SYMCRYPT_DISABLE_CFG
+VOID
+SYMCRYPT_CALL
+SymCryptEcpointSetZero(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _Out_ PSYMCRYPT_ECPOINT poDst,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    SYMCRYPT_ECURVE_CALL( pCurve ) setZeroFunc( pCurve, poDst, pbScratch, cbScratch );
+}
+
+// Forwards to setDistinguishedFunc of the row selected by pCurve.
+SYMCRYPT_DISABLE_CFG
+VOID
+SYMCRYPT_CALL
+SymCryptEcpointSetDistinguishedPoint(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _Out_ PSYMCRYPT_ECPOINT poDst,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    SYMCRYPT_ECURVE_CALL( pCurve ) setDistinguishedFunc( pCurve, poDst, pbScratch, cbScratch );
+}
+
+// Forwards to setRandomFunc of the row selected by pCurve.
+SYMCRYPT_DISABLE_CFG
+VOID
+SYMCRYPT_CALL
+SymCryptEcpointSetRandom(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _Out_ PSYMCRYPT_INT piScalar,
+    _Out_ PSYMCRYPT_ECPOINT poDst,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    SYMCRYPT_ECURVE_CALL( pCurve ) setRandomFunc( pCurve, piScalar, poDst, pbScratch, cbScratch );
+}
+
+// Forwards to isEqualFunc of the row selected by pCurve and returns its result.
+SYMCRYPT_DISABLE_CFG
+UINT32
+SYMCRYPT_CALL
+SymCryptEcpointIsEqual(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _In_ PCSYMCRYPT_ECPOINT poSrc1,
+    _In_ PCSYMCRYPT_ECPOINT poSrc2,
+    UINT32 flags,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    return SYMCRYPT_ECURVE_CALL( pCurve ) isEqualFunc( pCurve, poSrc1, poSrc2, flags, pbScratch, cbScratch );
+}
+
+// Forwards to isZeroFunc of the row selected by pCurve and returns its result.
+SYMCRYPT_DISABLE_CFG
+UINT32
+SYMCRYPT_CALL
+SymCryptEcpointIsZero(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _In_ PCSYMCRYPT_ECPOINT poSrc,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    return SYMCRYPT_ECURVE_CALL( pCurve ) isZeroFunc( pCurve, poSrc, pbScratch, cbScratch );
+}
+
+// Forwards to onCurveFunc of the row selected by pCurve and returns its result.
+SYMCRYPT_DISABLE_CFG
+UINT32
+SYMCRYPT_CALL
+SymCryptEcpointOnCurve(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _In_ PCSYMCRYPT_ECPOINT poSrc,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    return SYMCRYPT_ECURVE_CALL( pCurve ) onCurveFunc( pCurve, poSrc, pbScratch, cbScratch );
+}
+
+// Forwards to addFunc of the row selected by pCurve.
+SYMCRYPT_DISABLE_CFG
+VOID
+SYMCRYPT_CALL
+SymCryptEcpointAdd(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _In_ PCSYMCRYPT_ECPOINT poSrc1,
+    _In_ PCSYMCRYPT_ECPOINT poSrc2,
+    _Out_ PSYMCRYPT_ECPOINT poDst,
+    UINT32 flags,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    SYMCRYPT_ECURVE_CALL( pCurve ) addFunc( pCurve, poSrc1, poSrc2, poDst, flags, pbScratch, cbScratch );
+}
+
+// Forwards to addDiffFunc of the row selected by pCurve.
+SYMCRYPT_DISABLE_CFG
+VOID
+SYMCRYPT_CALL
+SymCryptEcpointAddDiffNonZero(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _In_ PCSYMCRYPT_ECPOINT poSrc1,
+    _In_ PCSYMCRYPT_ECPOINT poSrc2,
+    _Out_ PSYMCRYPT_ECPOINT poDst,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    SYMCRYPT_ECURVE_CALL( pCurve ) addDiffFunc( pCurve, poSrc1, poSrc2, poDst, pbScratch, cbScratch );
+}
+
+// Forwards to doubleFunc of the row selected by pCurve.
+SYMCRYPT_DISABLE_CFG
+VOID
+SYMCRYPT_CALL
+SymCryptEcpointDouble(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _In_ PCSYMCRYPT_ECPOINT poSrc,
+    _Out_ PSYMCRYPT_ECPOINT poDst,
+    UINT32 flags,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    SYMCRYPT_ECURVE_CALL( pCurve ) doubleFunc( pCurve, poSrc, poDst, flags, pbScratch, cbScratch );
+}
+
+// Forwards to negateFunc of the row selected by pCurve.
+SYMCRYPT_DISABLE_CFG
+VOID
+SYMCRYPT_CALL
+SymCryptEcpointNegate(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _Inout_ PSYMCRYPT_ECPOINT poSrc,
+    UINT32 mask,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    SYMCRYPT_ECURVE_CALL( pCurve ) negateFunc( pCurve, poSrc, mask, pbScratch, cbScratch );
+}
+
+// Forwards to scalarMulFunc of the row selected by pCurve and returns its error code.
+SYMCRYPT_DISABLE_CFG
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptEcpointScalarMul(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _In_ PCSYMCRYPT_INT piScalar,
+    _In_opt_
+    PCSYMCRYPT_ECPOINT poSrc,
+    UINT32 flags,
+    _Out_ PSYMCRYPT_ECPOINT poDst,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    return SYMCRYPT_ECURVE_CALL( pCurve ) scalarMulFunc( pCurve, piScalar, poSrc, flags, poDst, pbScratch, cbScratch );
+}
+
+// Forwards to multiScalarMulFunc of the row selected by pCurve and returns its error code.
+SYMCRYPT_DISABLE_CFG
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptEcpointMultiScalarMul(
+    _In_ PCSYMCRYPT_ECURVE pCurve,
+    _In_ PCSYMCRYPT_INT * piSrcScalarArray,
+    _In_ PCSYMCRYPT_ECPOINT * poSrcEcpointArray,
+    UINT32 nPoints,
+    UINT32 flags,
+    _Out_ PSYMCRYPT_ECPOINT poDst,
+    _Out_writes_bytes_opt_( cbScratch )
+    PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    return SYMCRYPT_ECURVE_CALL( pCurve ) multiScalarMulFunc( pCurve, piSrcScalarArray, poSrcEcpointArray, nPoints, flags, poDst, pbScratch, cbScratch );
+}
+
+// Forwards to fillScratchSpacesFunc of the row selected by pCurve.
+SYMCRYPT_DISABLE_CFG
+VOID
+SYMCRYPT_CALL
+SymCryptEcurveFillScratchSpaces(
+    _Inout_ PSYMCRYPT_ECURVE pCurve )
+{
+    SYMCRYPT_ECURVE_CALL( pCurve ) fillScratchSpacesFunc( pCurve );
+}
diff --git a/libs/symcrypt/lib/ec_dsa.c b/libs/symcrypt/lib/ec_dsa.c
new file mode 100644
index 00000000000..b9da0063e27
--- /dev/null
+++ b/libs/symcrypt/lib/ec_dsa.c
@@ -0,0 +1,694 @@
+//
+// ec_dsa.c   ECDSA functions
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+//
+
+#include "precomp.h"
+
+/*
+    Sections 7.2.7 and 7.2.8 of the 29 August 2000
+    IEEE Standard Specifications for Public-Key Cryptography,
+    IEEE Std 1363-2000, list DSA versions of the elliptic
+    curve signature and verification primitives.
+    This file has draft interfaces,
+
+    7.2.7 ECSP_DSA (pages 35-36)
+
+    Inputs:
+        E -- An elliptic curve.
+        G (generator) -- A point on E of prime order r.
+        r -- See G.
+        s -- A secret exponent, 1 <= s < r (Private key)
+        msghash -- Hash of the message being signed.
+
+    Outputs:
+        c, d -- Two integers in the interval [1, r-1]
+
+    Algorithm:
+        1) Generate random exponent k, 1 <= k < r,
+           to be kept from adversary.
+           Compute KG = k*G in E.
+           Note KG <> (point at infinity).
+ 2) Convert x(KG) (an element of GF(q)) + to an integer FE2IP(x(KG)). + Let c = FE2IP(x(KG)) (mod r). + 3) Compute d = (msghash + s*c)/k (mod r). + 4) If c == 0 or d == 0, return to 1). + 5) Output c and d as integers. + + 7.2.8 ECVP_DSA + + Inputs: + E, G, r, msghash -- Same as in ECSP_DSA. + W -- The signer's public key. Equal to + s*G where s was passed to ECSP_DSA. + c, d -- A signature to be checked. + + Output: + TRUE if signature OK, else FALSE. + + Algorithm: + 1) If c or d is not in [1, r-1], return FALSE. + 2) Compute h1 = msghash/d (mod r) + and h2 = c/d (mod r). + 3) Compute P = h1*G + h2*W. + If P == (point at infinity), return FALSE. + 4) If c == FE2IP(x(P)) mod r, return TRUE. + Otherwise return FALSE. + +FE2IP is a P1363 function that casts a field element to an +integer (MSB_FIRST). See Section 5.5.5 of P1363. +*/ + +// +// Truncating function according to the standard or +// the original CNG implementation: +// +// Initially both implementations truncate the last **bytes** +// of the hash that are over the group byte length. Then if +// the bit length of the hash is still bigger than the bit +// length of the group order, ... +// +// 1. According to the X9.62 standard, we do an appropriate right shift to the entire hash. +// An example of this is a 160-bit hash, but a 113-bit subgroup order. For this case: +// a. We would truncate cbHash to (113 + 7) / 8 = 15 bytes. +// b. Since 15*8 = 120 > 113 we need to right-shift by 7 bits. +// 2. According to the original CNG implementation, we mask an appropriate number of the +// topmost bits of the hash. +// In the same example as before we would zero out the top 7 bits. 
+// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaTruncateHash( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peMsghash, + _Out_ PSYMCRYPT_INT piTmp, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 uiBitsizeOfTmp = 0; + UINT32 uiBitsizeOfGroup = 0; + + // Make sure that only the correct flags are set + if ( (flags & ~SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Get the bitsize of the group order + uiBitsizeOfGroup = SymCryptEcurveBitsizeofGroupOrder( pCurve ); + + // Truncate the last bytes of the hash + if (cbHashValue*8 > uiBitsizeOfGroup) + { + cbHashValue = (uiBitsizeOfGroup + 7)/8; + } + + // Get the value of msghash + scError = SymCryptIntSetValue( pbHashValue, cbHashValue, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTmp ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the bit size of the hash + uiBitsizeOfTmp = (UINT32)cbHashValue * 8; + + // If SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION is set, we don't do hash truncation. + // The caller can do their own truncation before calling into Symcrypt. 
+ if ( ( flags & SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION ) == 0) + { + // ******** Standard truncation ************** + // Shift right if needed + if ( uiBitsizeOfTmp > uiBitsizeOfGroup ) + { + SymCryptIntDivPow2( piTmp, uiBitsizeOfTmp - uiBitsizeOfGroup, piTmp ); + } + } + + SymCryptIntToModElement( piTmp, pCurve->GOrd, peMsghash, pbScratch, cbScratch ); // msghash mod r + +cleanup: + return scError; +} + +#define SYMCRYPT_MAX_ECDSA_SIGNATURE_COUNT (100) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaSignEx( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_opt_ PCSYMCRYPT_INT piK, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + SIZE_T cbScratchInternal = 0; + PBYTE pCurr = NULL; + + PCSYMCRYPT_ECURVE pCurve = pKey->pCurve; + + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_INT piMul = NULL; + PSYMCRYPT_ECPOINT poKG = NULL; + + PSYMCRYPT_MODELEMENT peMsghash = NULL; + PSYMCRYPT_MODELEMENT peSigC = NULL; + PSYMCRYPT_MODELEMENT peSigD = NULL; + PSYMCRYPT_MODELEMENT peTmp = NULL; + + PBYTE pbX = NULL; + + UINT32 nDigitsInt = 0; + UINT32 nDigitsMul = 0; + + UINT32 cbInt = 0; + UINT32 cbMul = 0; + UINT32 cbKG = 0; + UINT32 cbRs = 0; + UINT32 cbX = 0; + + UINT32 signatureCount = 0; + UINT32 allowedFlags = SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION | SYMCRYPT_FLAG_DATA_PUBLIC; + UINT32 publicFlag = flags & SYMCRYPT_FLAG_DATA_PUBLIC; + UINT32 truncationFlag = flags & SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION; + + // Make sure that the key may be used in ECDSA + if ( ((pKey->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDSA) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure only allowed flags are specified and + // there is a private key + if ( ((flags & ~(allowedFlags)) != 0) || + (!pKey->hasPrivateKey) ) + { + scError = 
SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Calculating the digits for the temporary integers + nDigitsInt = pCurve->GOrdDigits; + + nDigitsMul = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + // Objects and scratch space size calculation + cbInt = SymCryptSizeofIntFromDigits( nDigitsInt ); + cbMul = SymCryptSizeofIntFromDigits( nDigitsMul ); + cbKG = SymCryptSizeofEcpointFromCurve( pCurve ); + cbRs = SymCryptSizeofModElementFromModulus( pCurve->GOrd ); + cbX = SymCryptEcurveSizeofFieldElement( pCurve ); + + cbScratchInternal = SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->GOrdDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pCurve->GOrdDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = cbScratchInternal + cbInt + cbMul + cbKG + 4*cbRs + cbX; + + // Scratch space allocation + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Creating temporaries + pCurr = pbScratch + cbScratchInternal; + piTmp = SymCryptIntCreate( pCurr, cbInt, nDigitsInt ); + pCurr += cbInt; + piMul = SymCryptIntCreate( pCurr, cbMul, nDigitsMul ); + pCurr += cbMul; + poKG = SymCryptEcpointCreate( pCurr, cbKG, pCurve ); + pCurr += cbKG; + peMsghash = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peSigC = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peSigD = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peTmp = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + pbX = pCurr; + + SYMCRYPT_ASSERT( piTmp != NULL); + SYMCRYPT_ASSERT( piMul != NULL); + SYMCRYPT_ASSERT( poKG != NULL); + SYMCRYPT_ASSERT( peMsghash != NULL); + SYMCRYPT_ASSERT( peSigC != NULL); + SYMCRYPT_ASSERT( peSigD != NULL); + SYMCRYPT_ASSERT( peTmp != NULL); + + // Truncate the message according to the flags + scError = SymCryptEcDsaTruncateHash( + pCurve, + pbHashValue, + cbHashValue, + truncationFlag, + peMsghash, + piTmp, + pbScratch, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // + // Main loop: Stop when both c and d are not zero (unless a specific k is provided) + // + while( TRUE ) + { + if ( piK == NULL ) + { + SymCryptEcpointSetRandom( pCurve, piMul, poKG, pbScratch, cbScratchInternal ); // Generate k and k*G + SymCryptIntToModElement( piMul, pCurve->GOrd, peTmp, pbScratch, cbScratchInternal ); + } + else + { + // Ensure that piK is in the range [1, GOrd-1] + if( SymCryptIntIsEqualUint32( piK, 0 ) || + !SymCryptIntIsLessThan( piK, SymCryptIntFromModulus( pCurve->GOrd ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + 
SymCryptIntCopy( piK, piMul ); + SymCryptIntToModElement( piMul, pCurve->GOrd, peTmp, pbScratch, cbScratchInternal ); + + scError = SymCryptEcpointScalarMul( pCurve, piMul, NULL, 0, poKG, pbScratch, cbScratchInternal ); // Generate k*G + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + scError = SymCryptModInv( pCurve->GOrd, peTmp, peTmp, publicFlag, pbScratch, cbScratchInternal ); // Invert k + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the x coordinates from KG + scError = SymCryptEcpointGetValue( + pCurve, + poKG, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_X, + pbX, + cbX, + publicFlag, + pbScratch, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Store c = x(KG) as an integer + scError = SymCryptModElementSetValue( pbX, cbX, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pCurve->GOrd, peSigC, pbScratch, cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Move the private key into peSigD + SymCryptIntToModElement( pKey->piPrivateKey, pCurve->GOrd, peSigD, pbScratch, cbScratchInternal ); + + // Multiply the private key by h since its internal format is "DivH" + for (UINT32 i=0; i<pCurve->coFactorPower; i++) + { + SymCryptModAdd( pCurve->GOrd, peSigD, peSigD, peSigD, pbScratch, cbScratchInternal ); + } + + SymCryptModMul( pCurve->GOrd, peSigC, peSigD, peSigD, pbScratch, cbScratchInternal ); // s * c + SymCryptModAdd( pCurve->GOrd, peMsghash, peSigD, peSigD, pbScratch, cbScratchInternal ); // msghash + s*c + SymCryptModMul( pCurve->GOrd, peSigD, peTmp, peSigD, pbScratch, cbScratchInternal ); // ( msghash + s*c ) / k + + if ( !( SymCryptModElementIsZero( pCurve->GOrd, peSigC ) | + SymCryptModElementIsZero( pCurve->GOrd, peSigD ) ) ) + { + break; + } + + if (piK != NULL) + { + // piK resulted in 0 signature + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + signatureCount++; + if ( signatureCount >= 
SYMCRYPT_MAX_ECDSA_SIGNATURE_COUNT ) + { + // We have not generated a non-zero signature after SYMCRYPT_MAX_ECDSA_SIGNATURE_COUNT attempts; + // Something is wrong with the group setup + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Output c + scError = SymCryptModElementGetValue( pCurve->GOrd, peSigC, pbSignature, cbSignature / 2, format, pbScratch, cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Output d + scError = SymCryptModElementGetValue( pCurve->GOrd, peSigD, pbSignature + cbSignature / 2, cbSignature / 2, format, pbScratch, cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipe( pbSignature, cbSignature ); + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaSign( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + // Make sure that only the correct flags are set + if ( (flags & ~SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION) != 0 ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // We must have a private key to perform PCT or signature + if( !pKey->hasPrivateKey || !(pKey->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDSA) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // If the key was generated in SymCrypt and has not yet had a PCT performed - perform PCT before first use + SYMCRYPT_RUN_KEY_GEN_PCT( + SymCryptEcDsaPct, + pKey, + SYMCRYPT_PCT_ECDSA ); + + return SymCryptEcDsaSignEx( pKey, pbHashValue, cbHashValue, NULL, format, flags, pbSignature, cbSignature ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaVerify( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE 
pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + SIZE_T cbScratchInternal = 0; + PBYTE pCurr = NULL; + BOOLEAN fValidSignature = FALSE; + + PCSYMCRYPT_ECURVE pCurve = pKey->pCurve; + + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_INT piMul1 = NULL; + PSYMCRYPT_INT piMul2 = NULL; + PSYMCRYPT_ECPOINT poQ1 = NULL; + PSYMCRYPT_ECPOINT poQ2 = NULL; + + PSYMCRYPT_MODELEMENT peMsghash = NULL; + PSYMCRYPT_MODELEMENT peSigC = NULL; + PSYMCRYPT_MODELEMENT peSigD = NULL; + PSYMCRYPT_MODELEMENT peTmp = NULL; + + PBYTE pbX = NULL; + PCSYMCRYPT_ECPOINT poTable[2] = { 0 }; + PCSYMCRYPT_INT piTable[2] = { 0 }; + + UINT32 nDigitsInt = 0; + UINT32 nDigitsMul = 0; + + UINT32 cbInt = 0; + UINT32 cbMul = 0; + UINT32 cbKG = 0; + UINT32 cbRs = 0; + UINT32 cbX = 0; + + // Make sure that the key may be used in ECDSA + if ( ((pKey->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDSA) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that only the correct flags are set + if ( (flags & ~SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Calculating the digits for the temporary integer + nDigitsInt = SYMCRYPT_MAX( pCurve->FModDigits, pCurve->GOrdDigits ); + nDigitsInt = SYMCRYPT_MAX( nDigitsInt, SymCryptDigitsFromBits( (UINT32)cbSignature * 4 ) ); // pbSignature contains (c,d) + + nDigitsMul = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + // Objects and scratch space size calculation + cbInt = SymCryptSizeofIntFromDigits( nDigitsInt ); + cbMul = SymCryptSizeofIntFromDigits( nDigitsMul ); + cbKG = SymCryptSizeofEcpointFromCurve( pCurve ); + cbRs = SymCryptSizeofModElementFromModulus( pCurve->GOrd ); + cbX = SymCryptEcurveSizeofFieldElement( pCurve ); + + cbScratchInternal = 
SYMCRYPT_SCRATCH_BYTES_FOR_MULTI_SCALAR_ECURVE_OPERATIONS( pCurve, 2 ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->GOrdDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pCurve->GOrdDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. + // + cbScratch = cbScratchInternal + cbInt + 2*cbMul + 2*cbKG + 4*cbRs + cbX; + + // Scratch space allocation + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Creating temporaries + pCurr = pbScratch + cbScratchInternal; + piTmp = SymCryptIntCreate( pCurr, cbInt, nDigitsInt ); + pCurr += cbInt; + piMul1 = SymCryptIntCreate( pCurr, cbMul, nDigitsMul ); + pCurr += cbMul; + piMul2 = SymCryptIntCreate( pCurr, cbMul, nDigitsMul ); + pCurr += cbMul; + poQ1 = SymCryptEcpointCreate( pCurr, cbKG, pCurve ); + pCurr += cbKG; + poQ2 = SymCryptEcpointCreate( pCurr, cbKG, pCurve ); + pCurr += cbKG; + peMsghash = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peSigC = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peSigD = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peTmp = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + pbX = pCurr; + + 
SYMCRYPT_ASSERT( piTmp != NULL); + SYMCRYPT_ASSERT( piMul1 != NULL); + SYMCRYPT_ASSERT( piMul2 != NULL); + SYMCRYPT_ASSERT( poQ1 != NULL); + SYMCRYPT_ASSERT( poQ2 != NULL); + SYMCRYPT_ASSERT( peMsghash != NULL); + SYMCRYPT_ASSERT( peSigC != NULL); + SYMCRYPT_ASSERT( peSigD != NULL); + SYMCRYPT_ASSERT( peTmp != NULL); + + // Get c + scError = SymCryptIntSetValue( pbSignature, cbSignature / 2, format, piTmp ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check if c is less than r + if ( !SymCryptIntIsLessThan( piTmp, SymCryptIntFromModulus( pCurve->GOrd ) ) ) + { + goto cleanup; + } + + // c mod r + SymCryptIntToModElement( piTmp, pCurve->GOrd, peSigC, pbScratch, cbScratchInternal ); + + // Check if c is zero + if (SymCryptModElementIsZero( pCurve->GOrd, peSigC )) + { + goto cleanup; + } + + // Get d + scError = SymCryptIntSetValue( pbSignature + cbSignature / 2, cbSignature / 2, format, piTmp ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check if d is less than r + if ( !SymCryptIntIsLessThan( piTmp, SymCryptIntFromModulus( pCurve->GOrd ) ) ) + { + goto cleanup; + } + + // d mod r + SymCryptIntToModElement( piTmp, pCurve->GOrd, peSigD, pbScratch, cbScratchInternal ); + + // Check if d is zero + if (SymCryptModElementIsZero( pCurve->GOrd, peSigD )) + { + goto cleanup; + } + + // Calculate 1/d mod r + // The D value is not secret; it is part of the signature. + // We mark it public to avoid the use of random blinding, which would require a source of randomness + // just to verify an ECDSA signature. 
+ scError = SymCryptModInv( pCurve->GOrd, peSigD, peSigD, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratchInternal ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Truncate the message according to the flags + scError = SymCryptEcDsaTruncateHash( + pCurve, + pbHashValue, + cbHashValue, + flags, + peMsghash, + piTmp, + pbScratch, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptModMul( pCurve->GOrd, peMsghash, peSigD, peMsghash, pbScratch, cbScratchInternal ); // msghash / d = h1 + SymCryptModMul( pCurve->GOrd, peSigC, peSigD, peTmp, pbScratch, cbScratchInternal ); // c / d = h2 + + SymCryptModElementToInt( pCurve->GOrd, peMsghash, piMul1, pbScratch, cbScratchInternal ); + SymCryptModElementToInt( pCurve->GOrd, peTmp, piMul2, pbScratch, cbScratchInternal ); + + // h1*G + h2*W + piTable[0] = piMul1; + piTable[1] = piMul2; + + poTable[0] = NULL; // The first base point is the generator G of the group + poTable[1] = pKey->poPublicKey; + + scError = SymCryptEcpointMultiScalarMul( pCurve, piTable, poTable, 2, SYMCRYPT_FLAG_DATA_PUBLIC, poQ1, pbScratch, cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check for point at infinity + if ( SymCryptEcpointIsZero( pCurve, poQ1, pbScratch, cbScratchInternal ) ) + { + goto cleanup; + } + + // Get the x from poQ1 + scError = SymCryptEcpointGetValue( pCurve, poQ1, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, SYMCRYPT_ECPOINT_FORMAT_X, pbX, cbX, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratchInternal); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Store it in a big enough INT + scError = SymCryptIntSetValue( pbX, cbX, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTmp ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptIntToModElement( piTmp, pCurve->GOrd, peTmp, pbScratch, cbScratchInternal ); // x mod r + + // Comparison c = x + if (SymCryptModElementIsEqual( pCurve->GOrd, peSigC, peTmp )) + { + 
fValidSignature = TRUE; + } + +cleanup: + + if (!fValidSignature) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + } + + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} diff --git a/libs/symcrypt/lib/ec_internal_curve_params.c b/libs/symcrypt/lib/ec_internal_curve_params.c new file mode 100644 index 00000000000..4c0301f1c68 --- /dev/null +++ b/libs/symcrypt/lib/ec_internal_curve_params.c @@ -0,0 +1,597 @@ +// +// ec_internal_curve_params.c Parameters for internally supported curves. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// Do not delete the following preprocessor directive. +// It is used for folding the parameters. +#if 1 + +/*********************************** + * * + * NIST CURVES * + * * + ***********************************/ + +static const BYTE rgbNistP192[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x18, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x18, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC, + //B + 0x64, 0x21, 0x05, 0x19, 0xE5, 0x9C, 0x80, 0xE7, + 0x0F, 0xA7, 0xE9, 0xAB, 0x72, 0x24, 0x30, 0x49, + 0xFE, 0xB8, 0xDE, 0xEC, 0xC1, 0x46, 0xB9, 0xB1, + //x + 0x18, 0x8D, 0xA8, 0x0E, 0xB0, 0x30, 0x90, 0xF6, + 0x7C, 0xBF, 0x20, 0xEB, 0x43, 0xA1, 0x88, 0x00, + 0xf4, 0xFF, 0x0A, 0xFD, 0x82, 0xFF, 0x10, 0x12, + //y + 0x07, 0x19, 0x2B, 0x95, 0xFF, 0xC8, 0xDA, 0x78, + 0x63, 0x10, 0x11, 0xED, 0x6B, 0x24, 0xCD, 0xD5, + 0x73, 0xF9, 
0x77, 0xA1, 0x1E, 0x79, 0x48, 0x11, + //q + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x99, 0xDE, 0xF8, 0x36, + 0x14, 0x6B, 0xC9, 0xB1, 0xB4, 0xD2, 0x28, 0x31, + //h + 0x01 +}; + +static const BYTE rgbNistP224[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x1C, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x1C, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFE, + //B + 0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, + 0xF5, 0x41, 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, + 0xD7, 0xBF, 0xD8, 0xBA, 0x27, 0x0B, 0x39, 0x43, + 0x23, 0x55, 0xFF, 0xB4, + //x + 0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, + 0x32, 0x13, 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, + 0x56, 0xC2, 0x11, 0x22, 0x34, 0x32, 0x80, 0xD6, + 0x11, 0x5C, 0x1D, 0x21, + //y + 0xBD, 0x37, 0x63, 0x88, 0xB5, 0xF7, 0x23, 0xFB, + 0x4C, 0x22, 0xDF, 0xE6, 0xCD, 0x43, 0x75, 0xA0, + 0x5A, 0x07, 0x47, 0x64, 0x44, 0xD5, 0x81, 0x99, + 0x85, 0x00, 0x7E, 0x34, + //q + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x16, 0xA2, + 0xE0, 0xB8, 0xF0, 0x3E, 0x13, 0xDD, 0x29, 0x45, + 0x5C, 0x5C, 0x2A, 0x3D, + //h + 0x01 +}; + +static const BYTE rgbNistP256[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x20, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x20, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 
0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC, + //B + 0x5A, 0xC6, 0x35, 0xD8, 0xAA, 0x3A, 0x93, 0xE7, + 0xB3, 0xEB, 0xBD, 0x55, 0x76, 0x98, 0x86, 0xBC, + 0x65, 0x1D, 0x06, 0xB0, 0xCC, 0x53, 0xB0, 0xF6, + 0x3B, 0xCE, 0x3C, 0x3E, 0x27, 0xD2, 0x60, 0x4B, + //x + 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, + 0xF8, 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, + 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0, + 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96, + //y + 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, + 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, + 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, + 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5, + //q + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84, + 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51, + //h + 0x01 +}; + +static const BYTE rgbNistP384[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x30, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x30, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFC, + //B + 0xB3, 0x31, 0x2F, 0xA7, 0xE2, 0x3E, 0xE7, 0xE4, + 0x98, 0x8E, 0x05, 0x6B, 0xE3, 0xF8, 0x2D, 0x19, + 0x18, 0x1D, 0x9C, 0x6E, 0xFE, 0x81, 0x41, 0x12, + 0x03, 0x14, 0x08, 0x8F, 0x50, 0x13, 0x87, 0x5A, + 0xC6, 0x56, 0x39, 0x8D, 0x8A, 0x2E, 0xD1, 0x9D, + 0x2A, 0x85, 0xC8, 0xED, 0xD3, 0xEC, 0x2A, 0xEF, + //x + 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, + 0x8E, 0xB1, 0xC7, 0x1E, 0xF3, 0x20, 0xAD, 0x74, + 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98, + 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, + 0x55, 0x02, 0xF2, 0x5D, 0xBF, 0x55, 0x29, 0x6C, + 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7, + //y + 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, + 0x5D, 0x9E, 0x98, 0xBF, 0x92, 0x92, 0xDC, 0x29, + 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C, + 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, + 0x0A, 0x60, 0xB1, 0xCE, 0x1D, 0x7E, 0x81, 0x9D, + 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F, + //q + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC7, 0x63, 0x4D, 0x81, 0xF4, 0x37, 0x2D, 0xDF, + 0x58, 0x1A, 0x0D, 0xB2, 0x48, 0xB0, 0xA7, 0x7A, + 0xEC, 0xEC, 0x19, 0x6A, 0xCC, 0xC5, 0x29, 0x73, + //h + 0x01 +}; + +static const BYTE rgbNistP521[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x42, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x42, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, + //A + 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFC, + //B + 0x00, 0x51, 0x95, 0x3E, 0xB9, 0x61, 0x8E, 0x1C, + 0x9A, 0x1F, 0x92, 0x9A, 0x21, 0xA0, 0xB6, 0x85, + 0x40, 0xEE, 0xA2, 0xDA, 0x72, 0x5B, 0x99, 0xB3, + 0x15, 0xF3, 0xB8, 0xB4, 0x89, 0x91, 0x8E, 0xF1, + 0x09, 0xE1, 0x56, 0x19, 0x39, 0x51, 0xEC, 0x7E, + 0x93, 0x7B, 0x16, 0x52, 0xC0, 0xBD, 0x3B, 0xB1, + 0xBF, 0x07, 0x35, 0x73, 0xDF, 0x88, 0x3D, 0x2C, + 0x34, 0xF1, 0xEF, 0x45, 0x1F, 0xD4, 0x6B, 0x50, + 0x3F, 0x00, + //x + 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, + 0xE9, 0xCD, 0x9E, 0x3E, 0xCB, 0x66, 0x23, 0x95, + 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05, 0x3F, + 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B, 0x4D, + 0x3D, 0xBA, 0xA1, 0x4B, 0x5E, 0x77, 0xEF, 0xE7, + 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2, 0xFF, + 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, + 0x42, 0x9B, 0xF9, 0x7E, 0x7E, 0x31, 0xC2, 0xE5, + 0xBD, 0x66, + //y + 0x01, 0x18, 0x39, 0x29, 0x6A, 0x78, 0x9A, 0x3B, + 0xC0, 0x04, 0x5C, 0x8A, 0x5F, 0xB4, 0x2C, 0x7D, + 0x1B, 0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57, 0x9B, + 0x44, 0x68, 0x17, 0xAF, 0xBD, 0x17, 0x27, 0x3E, + 0x66, 0x2C, 0x97, 0xEE, 0x72, 0x99, 0x5E, 0xF4, + 0x26, 0x40, 0xC5, 0x50, 0xB9, 0x01, 0x3F, 0xAD, + 0x07, 0x61, 0x35, 0x3C, 0x70, 0x86, 0xA2, 0x72, + 0xC2, 0x40, 0x88, 0xBE, 0x94, 0x76, 0x9F, 0xD1, + 0x66, 0x50, + //q + 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F, + 0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09, + 0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C, + 0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38, + 0x64, 0x09, + //h + 0x01 +}; + +/***************************************** +* * +* TWISTED EDWARDS CURVES * +* * +******************************************/ +static const BYTE rgbNumsP256t1[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x02, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x20, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x20, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x43, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x42, + //d + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3B, 0xEE, + //x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0D, + //y + 0x7D, 0x0A, 0xB4, 0x1E, 0x2A, 0x12, 0x76, 0xDB, + 0xA3, 0xD3, 0x30, 0xB3, 0x9F, 0xA0, 0x46, 0xBF, + 0xBE, 0x2A, 0x6D, 0x63, 0x82, 0x4D, 0x30, 0x3F, + 0x70, 0x7F, 0x6F, 0xB5, 0x33, 0x1C, 0xAD, 0xBA, + //q + 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xBE, 0x6A, 0xA5, 0x5A, 0xD0, 0xA6, 0xBC, 0x64, 
+ 0xE5, 0xB8, 0x4E, 0x6F, 0x11, 0x22, 0xB4, 0xAD, + //h + 0x04 +}; + +static const BYTE rgbNumsP384t1[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x02, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x30, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x30, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xC3, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xC2, + //d + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x15, 0x8A, + //x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + //y + 0x74, 0x9C, 0xDA, 0xBA, 0x13, 0x6C, 0xE9, 0xB6, + 0x5B, 0xD4, 0x47, 0x17, 0x94, 0xAA, 0x61, 0x9D, + 0xAA, 0x5C, 0x7B, 0x4C, 0x93, 0x0B, 0xFF, 0x8E, + 0xBD, 0x79, 0x8A, 0x8A, 0xE7, 0x53, 0xC6, 0xD7, + 0x2F, 0x00, 0x38, 0x60, 0xFE, 0xBA, 0xBA, 0xD5, + 0x34, 0xA4, 0xAC, 0xF5, 0xFA, 0x7F, 0x5B, 0xEE, + //q + 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xEC, 0xD7, 0xD1, 0x1E, 0xD5, 0xA2, 0x59, 0xA2, + 0x5A, 0x13, 0xA0, 0x45, 0x8E, 0x39, 0xF4, 0xE4, + 0x51, 0xD6, 0xD7, 0x1F, 0x70, 0x42, 0x6E, 0x25, + //h + 0x04 +}; + +static const BYTE rgbNumsP512t1[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x02, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x40, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x40, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, 0xC7, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, 0xC6, + //d + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0xBA, 0xA8, + //x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, + //y + 0x7D, 0x67, 0xE8, 0x41, 0xDC, 0x4C, 0x46, 0x7B, + 0x60, 0x50, 0x91, 0xD8, 0x08, 0x69, 0x21, 0x2F, + 0x9C, 0xEB, 0x12, 0x4B, 0xF7, 0x26, 0x97, 0x3F, + 0x9F, 0xF0, 0x48, 0x77, 0x9E, 0x1D, 0x61, 0x4E, + 0x62, 0xAE, 0x2E, 0xCE, 0x50, 0x57, 0xB5, 0xDA, + 0xD9, 0x6B, 0x7A, 0x89, 0x7C, 0x1D, 0x72, 0x79, + 0x92, 0x61, 0x13, 0x46, 0x38, 0x75, 0x0F, 0x4F, + 0x0C, 0xB9, 0x10, 0x27, 0x54, 0x3B, 0x1C, 0x5E, + //q + 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xA7, 0xE5, 0x08, 0x09, 0xEF, 0xDA, 0xBB, 0xB9, + 0xA6, 0x24, 0x78, 0x4F, 0x44, 0x95, 0x45, 0xF0, + 0xDC, 0xEA, 0x5F, 0xF0, 0xCB, 0x80, 0x0F, 0x89, + 0x4E, 0x78, 0xD1, 0xCB, 0x0B, 0x5F, 0x01, 0x89, + //h + 0x04 +}; + + +/************************************ +* * +* MONTGOMERY CURVES * +* * +*************************************/ + +static const BYTE rgbCurve25519[] = { + //dwVersion + 0x02, 0x00, 0x00, 0x00, + //dwCurveType + 0x03, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x20, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x20, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xED, + //A + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x6D, 0x06, + //B + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + //x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + //y + 0x20, 0xAE, 0x19, 0xA1, 0xB8, 0xA0, 0x86, 0xB4, + 0xE0, 0x1E, 0xDD, 0x2C, 0x77, 0x48, 0xD1, 0x4C, + 0x92, 0x3D, 0x4D, 0x7E, 0x6D, 0x7C, 0x61, 0xB2, + 0x29, 0xE9, 0xC5, 0xA2, 0x7E, 0xCE, 0xD3, 0xD9, + //q + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x14, 0xDE, 0xF9, 0xDE, 0xA2, 0xF7, 0x9C, 0xD6, + 0x58, 0x12, 0x63, 0x1A, 0x5C, 0xF5, 0xD3, 0xED, + //h + 0x08, + + // Version 2 parameters + // PrivateKeyDefaultFormat + 0x03, 0x00, 0x00, 0x00, + // HighBitRestrictionNumOfBits + 0x02, 0x00, 0x00, 0x00, + // HighBitRestrictionPosition + 0xFE, 0x00, 0x00, 0x00, + // HighBitRestrictionValue + 0x01, 0x00, 0x00, 0x00, +}; + +#endif // 1 + +// Version 2 parameter extension +static const SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION paramsV2ExtensionShortWeierstrass = +{ + SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL, + 0, + 0, + 0, +}; + +static const SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION paramsV2ExtensionTwistedEdwards = +{ + SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH, + 0, + 0, + 0, +}; + +static const SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION paramsV2ExtensionMontgomery = +{ + SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH, + 0, + 0, + 0, +}; + +// Definitions +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP192 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP192; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP224 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP224; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP256 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP256; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP384 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP384; +const PCSYMCRYPT_ECURVE_PARAMS 
SymCryptEcurveParamsNistP521 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP521; + +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP256t1 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNumsP256t1; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP384t1 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNumsP384t1; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP512t1 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNumsP512t1; + +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsCurve25519 = (PCSYMCRYPT_ECURVE_PARAMS) rgbCurve25519; + +// Exported pointers to the static V2 extension blocks defined earlier in this file, one per curve type. +const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionShortWeierstrass = &paramsV2ExtensionShortWeierstrass; +const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionTwistedEdwards = &paramsV2ExtensionTwistedEdwards; +const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionMontgomery = &paramsV2ExtensionMontgomery; diff --git a/libs/symcrypt/lib/ec_internal_curves.c b/libs/symcrypt/lib/ec_internal_curves.c new file mode 100644 index 00000000000..d0107f431c7 --- /dev/null +++ b/libs/symcrypt/lib/ec_internal_curves.c @@ -0,0 +1,79 @@ +// +// ec_internal_curves.c Internally allocated elliptic curves. +// +// These curves are lazy-initialized. Currently only used +// for composite algorithms to avoid per-key allocation overhead.
+// + +#include "precomp.h" + +// One slot per SYMCRYPT_CACHED_ECURVE_ID value; a slot stays NULL until +// SymCryptGetCachedEcurve has successfully created the curve for that id. +static PCSYMCRYPT_ECURVE rgpCachedCurves[SYMCRYPT_CACHED_ECURVE_ID_COUNT] = { 0 }; + +// Maps a cached-curve id to its built-in parameter blob. +// Returns NULL for any id that has no case in the switch below. +static +PCSYMCRYPT_ECURVE_PARAMS +SYMCRYPT_CALL +SymCryptGetCachedEcurveParams( + SYMCRYPT_CACHED_ECURVE_ID curveId ) +{ + switch (curveId) + { + case SYMCRYPT_CACHED_ECURVE_ID_NIST_P256: + return SymCryptEcurveParamsNistP256; + case SYMCRYPT_CACHED_ECURVE_ID_NIST_P384: + return SymCryptEcurveParamsNistP384; + case SYMCRYPT_CACHED_ECURVE_ID_CURVE_25519: + return SymCryptEcurveParamsCurve25519; + default: + return NULL; + } +} + +// Returns the cached curve object for curveId, allocating it on first use. +// Returns NULL when curveId is outside [0, SYMCRYPT_CACHED_ECURVE_ID_COUNT), when no +// parameters exist for the id, or when SymCryptEcurveAllocate fails. +// Nothing in this file frees a curve once it has been stored in rgpCachedCurves. +PCSYMCRYPT_ECURVE +SYMCRYPT_CALL +SymCryptGetCachedEcurve( + SYMCRYPT_CACHED_ECURVE_ID curveId ) +{ + PCSYMCRYPT_ECURVE pCachedCurve = NULL; + PSYMCRYPT_ECURVE pNewCurve = NULL; + PSYMCRYPT_ECURVE pCurrCurve = NULL; + PCSYMCRYPT_ECURVE_PARAMS pParams = NULL; + + if ( curveId < 0 || curveId >= SYMCRYPT_CACHED_ECURVE_ID_COUNT ) + { + return NULL; + } + + // Fast path: the slot has already been filled. + pCachedCurve = (PCSYMCRYPT_ECURVE) SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE( &rgpCachedCurves[curveId] ); + if ( pCachedCurve != NULL ) + { + return pCachedCurve; + } + + pParams = SymCryptGetCachedEcurveParams( curveId ); + if ( pParams == NULL ) + { + return NULL; + } + + pNewCurve = SymCryptEcurveAllocate( pParams, 0 ); + if ( pNewCurve == NULL ) + { + return NULL; + } + + // Try to store pNewCurve in the slot. The macro is defined elsewhere; presumably it + // writes pNewCurve only if the slot still holds NULL and returns the slot's previous + // value - confirm against its definition. + pCurrCurve = SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( + &rgpCachedCurves[curveId], + pNewCurve, + NULL); + + // Means the original curve was already filled + // and that our new curve was not used. So we + // free the new curve and return the existing one. + if ( pCurrCurve != NULL ) + { + SymCryptEcurveFree( pNewCurve ); + return pCurrCurve; + } + + return pNewCurve; +} diff --git a/libs/symcrypt/lib/ec_montgomery.c b/libs/symcrypt/lib/ec_montgomery.c new file mode 100644 index 00000000000..ced3fcdaafc --- /dev/null +++ b/libs/symcrypt/lib/ec_montgomery.c @@ -0,0 +1,443 @@ +// +// ec_montgomery.c Montgomery Implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// + +#include "precomp.h" + +// Fills in the scratch-size fields of pCurve: cbScratchCommon, cbScratchScalar, +// cbScratchScalarMulti, cbScratchGetSetValue and cbScratchEckey. All sizes are derived +// from the field modulus (FModBitsize, FMod, FModDigits) and pCurve->cbModElement. +// cbScratchScalarMulti is set to 0 (presumably because multi-scalar multiplication is +// not offered for Montgomery curves - confirm). +VOID +SYMCRYPT_CALL +SymCryptMontgomeryFillScratchSpaces(_In_ PSYMCRYPT_ECURVE pCurve) +{ + UINT32 nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize ); + UINT32 nBytes = SymCryptSizeofModElementFromModulus( pCurve->FMod ); + UINT32 nCommon = SYMCRYPT_MAX( SymCryptSizeofIntFromDigits( nDigits ), SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ), SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigits ) ) ); + UINT32 cbModElement = pCurve->cbModElement; + UINT32 nDigitsFieldLength = pCurve->FModDigits; + + // + // All the scratch space computations are upper bounded by the SizeofXXX bound (2^19) and + // the SCRATCH_BYTES_FOR_XXX bound (2^24) (see symcrypt_internal.h). + // + // One caveat is SymCryptSizeofEcpointFromCurve and SymCryptSizeofEcpointEx which calculate the + // size of EcPoint with 4 coordinates (each one a modelement of max size 2^17). Thus upper + // bounded by 2^20. + // + + pCurve->cbScratchCommon = nCommon; + // One integer, six mod elements, plus the common scratch. + pCurve->cbScratchScalar = + SymCryptSizeofIntFromDigits(nDigits) + + 6 * nBytes + + nCommon; + + pCurve->cbScratchScalarMulti = 0; + pCurve->cbScratchGetSetValue = + SymCryptSizeofEcpointEx( cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ) + + 2 * cbModElement + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsFieldLength ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigitsFieldLength ) ); + + pCurve->cbScratchGetSetValue = SYMCRYPT_MAX( pCurve->cbScratchGetSetValue, SymCryptSizeofIntFromDigits( nDigits ) ); + + // Computed last: depends on the cbScratchScalar and cbScratchGetSetValue values set above. + pCurve->cbScratchEckey = + SYMCRYPT_MAX( cbModElement + SymCryptSizeofIntFromDigits(SymCryptEcurveDigitsofScalarMultiplier(pCurve)), + SymCryptSizeofEcpointFromCurve( pCurve ) ) + + SYMCRYPT_MAX( pCurve->cbScratchScalar, pCurve->cbScratchGetSetValue ); +} + +// Sets poDst to the curve's distinguished point by copying pCurve->G into it. +// pbScratch and cbScratch are not used. +VOID +SYMCRYPT_CALL +SymCryptMontgomerySetDistinguished( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ +
SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptEcpointCopy( pCurve, pCurve->G, poDst ); +} + +// +// Verify poSrc1(X1, Z1) = poSrc2(X2, Z2) +// To avoid ModInv for 1/Z, we do +// X1 * Z2 = X2 * Z1 +// +// This function currently ignores the flags parameter as there is no distinction between equal and +// negative equal case in Single Projective Coordinates used in Montgomery curves. We accept the flags +// to maintain the same API as for other curves. +// +UINT32 +SYMCRYPT_CALL +SymCryptMontgomeryIsEqual( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + PSYMCRYPT_MODELEMENT peTemp[2]; + PSYMCRYPT_MODELEMENT peSrc1X, peSrc1Z; + PSYMCRYPT_MODELEMENT peSrc2X, peSrc2Z; + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + SIZE_T nBytes; + + SYMCRYPT_ASSERT( (flags & ~(SYMCRYPT_FLAG_ECPOINT_EQUAL|SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL)) == 0 ); + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + UNREFERENCED_PARAMETER( flags ); + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 2 * nBytes ); + + for (UINT32 i = 0; i < 2; ++i) + { + peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= nBytes; + } + + peSrc1X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 ); + peSrc1Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 ); + + peSrc2X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 ); + peSrc2Z = 
SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 ); + + // peTemp[0] = X1 * Z2 + SymCryptModMul( pmMod, peSrc1X, peSrc2Z, peTemp[0], pbScratch, cbScratch ); + + // peTemp[1] = X2 * Z1 + SymCryptModMul( pmMod, peSrc2X, peSrc1Z, peTemp[1], pbScratch, cbScratch ); + + return SymCryptModElementIsEqual( pmMod, peTemp[0], peTemp[1] ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptMontgomeryIsZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + PSYMCRYPT_MODELEMENT peZ = NULL; // Pointer to Z + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + // Getting pointer to Z of the source point + peZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + + return SymCryptModElementIsZero( FMod, peZ ); +} + +VOID +SymCryptMontgomeryDoubleAndAdd( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peX1, + _In_opt_ PCSYMCRYPT_MODELEMENT peZ1, + _In_ PCSYMCRYPT_MODELEMENT peA24, + _Inout_ PSYMCRYPT_MODELEMENT peX2, + _Inout_ PSYMCRYPT_MODELEMENT peZ2, + _Inout_ PSYMCRYPT_MODELEMENT peX3, + _Inout_ PSYMCRYPT_MODELEMENT peZ3, + _Inout_ PSYMCRYPT_MODELEMENT peTemp1, + _Inout_ PSYMCRYPT_MODELEMENT peTemp2, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch) +/* +We use the notation of ladd-1987-m-3, this is a generic Montgomery ladder implementation. +This is similar to RFC7748 for TLS use of curve25519, however, unlike in the RFC, we support the case when Z1 != 1. + +When it is statically known that Z1 == 1 the caller can set peZ1 to NULL to skip one redundant modular multiplication. + Note that this will be revealed through timing, so peZ1 can only be set to NULL if it is not secret that Z1 == 1. 
+ Z1 == 1 is statically known for points which have just been imported into SymCrypt (and for the distinguished point of the + curve), and this knowledge is tracked in an ecPoint's normalized flag. + +The (X,Z) values represent an x-coordinate (X/Z); this representation avoids the modular division. + +The value a24 is such that 4*a24 = a+2 where a is one of the Montgomery curve parameters. +Thus, a24 = (a+2)/4. For curve25519, a = 486662, so a24 = 121666 (=0x01db42) + +Algorithm (ladd-1987-m-3), with all operations expanded + A = X2 + Z2 + AA = A^2 + B = X2 - Z2 + BB = B^2 + E = AA - BB + C = X3 + Z3 + D = X3 - Z3 + DA = D * A + CB = C * B + X5 = (DA + CB)^2 + DApCB = DA + CB + X5 = DApCB^2 + if peZ1 != NULL: + X5 = Z1 * X5 + Z5 = X1 * (DA - CB)^2 + DAmCB = DA - CB + DAmCB2 = DAmCB ^ 2 + Z5 = X1 * DAmCB2 + X4 = AA * BB + Z4 = E * (BB + a24 * E) + A24E = A24 * E + BAE = BB + A24 * E + Z4 = E * BAE + +If we write a = (X2,Z2) and b = (X3,Z3), and a-b = (X1,Z1), then this algorithm computes +(2*a) and (a+b) into (X4, Z4) and (X5,Z5) respectively. +The Montgomery ladder uses this as follows: +- Store xP and (x+1)P +- To process a 0 bit in the scalar, apply the DoubleAndAdd to (xP,(x+1)P) to get (2xP, (2x+1)P) +- To process a 1 bit in the scalar, apply the DoubleAndAdd to ((x+1)P, xP) to get ((2x+2)P, (2x+1)P) +This updates the state to either (2xP, (2x+1)P) or to ((2x+1)P, (2x+2)P) and corresponds to updating +x to either 2x or 2x+1. + +The starting value is (0,P), represented as ((1,0),(P_x,P_z)) +The algorithm above, when applied to (1, 0, X, Z) produces: + A = 1, AA = 1, B = 1, BB = 1, E = 0, + C = X+Z, D = X-Z, DA = X-Z, CB = X+Z, + X5 = 4(X^2)Z, Z5 = 4X(Z^2) + X4 = 1, Z4 = 0 +for an output of (1, 0, 4(X^2)Z, 4X(Z^2)) +But (4(X^2)Z, 4X(Z^2)) is just another representation of (X,Z) as only the quotient of the two numbers is significant. +So even if an exponent starts with a bunch of 0 bits, the DoubleAndAdd-based function computes the right result in constant time. 
+*/ +{ + // Temp1 = A = X2 + Z2 + SymCryptModAdd( pmMod, peX2, peZ2, peTemp1, pbScratch, cbScratch ); + + // Z2 = B = X2 - Z2 + SymCryptModSub( pmMod, peX2, peZ2, peZ2, pbScratch, cbScratch ); + + // Temp2 = C = X3 + Z3 + SymCryptModAdd( pmMod, peX3, peZ3, peTemp2, pbScratch, cbScratch ); + + // Z3 = D = X3 - Z3 + SymCryptModSub( pmMod, peX3, peZ3, peZ3, pbScratch, cbScratch ); + + // X3 = CB = C * B = Temp2 * Z2 + SymCryptModMul( pmMod, peTemp2, peZ2, peX3, pbScratch, cbScratch ); + + // Z3 = DA = D * A = Z3 * Temp1 + SymCryptModMul( pmMod, peZ3, peTemp1, peZ3, pbScratch, cbScratch ); + + // From this point on, the outputs (X5,Z5) depend only on (X3,Z3) and (X1,Z1) + // and the outputs (X4,Z4) only on (Temp1,Z2) and A24 + // We'll do the (X4,Z4) first + + // X2 = AA = A * A = Temp1 * Temp1 + SymCryptModSquare( pmMod, peTemp1, peX2, pbScratch, cbScratch ); + + // Temp1 = BB = B * B = Z2 * Z2 + SymCryptModSquare( pmMod, peZ2, peTemp1, pbScratch, cbScratch ); + + // Temp2 = E = AA - BB = X2 - Temp1 + SymCryptModSub( pmMod, peX2, peTemp1, peTemp2, pbScratch, cbScratch ); + + // X2 = X4 = AA * BB = X2 * Temp1 + SymCryptModMul( pmMod, peX2, peTemp1, peX2, pbScratch, cbScratch ); + + // Z2 = A24E = A24 * E = A24 * Temp2 + SymCryptModMul( pmMod, peA24, peTemp2, peZ2, pbScratch, cbScratch ); + + // Z2 = BAE = (BB + a24 * E) = BB + A24E = Temp1 + Z2 + SymCryptModAdd( pmMod, peTemp1, peZ2, peZ2, pbScratch, cbScratch ); + + // Z2 = Z4 = E * BAE = Temp2 * Z2 + SymCryptModMul( pmMod, peTemp2, peZ2, peZ2, pbScratch, cbScratch ); + + // Now we compute (X5, Z5) + + // Temp1 = DApCB = DA + CB = Z3 + X3 + SymCryptModAdd( pmMod, peZ3, peX3, peTemp1, pbScratch, cbScratch ); + + // Z3 = DAmCB = DA - CB = Z3 - X3 + SymCryptModSub( pmMod, peZ3, peX3, peZ3, pbScratch, cbScratch ); + + // X3 = DApCB^2 = Temp1 ^ 2 ( = X5 when (peZ1 == NULL) => Z1 == 1) + SymCryptModSquare( pmMod, peTemp1, peX3, pbScratch, cbScratch ); + + if (peZ1 != NULL) // source point is not normalized + { + // X3 = 
X5 = Z1 * DApCB^2 = Z1 * X3 + SymCryptModMul( pmMod, peZ1, peX3, peX3, pbScratch, cbScratch ); + } + + // Z3 = DAmCB2 = DAmCB ^ 2 = Z3 ^ 2 + SymCryptModSquare( pmMod, peZ3, peZ3, pbScratch, cbScratch ); + + // Z3 = Z5 = X1 * DAmCB2 = X1 * Z3 + SymCryptModMul( pmMod, peX1, peZ3, peZ3, pbScratch, cbScratch ); +} + +// +// Montgomery point multiplication only works on X-coordinates. +// We ignore the Y-coordinates. +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMontgomeryPointScalarMul( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_INT piScalar, + _In_opt_ + PCSYMCRYPT_ECPOINT poSrc, + UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_MODULUS pmMod; + PSYMCRYPT_MODELEMENT peX1, peZ1, peA24, peX2, peZ2, peX3, peZ3, peTemp1, peTemp2, peResult; + UINT32 i, nBytes, nDigits, cond, newcond, nCommon; + PBYTE pBegin; + SIZE_T cbAllScratch; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) ); + SYMCRYPT_ASSERT( (poSrc == NULL || SymCryptEcurveIsSame(pCurve, poSrc->pCurve)) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + // Make sure we only specify the correct flags + if ((flags & ~SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (poSrc == NULL) + { + poSrc = pCurve->G; + } + + // + // Set up structure for X2, Z2, X3, Z3, Temp1, and Temp2, and the scratch space. 
+ // + pmMod = pCurve->FMod; + + nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize ); + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + nCommon = SYMCRYPT_MAX( SymCryptSizeofIntFromDigits(nDigits), SYMCRYPT_MAX(SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nDigits), SYMCRYPT_SCRATCH_BYTES_FOR_MODINV(nDigits))); + + SYMCRYPT_ASSERT( cbScratch >= 6 * nBytes + nCommon ); + + cbAllScratch = cbScratch; + pBegin = pbScratch; + + // + // Create mod elements + // + peX2 = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + + peZ2 = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + + peX3 = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + + peZ3 = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + + peTemp1 = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + + peTemp2 = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + + cbScratch = nCommon; + + // + // Set up values + // + + peA24 = pCurve->A; + + // X1 = X, Z1 = Z + peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc); + peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc); + + // X2 = 1, Z2 = 0, X3 = X, Z3 = Z + SymCryptModElementSetValueUint32( 1, pmMod, peX2, pbScratch, cbScratch ); + SymCryptModElementSetValueUint32( 0, pmMod, peZ2, pbScratch, cbScratch ); + SymCryptModElementCopy( pmMod, peX1, peX3 ); + SymCryptModElementCopy( pmMod, peZ1, peZ3 ); + + if ( poSrc->normalized ) + { + // Set peZ1 to NULL to avoid redundant multiplications in SymCryptMontgomeryDoubleAndAdd + peZ1 = NULL; + } + + // + // Montgomery ladder scalar multiplication + // + + i = (pCurve->GOrdBitsize + pCurve->coFactorPower); + cond = 0; + while ( i != 0 ) + { + // If cond = 0, we have (X2, Z2, X3, Z3) + // if cond = 1, we have (X3, Z3, X2, Z2) + i--; + newcond = SymCryptIntGetBit( piScalar, i ); + cond ^= newcond; + + 
SymCryptModElementConditionalSwap( pmMod, peX2, peX3, cond); + SymCryptModElementConditionalSwap( pmMod, peZ2, peZ3, cond); + + cond = newcond; + + SymCryptMontgomeryDoubleAndAdd( pmMod, peX1, peZ1, peA24, peX2, peZ2, peX3, peZ3, peTemp1, peTemp2, pbScratch, cbScratch ); + } + + // Now put them back in the normal order + SymCryptModElementConditionalSwap( pmMod, peX2, peX3, cond); + SymCryptModElementConditionalSwap( pmMod, peZ2, peZ3, cond); + + // Multiply by the cofactor (if needed) by continuing the doubling + if ((flags & SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0) + { + i = pCurve->coFactorPower; + while (i!=0) + { + i--; + // We only use the doubling output here, so we definitely don't need to provide Z1 + // We could refactor to have a separate SymCryptMontgomeryDouble function but for Curve25519 this loop is ~1% of runtime + SymCryptMontgomeryDoubleAndAdd( pmMod, peX1, NULL, peA24, peX2, peZ2, peX3, peZ3, peTemp1, peTemp2, pbScratch, cbScratch ); + } + } + + // Set X coordinate + peResult = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst); + SymCryptModElementCopy( pCurve->FMod, peX2, peResult ); + + // Set Z coordinate + peResult = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst); + SymCryptModElementCopy( pCurve->FMod, peZ2, peResult ); + + poDst->normalized = FALSE; + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/ec_mul.c b/libs/symcrypt/lib/ec_mul.c new file mode 100644 index 00000000000..615edffa201 --- /dev/null +++ b/libs/symcrypt/lib/ec_mul.c @@ -0,0 +1,571 @@ +// +// ec_mul.c Generic multiplication algorithms for elliptic curves +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +// +// Most of the following algorithms were presented in the paper +// "Selecting Elliptic Curves for Cryptography: An Efficiency and +// Security Analysis" by Bos, Costello, Longa, and Naehrig +// + +// +// The following is an adaptation of algorithm 4: "Precomputation +// scheme for Weierstrass curves" +// +// Input: Point P and number of precomputed points nPoints (=2^(w-2)) +// +// Output: P[i] = (2*i+1)P for 0<=i<2^(w-2) +// +// Remarks: +// 1. We store each point in an array of 4*2^(w-2) = 2^w modelements where +// each point is represented with X,Y,Z Jacobian coordinates and the W=-Y +// negated Y coordinate (so that we can get the negative of a point easily) +// 2. The source point P is already in the 0'th position of the array. +// +VOID +SYMCRYPT_CALL +SymCryptPrecomputation( + _In_ PCSYMCRYPT_ECURVE pCurve, + UINT32 nPoints, + _In_reads_( SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS ) + PSYMCRYPT_ECPOINT * poPIs, + _Out_ PSYMCRYPT_ECPOINT poQ, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poQ->pCurve) ); + // Calculation for Q = 2*P + SymCryptEcpointDouble( pCurve, poPIs[0], poQ, 0, pbScratch, cbScratch ); + + for (UINT32 i=1; i<nPoints; i++) + { + // Calculation for (2i+1)*P = i*Q + P + SymCryptEcpointAddDiffNonZero( pCurve, poQ, poPIs[i-1], poPIs[i], pbScratch, cbScratch ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptOfflinePrecomputation( + _In_ PSYMCRYPT_ECURVE pCurve, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PSYMCRYPT_ECPOINT poQ = NULL; + + UINT32 cbEcpoint = SymCryptSizeofEcpointFromCurve( pCurve ); + + SYMCRYPT_ASSERT( cbScratch >= cbEcpoint + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) ); + + poQ = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poQ != NULL ); + pbScratch += cbEcpoint; + cbScratch -= cbEcpoint; + + SymCryptPrecomputation( + pCurve, + 
pCurve->info.sw.nPrecompPoints, + pCurve->info.sw.poPrecompPoints, + poQ, + pbScratch, + cbScratch ); +} + +// Mask which is 0xffffffff only when _index == _target +#define DELTA_MASK( _index, _target) SYMCRYPT_MASK32_ZERO( (_index) ^ (_target) ) + +// +// The following is an adaptation of algorithm 1: "Variable-base scalar multiplication +// using the fixed-window method" +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointScalarMulFixedWindow( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_INT piScalar, + _In_opt_ + PCSYMCRYPT_ECPOINT poSrc, + UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + + UINT32 i, j; + + UINT32 w = pCurve->info.sw.window; + UINT32 nPrecompPoints = pCurve->info.sw.nPrecompPoints; + // dcl - assuming that nRecodedDigits has some reasonably small range - please document + // so that we can know usage of this variable will not cause problems + // Also, documentation of inputs, notes, etc at the function definition would be quite helpful + UINT32 nRecodedDigits = ((pCurve->GOrdBitsize + w - 2) / (w-1)) + 1; + + // Masks + UINT32 fZero = 0; + UINT32 fEven = 0; + UINT32 indexMask = 0; + + BOOLEAN bPrecompOffline = FALSE; + + // ==================================================== + // Temporaries + PSYMCRYPT_MODELEMENT peT = NULL; + PSYMCRYPT_ECPOINT poPIs[SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS] = { 0 }; + PSYMCRYPT_ECPOINT poQ = NULL; + PSYMCRYPT_ECPOINT poTmp = NULL; + PSYMCRYPT_INT piRem = NULL; + PSYMCRYPT_INT piTmp = NULL; + PUINT32 absofKIs = NULL; + PUINT32 sigofKIs = NULL; + // =================================================== + + PSYMCRYPT_MODELEMENT peQX = NULL; + PSYMCRYPT_MODELEMENT peQY = NULL; + PSYMCRYPT_MODELEMENT peQZ = NULL; + + SIZE_T cbEcpoint = SymCryptSizeofEcpointFromCurve( pCurve ); + SIZE_T cbScalar = SymCryptSizeofIntFromDigits( 
pCurve->GOrdDigits ); + + // Make sure we only specify the correct flags + if ((flags & ~SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto exit; + } + + // Check if poSrc is NULL and if yes set it to G + if (poSrc == NULL) + { + poSrc = pCurve->G; + bPrecompOffline = TRUE; + } + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) || + SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS(pCurve, 1) ); + + SYMCRYPT_ASSERT( cbScratch >= + pCurve->cbModElement + + (nPrecompPoints+2)*cbEcpoint + + 2*cbScalar + + ((2*nRecodedDigits*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE )*SYMCRYPT_ASYM_ALIGN_VALUE + + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + // Creating temporary modelement + peT = SymCryptModElementCreate( pbScratch, pCurve->cbModElement, FMod ); + SYMCRYPT_ASSERT( peT != NULL ); + pbScratch += pCurve->cbModElement; + + // Creating temporary precomputed points (if needed) + SYMCRYPT_ASSERT( nPrecompPoints <= SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS ); + for (i=0; i<nPrecompPoints; i++) + { + if (bPrecompOffline) + { + poPIs[i] = pCurve->info.sw.poPrecompPoints[i]; + } + else + { + poPIs[i] = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poPIs[i] != NULL ); + pbScratch += cbEcpoint; + } + } + + // Creating temporary points + poQ = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poQ != NULL ); + pbScratch += cbEcpoint; + + poTmp = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poTmp != NULL ); + pbScratch += cbEcpoint; + + // Creating temporary scalar for the remainder + piRem = SymCryptIntCreate( pbScratch, cbScalar, pCurve->GOrdDigits ); + SYMCRYPT_ASSERT( piRem != NULL); + 
pbScratch += cbScalar; + + piTmp = SymCryptIntCreate( pbScratch, cbScalar, pCurve->GOrdDigits ); + SYMCRYPT_ASSERT( piTmp != NULL); + pbScratch += cbScalar; + + // Fixing pointers to recoded digits (be careful that the remaining space is SYMCRYPT_ASYM_ALIGNed) + absofKIs = (PUINT32) pbScratch; + pbScratch += nRecodedDigits * sizeof(UINT32); + sigofKIs = (PUINT32) pbScratch; + pbScratch += nRecodedDigits * sizeof(UINT32); + pbScratch = (PBYTE) ( ((SIZE_T)pbScratch + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1) ); + + // Fixing remaining scratch space size + cbScratch -= ( pCurve->cbModElement + (nPrecompPoints+2)*cbEcpoint + 2*cbScalar ); + cbScratch -= (((2*nRecodedDigits*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE )*SYMCRYPT_ASYM_ALIGN_VALUE); + + // + // Main algorithm + // + + // It is the caller's responsibility to ensure that the provided piScalar <= GOrd, double check this in debug mode + SYMCRYPT_ASSERT( !SymCryptIntIsLessThan( SymCryptIntFromModulus( pCurve->GOrd ), piScalar ) ); + + // Store k into an int + SymCryptIntCopy( piScalar, piRem ); + + // Check if k is 0 + fZero = SymCryptIntIsEqualUint32( piRem, 0 ); + + // Or if the src point is zero + fZero |= SymCryptEcpointIsZero( pCurve, poSrc, pbScratch, cbScratch ); + + // Check if k is even and convert it to r-k if true + fEven = SYMCRYPT_MASK32_ZERO(SymCryptIntGetBit( piRem, 0 )); + SymCryptIntSubSameSize( SymCryptIntFromModulus(pCurve->GOrd), piRem, piTmp); + SymCryptIntMaskedCopy( piTmp, piRem, fEven ); + + // Recoding stage + SymCryptFixedWindowRecoding( w, piRem, piTmp, absofKIs, sigofKIs, nRecodedDigits ); + + // Precomputation stage + if (!bPrecompOffline) + { + // Copy the first point in the start of the poPIs array + SymCryptEcpointCopy( pCurve, poSrc, poPIs[0] ); + + SymCryptPrecomputation( pCurve, nPrecompPoints, poPIs, poQ, pbScratch, cbScratch ); + } + + + // Get the pointers to Q + peQX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, 
poQ ); + peQY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poQ ); + peQZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poQ ); + + // Q = P[ (|k_t|-1)/2 ] in memory access side-channel safe way + // That is, we touch all the precomputed points. The access pattern of KIs is fixed. + for (j=0; j<nPrecompPoints; j++) + { + indexMask = DELTA_MASK( j, absofKIs[nRecodedDigits-1] ); + SymCryptEcpointMaskedCopy( pCurve, poPIs[j], poQ, indexMask); + } + + for (i=nRecodedDigits - 2; i>0; i--) + { + // Q = 2^(w-1) * Q + for (j=0; j<w-1; j++) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + } + + // Copy the required precomputed point into poTmp (touch all points) + for (j=0; j<nPrecompPoints; j++) + { + indexMask = DELTA_MASK( j, absofKIs[i] ); + SymCryptEcpointMaskedCopy( pCurve, poPIs[j], poTmp, indexMask); + } + + // Negate if needed + SymCryptEcpointNegate( pCurve, poTmp, sigofKIs[i], pbScratch, cbScratch ); + + // Do the addition Q + s_i P[k_i] + SymCryptEcpointAddDiffNonZero( pCurve, poQ, poTmp, poQ, pbScratch, cbScratch ); + } + + // Q = 2^(w-1) * Q + for (j=0; j<w-1; j++) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + } + + // Copy the point s_0 P[k_0] into poTmp + for (j=0; j<nPrecompPoints; j++) + { + indexMask = DELTA_MASK( j, absofKIs[0] ); + SymCryptEcpointMaskedCopy( pCurve, poPIs[j], poTmp, indexMask); + } + + // Negate if needed + SymCryptEcpointNegate( pCurve, poTmp, sigofKIs[0], pbScratch, cbScratch ); + + // Complete addition routine + SymCryptEcpointAdd( pCurve, poQ, poTmp, poQ, 0, pbScratch, cbScratch ); + + // If even invert + SymCryptEcpointNegate( pCurve, poQ, fEven, pbScratch, cbScratch ); + + // Multiply by the cofactor (if needed) by continuing the doubling + if ((pCurve->coFactorPower!=0) && ((flags & SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0)) + { + for (j=0; j<pCurve->coFactorPower; j++) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + } + } + + // If the 
resultant point is zero, ensure it will be set to the canonical zero point + fZero |= SymCryptEcpointIsZero( pCurve, poQ, pbScratch, cbScratch ); + + // Set the zero point + SymCryptEcpointSetZero( pCurve, poTmp, pbScratch, cbScratch ); + SymCryptEcpointMaskedCopy( pCurve, poTmp, poQ, fZero ); + + // Output the result (normalized flag == FALSE) + SymCryptEcpointCopy( pCurve, poQ, poDst ); + + scError = SYMCRYPT_NO_ERROR; + +exit: + + return scError; +} + +// +// The following is an adaptation of algorithm 9: "Double-scalar multiplication using the +// width-w NAF with interleaving" +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointMultiScalarMulWnafWithInterleaving( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_reads_( nPoints ) PCSYMCRYPT_INT * piSrcScalarArray, + _In_reads_( nPoints ) PCSYMCRYPT_ECPOINT * poSrcEcpointArray, + _In_ UINT32 nPoints, + _In_ UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + + UINT32 i, j; + + UINT32 w = pCurve->info.sw.window; + UINT32 nPrecompPoints = pCurve->info.sw.nPrecompPoints; // One table for each base + UINT32 nRecodedDigits = pCurve->GOrdBitsize + 1; // Notice the difference with the fixed window + + // Masks + UINT32 fZero[SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS] = { 0 }; + UINT32 fZeroTot = 0xffffffff; + + BOOLEAN bPrecompOffline = FALSE; + + // ==================================================== + // Temporaries + PSYMCRYPT_ECPOINT poPIs[SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS] = { 0 }; + PSYMCRYPT_ECPOINT poQ = NULL; + PSYMCRYPT_ECPOINT poTmp = NULL; + PSYMCRYPT_INT piRem = NULL; + PSYMCRYPT_INT piTmp = NULL; + + PUINT32 absofKIs = NULL; + PUINT32 sigofKIs = NULL; + // =================================================== + + SIZE_T cbEcpoint = SymCryptSizeofEcpointFromCurve( pCurve ); + SIZE_T cbScalar = SymCryptSizeofIntFromDigits( pCurve->GOrdDigits ); + + PBYTE pbScratchEnd = pbScratch 
+ cbScratch; + UNREFERENCED_PARAMETER( pbScratchEnd ); // Used in asserts + + // Make sure we only specify the correct flags + if ((flags & ~(SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL)) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto exit; + } + + // Check the maximum number of points + if (nPoints > SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS) + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto exit; + } + + // Check if the first point is NULL + if (poSrcEcpointArray[0] == NULL) + { + poSrcEcpointArray[0] = pCurve->G; + bPrecompOffline = TRUE; + } + + // Make sure that the non side-channel flag is specified + if ((flags & SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ) + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto exit; + } + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) || + SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS(pCurve, nPoints) ); + + // Creating temporary precomputed points (if needed for the first point) + for (i=0; i<nPoints*nPrecompPoints; i++) + { + if ((i<nPrecompPoints) && bPrecompOffline) + { + poPIs[i] = pCurve->info.sw.poPrecompPoints[i]; + } + else + { + SYMCRYPT_ASSERT( pbScratch + cbEcpoint <= pbScratchEnd ); + poPIs[i] = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poPIs[i] != NULL ); + pbScratch += cbEcpoint; + } + } + + SYMCRYPT_ASSERT( pbScratch + 2*cbEcpoint + 2*cbScalar + 2*nPoints*nRecodedDigits*sizeof(UINT32) <= pbScratchEnd ); + // Creating temporary points + poQ = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poQ != NULL ); + pbScratch += cbEcpoint; + + poTmp = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poTmp != NULL ); + pbScratch += cbEcpoint; + + // Creating temporary scalar for the remainder + piRem = SymCryptIntCreate( pbScratch, cbScalar, 
pCurve->GOrdDigits ); + SYMCRYPT_ASSERT( piRem != NULL); + pbScratch += cbScalar; + + piTmp = SymCryptIntCreate( pbScratch, cbScalar, pCurve->GOrdDigits ); + SYMCRYPT_ASSERT( piTmp != NULL); + pbScratch += cbScalar; + + // Fixing pointers to recoded digits (be careful that the remaining space is SYMCRYPT_ASYM_ALIGNed) + absofKIs = (PUINT32) pbScratch; + pbScratch += nPoints * nRecodedDigits * sizeof(UINT32); + sigofKIs = (PUINT32) pbScratch; + pbScratch += nPoints * nRecodedDigits * sizeof(UINT32); + pbScratch = (PBYTE) ( ((SIZE_T)pbScratch + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1) ); + + // Fixing remaining scratch space size + // dcl - my guess is that the values here are small enough that there should not be a problem, but + // would be better if that were documented. + cbScratch -= ( (nPoints*nPrecompPoints+2)*cbEcpoint + 2*cbScalar ); + cbScratch -= (((2*nPoints*nRecodedDigits*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE )*SYMCRYPT_ASYM_ALIGN_VALUE); + + // + // Main algorithm + // + for (j = 0; j<nPoints; j++) + { + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrcEcpointArray[j]->pCurve) ); + + // Check if k is 0 or if the src point is zero + fZero[j] = ( SymCryptIntIsEqualUint32( piSrcScalarArray[j], 0 ) | SymCryptEcpointIsZero( pCurve, poSrcEcpointArray[j], pbScratch, cbScratch ) ); + fZeroTot &= fZero[j]; + + // Skip the recoding stage (and all remaining steps) if this point will give result zero + if (!fZero[j]) + { + SymCryptIntCopy( piSrcScalarArray[j], piRem ); + + // Recoding stage + SymCryptWidthNafRecoding( w, piRem, &absofKIs[j*nRecodedDigits], &sigofKIs[j*nRecodedDigits], nRecodedDigits ); + + // Precomputation stage + if ((j>0) || !bPrecompOffline) + { + // Copy the first point in the start of the poPIs array + SymCryptEcpointCopy( pCurve, poSrcEcpointArray[j], poPIs[j*nPrecompPoints] ); + + SymCryptPrecomputation( pCurve, nPrecompPoints, &poPIs[j*nPrecompPoints], poQ, pbScratch, cbScratch 
); + } + } + } + + // Set poQ to zero point + SymCryptEcpointSetZero( pCurve, poQ, pbScratch, cbScratch ); + + if (!fZeroTot) + { + // Main loop + for (INT32 i = nRecodedDigits-1; i>-1; i--) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + + for (j = 0; j<nPoints; j++) + { + if (!fZero[j] && sigofKIs[j*nRecodedDigits + i] != 0) + { + SymCryptEcpointCopy( pCurve, poPIs[j*nPrecompPoints + absofKIs[j*nRecodedDigits + i]/2], poTmp ); + + if (sigofKIs[j*nRecodedDigits + i] == 0xffffffff) + { + SymCryptEcpointNegate( pCurve, poTmp, 0xffffffff, pbScratch, cbScratch ); + } + + SymCryptEcpointAdd( pCurve, poQ, poTmp, poQ, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratch ); + } + } + } + } + + // Multiply by the cofactor (if needed) by continuing the doubling + if ((pCurve->coFactorPower!=0) && ((flags & SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0)) + { + for (j=0; j<pCurve->coFactorPower; j++) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + } + } + + // If the resultant point is zero, ensure it will be set to the canonical zero point + if ( SymCryptEcpointIsZero( pCurve, poQ, pbScratch, cbScratch ) ) + { + // Set poQ to zero point + SymCryptEcpointSetZero( pCurve, poQ, pbScratch, cbScratch ); + } + + // Copy the result to the destination (normalized flag == FALSE) + SymCryptEcpointCopy( pCurve, poQ, poDst ); + + scError = SYMCRYPT_NO_ERROR; + +exit: + return scError; +} + +VOID +SYMCRYPT_CALL +SymCryptEcpointGenericSetRandom( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_INT piScalar, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PSYMCRYPT_MODELEMENT peScalar = NULL; + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS(pCurve, 1) ); + SYMCRYPT_ASSERT( cbScratch >= pCurve->cbModElement ); + + peScalar = SymCryptModElementCreate( pbScratch, 
pCurve->cbModElement, pCurve->GOrd ); + SYMCRYPT_ASSERT( peScalar != NULL ); + + // Setting a random mod element in the [1, SubgroupOrder-1] set + SymCryptModSetRandom( pCurve->GOrd, peScalar, (SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE|SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE), pbScratch + pCurve->cbModElement, cbScratch - pCurve->cbModElement ); + + // Setting the integer + SymCryptModElementToInt( pCurve->GOrd, peScalar, piScalar, pbScratch + pCurve->cbModElement, cbScratch - pCurve->cbModElement ); + + // Do the multiplication (pass over the entire scratch space as it is not needed anymore) + // !! Explicitly not checking the error return here as the only error is from specifying invalid flags !! + SymCryptEcpointScalarMul( pCurve, piScalar, NULL, 0, poDst, pbScratch, cbScratch ); +} diff --git a/libs/symcrypt/lib/ec_short_weierstrass.c b/libs/symcrypt/lib/ec_short_weierstrass.c new file mode 100644 index 00000000000..ca8399a5fd7 --- /dev/null +++ b/libs/symcrypt/lib/ec_short_weierstrass.c @@ -0,0 +1,935 @@ +// +// ec_short_weierstrass.c ECPOINT functions for short Weierstrass curves. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// +// Scratch space requirements for each ECPOINT function. +// +// A function's requirements in scratch space consist of requirements for its own arithmetic +// operations and temporaries ("self" scratch space) and scratch space requirements for other +// ECPOINT functions it might call ("callee" scratch space). +// +// If the outer function does not need the temporaries after calling the inner ECPOINT +// function, then the total scratch space can be the maximum of both. Otherwise the scratch +// space of the outer function should be the concatenation of the "self" scratch space and +// the "callee" scratch space. +// +// The following table shows the scratch space requirements of each function with appropriate +// abbreviations. 
// The calling sequence implies a directed graph that, starting from the "leaves"
// (functions that do not call others), allows calculating the total scratch space requirements.
//
//  #N      Function                    Calls Function  Self Temporaries                Self Scratch
//  1       SetZero                     -               0                               COM_MOD(FMod)
//  2       SetDistinguishedPoint       -               0                               0
//  3       IsEqual                     -               4 ModEl                         COM_MOD(FMod)
//  4       IsZero                      -               1 ModEl                         COM_MOD(FMod)
//  5       OnCurve                     -               2 ModEl                         COM_MOD(FMod)
//  6       Double                      1,4,9           2 Ecp                           0
//  7       Add                         1,3,4,8,9       2 Ecp                           0
//  8       AddDiffNonZero              -               8 ModEl                         COM_MOD(FMod)
//  9       Double                      -               6 ModEl                         COM_MOD(FMod)
//
//  10      SetRandom                   11              0                               COM_MOD(GOrd)
//  11      ScalarMul                   4,5,7           1ModEl + (n+2)Ecp + 2Int        COM_MOD(GOrd)
//
// Since only 4 functions call others and to keep things simple, we will have 2
// types of scratch space: "ECURVE_COMMON" and "ECURVE_SCALAR"
//
//  ---- All functions except 10 and 11 will use the "ECURVE_COMMON" scratch space. The size of it
//       depends only on parameters of the curve. Schematically it will be:
//          |----------COMMON------------------------------------------------------------------|
//          |------8 ModEl + 2 Ecpoint----||------COM_MOD(FMod)--------------------------------|
//
//  ---- The SetRandom and ScalarMul have requirements that depend on temporaries for the pre-computation.
//       Also they depend on the "self" temporaries after calling the inner functions.
//       Therefore, these will require the "ECURVE_SCALAR" scratch space which
//       consists of two parts: The self space for the above two functions and the
//       common scratch space. These parts SHOULD NOT overlap. Schematically:
//
//          |--------------SCALAR---------------------------------------------------|
//          |----1ModEl + (n+2)Ecp + 2Int--------||---max(COMMON, COM_MOD(GOrd))---|

// The scratch space sizes are all calculated by the following function.
// *** Notice that almost all the curve parameters (exception is the distinguished point)
//     must have been initialized before calling this function.
//
// SymCryptShortWeierstrassFillScratchSpaces
//
// Computes the scratch-space byte counts for a short Weierstrass curve and stores them in
// pCurve: cbScratchCommon, cbScratchScalar, cbScratchScalarMulti, cbScratchGetSetValue and
// cbScratchEckey. It reads cbModElement, FModBitsize, FModDigits, GOrdDigits,
// info.sw.nPrecompPoints and info.sw.nRecodedDigits from pCurve (plus whatever the called
// size helpers read), so these must already be set.
//
// The assignments are order-dependent: cbScratchScalar uses cbScratchCommon, and
// cbScratchEckey uses cbScratchScalar, cbScratchScalarMulti and cbScratchGetSetValue.
//
VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve )
{
    // Number of digits for an integer of FModBitsize bits; used below as a lower bound
    // for cbScratchGetSetValue.
    UINT32 nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize );

    //
    // All the scratch space computations are upper bounded by the SizeofXXX bound (2^19) and
    // the SCRATCH_BYTES_FOR_XXX bound (2^24) (see symcrypt_internal.h).
    //
    // One caveat is SymCryptSizeofEcpointFromCurve and SymCryptSizeofEcpointEx which calculate
    // the size of EcPoint with 4 coordinates (each one a modelement of max size 2^17). Thus upper
    // bounded by 2^20.
    //
    // Another is the precomp points computation where the nPrecompPoints are up to
    // 2^SYMCRYPT_ECURVE_SW_DEF_WINDOW = 2^6 and the nRecodedDigits are equal to the
    // GOrd bitsize < 2^20.
    //
    // Thus cbScratchScalarMulti is upper bounded by 2^6*2^20 + 2*2^20*2^4 ~ 2^26.
    //

    // Common: 8 mod elements + 2 points + scratch for modular operations on FMod
    pCurve->cbScratchCommon =
            8 * pCurve->cbModElement +
            2 * SymCryptSizeofEcpointFromCurve( pCurve ) +
            SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits );

    // Scalar (Overhead): 1 mod element + 2 points + 2 integers, followed by the larger of
    // the common scratch space and the scratch for modular operations on GOrd
    pCurve->cbScratchScalar =
            pCurve->cbModElement +
            2 * SymCryptSizeofEcpointFromCurve( pCurve ) +
            2 * SymCryptSizeofIntFromDigits( pCurve->GOrdDigits ) +
            SYMCRYPT_MAX( pCurve->cbScratchCommon, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->GOrdDigits ));

    // Scalar dependent on precomp points (be careful to align the UINT32 arrays properly).
    // The second term rounds 2*nRecodedDigits UINT32s up to a multiple of SYMCRYPT_ASYM_ALIGN_VALUE.
    pCurve->cbScratchScalarMulti =
            pCurve->info.sw.nPrecompPoints * SymCryptSizeofEcpointFromCurve( pCurve ) +
            ((2*pCurve->info.sw.nRecodedDigits * sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1 )/SYMCRYPT_ASYM_ALIGN_VALUE) * SYMCRYPT_ASYM_ALIGN_VALUE;

    // GetSetValue: 1 point in the largest format + 2 mod elements + the larger of the
    // common modular scratch and the modular inversion scratch
    pCurve->cbScratchGetSetValue =
            SymCryptSizeofEcpointEx( pCurve->cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ) +
            2 * pCurve->cbModElement +
            SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ),
                          SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pCurve->FModDigits ) );

    // ... and never smaller than one integer of nDigits digits
    pCurve->cbScratchGetSetValue = SYMCRYPT_MAX( pCurve->cbScratchGetSetValue, SymCryptSizeofIntFromDigits( nDigits ) );

    // Eckey: max(mod element + scalar-multiplier integer, point) followed by the larger of
    // the total scalar-multiplication scratch and the get/set value scratch
    pCurve->cbScratchEckey =
            SYMCRYPT_MAX( pCurve->cbModElement + SymCryptSizeofIntFromDigits(SymCryptEcurveDigitsofScalarMultiplier(pCurve)),
                          SymCryptSizeofEcpointFromCurve( pCurve ) ) +
            SYMCRYPT_MAX( pCurve->cbScratchScalar + pCurve->cbScratchScalarMulti, pCurve->cbScratchGetSetValue );
}

//
// The following function sets the point to (1:1:0) in Jacobian coordinates.
//
// pbScratch / cbScratch are passed unchanged to SymCryptModElementSetValueUint32;
// cbScratch must be at least SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( FModDigits ).
//
VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassSetZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    PCSYMCRYPT_MODULUS FMod = pCurve->FMod;
    PSYMCRYPT_MODELEMENT peTmp = NULL;

    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) );
    SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) );

    // Getting handle to X
    peTmp = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst );

    // Setting the right value (always 1)
    SymCryptModElementSetValueUint32( 1, FMod, peTmp, pbScratch, cbScratch );

    // Getting handle to Y
    peTmp = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst );

    // Setting the right value (always 1)
    SymCryptModElementSetValueUint32( 1, FMod, peTmp, pbScratch, cbScratch );

    // Getting handle to Z
    peTmp = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst );

    // Setting the right value (always 0); pCurve->FMod is the same modulus as FMod above
    SymCryptModElementSetValueUint32( 0, pCurve->FMod, peTmp, pbScratch, cbScratch );
}

//
// The following function sets poDst to the distinguished point of the curve by
// copying pCurve->G. The scratch space is not used.
//
VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassSetDistinguished(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) );

    UNREFERENCED_PARAMETER( pbScratch );
    UNREFERENCED_PARAMETER( cbScratch );

    SymCryptEcpointCopy( pCurve, pCurve->G, poDst );
}

//
// The following function checks if
//      - X1*Z2^2 = X2*Z1^2 and Y1*Z2^3 =  Y2*Z1^3 (Equal case)
//      - X1*Z2^2 = X2*Z1^2 and Y1*Z2^3 = -Y2*Z1^3 (Negative case)
//
// Remark: The case where Z1 = Z2 = 0 is covered above (the zero point
// is equal to its negative).
//
// flags may contain SYMCRYPT_FLAG_ECPOINT_EQUAL and/or SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL;
// flags == 0 is treated as SYMCRYPT_FLAG_ECPOINT_EQUAL. All three comparisons are always
// computed; the flags only select which results are OR-ed into the return value.
// NOTE(review): the results are combined with bitwise & and |, so the compare helper and
// the SYMCRYPT_MASK32_* macros presumably yield all-zeros / all-ones masks - confirm.
//
// The first 4 * cbModElement bytes of pbScratch hold the temporaries; the remainder is
// used as scratch for the modular operations.
//
UINT32
SYMCRYPT_CALL
SymCryptShortWeierstrassIsEqual(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    PCSYMCRYPT_MODULUS FMod = pCurve->FMod;

    PSYMCRYPT_MODELEMENT peX1 = NULL;   // Pointer to X1
    PSYMCRYPT_MODELEMENT peY1 = NULL;   // Pointer to Y1
    PSYMCRYPT_MODELEMENT peZ1 = NULL;   // Pointer to Z1
    PSYMCRYPT_MODELEMENT peX2 = NULL;   // Pointer to X2
    PSYMCRYPT_MODELEMENT peY2 = NULL;   // Pointer to Y2
    PSYMCRYPT_MODELEMENT peZ2 = NULL;   // Pointer to Z2

    UINT32 dResX = 0;       // Result of X1*Z2^2 == X2*Z1^2
    UINT32 dResY = 0;       // Result of Y1*Z2^3 == Y2*Z1^3
    UINT32 dResYN = 0;      // Result of Y1*Z2^3 == -Y2*Z1^3

    PSYMCRYPT_MODELEMENT peT[4] = { 0 };    // Temporaries

    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) );
    SYMCRYPT_ASSERT( (flags & ~(SYMCRYPT_FLAG_ECPOINT_EQUAL|SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL)) == 0 );
    SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 4 * pCurve->cbModElement );

    // Creating temporaries (each one consumes cbModElement bytes from the front of the scratch)
    for (UINT32 i=0; i<4; i++)
    {
        peT[i] = SymCryptModElementCreate(
                    pbScratch,
                    pCurve->cbModElement,
                    FMod );

        SYMCRYPT_ASSERT( peT[i] != NULL);

        pbScratch += pCurve->cbModElement;
    }

    // Fixing remaining scratch space size
    cbScratch -= 4 * pCurve->cbModElement;

    // Getting pointers to the X, Y, Z coordinates of both source points
    peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 );
    peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 );
    peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 );
    peX2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 );
    peY2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 );
    peZ2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 );

    // Setting the default flag if flags == 0
    flags |= ( SYMCRYPT_MASK32_ZERO( flags ) & SYMCRYPT_FLAG_ECPOINT_EQUAL );

    // Calculation
    SymCryptModSquare( FMod, peZ1, peT[0], pbScratch, cbScratch );              // T0 := Z1 * Z1 = Z1^2
    SymCryptModSquare( FMod, peZ2, peT[1], pbScratch, cbScratch );              // T1 := Z2 * Z2 = Z2^2
    SymCryptModMul( FMod, peX1, peT[1], peT[2], pbScratch, cbScratch );         // T2 := X1 * T1 = X1*Z2^2
    SymCryptModMul( FMod, peX2, peT[0], peT[3], pbScratch, cbScratch );         // T3 := X2 * T0 = X2*Z1^2

    dResX = SymCryptModElementIsEqual( FMod, peT[2], peT[3] );

    SymCryptModMul( FMod, peZ1, peT[0], peT[0], pbScratch, cbScratch );         // T0 := Z1 * T0 = Z1^3
    SymCryptModMul( FMod, peZ2, peT[1], peT[1], pbScratch, cbScratch );         // T1 := Z2 * T1 = Z2^3
    SymCryptModMul( FMod, peY1, peT[1], peT[2], pbScratch, cbScratch );         // T2 := Y1 * T1 = Y1*Z2^3
    SymCryptModMul( FMod, peY2, peT[0], peT[3], pbScratch, cbScratch );         // T3 := Y2 * T0 = Y2*Z1^3

    dResY = SymCryptModElementIsEqual( FMod, peT[2], peT[3] );

    SymCryptModNeg( FMod, peT[3], peT[3], pbScratch, cbScratch );               // T3 := -T3 = -Y2*Z1^3

    dResYN = SymCryptModElementIsEqual( FMod, peT[2], peT[3] );

    // Select the comparison(s) requested by flags
    return (SYMCRYPT_MASK32_NONZERO(flags & SYMCRYPT_FLAG_ECPOINT_EQUAL) & dResX & dResY) |
           (SYMCRYPT_MASK32_NONZERO(flags & SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL) & dResX & dResYN);
}

//
// The following function checks whether poSrc is the zero point, i.e. whether its
// Z coordinate is zero. It returns the result of SymCryptModElementIsZero on Z.
// The scratch space is not used.
//
UINT32
SYMCRYPT_CALL
SymCryptShortWeierstrassIsZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    PCSYMCRYPT_MODULUS FMod = pCurve->FMod;
    PSYMCRYPT_MODELEMENT peZ = NULL;    // Pointer to Z

    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) );

    UNREFERENCED_PARAMETER( pbScratch );
    UNREFERENCED_PARAMETER( cbScratch );

    // Getting pointer to Z of the source point
    peZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc );

    // The point is zero exactly when Z == 0
    return SymCryptModElementIsZero( FMod, peZ );
}

//
// The following function verifies if the point (X:Y:Z) in Jacobian
// coordinates satisfies the equation Y^2 = X^3 + aXZ^4+bZ^6 .
//
// The first 2 * cbModElement bytes of pbScratch hold the temporaries; the remainder is
// used as scratch for the modular operations. In the inline comments below, "T1" is
// peT[0] and "T2" is peT[1].
//
UINT32
SYMCRYPT_CALL
SymCryptShortWeierstrassOnCurve(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    PCSYMCRYPT_MODULUS FMod = pCurve->FMod;

    PSYMCRYPT_MODELEMENT peX = NULL;    // Pointer to X
    PSYMCRYPT_MODELEMENT peY = NULL;    // Pointer to Y
    PSYMCRYPT_MODELEMENT peZ = NULL;    // Pointer to Z

    PSYMCRYPT_MODELEMENT peT[2] = { 0 };    // Temporaries

    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) );
    SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 2 * pCurve->cbModElement );

    // Creating temporaries (each one consumes cbModElement bytes from the front of the scratch)
    for (UINT32 i=0; i<2; i++)
    {
        peT[i] = SymCryptModElementCreate(
                    pbScratch,
                    pCurve->cbModElement,
                    FMod );

        SYMCRYPT_ASSERT( peT[i] != NULL);

        pbScratch += pCurve->cbModElement;
    }

    // Fixing remaining scratch space size
    cbScratch -= 2*pCurve->cbModElement;

    // Getting pointers to coordinates of the source point
    peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc );
    peY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc );
    peZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc );

    // Calculation
    SymCryptModSquare( FMod, peZ, peT[0], pbScratch, cbScratch );               // T1 := Z * Z = Z^2
    SymCryptModSquare( FMod, peT[0], peT[1], pbScratch, cbScratch );            // T2 := T1 * T1 = Z^4
    SymCryptModMul( FMod, peT[0], peT[1], peT[0], pbScratch, cbScratch );       // T1 := T1 * T2 = Z^6

    SymCryptModMul( FMod, peT[0], pCurve->B, peT[0], pbScratch, cbScratch );    // T1 := T1 * b = bZ^6

    SymCryptModMul( FMod, peT[1], peX, peT[1], pbScratch, cbScratch );          // T2 := T2 * X = XZ^4
    SymCryptModMul( FMod, peT[1], pCurve->A, peT[1], pbScratch, cbScratch );    // T2 := T2 * a = aXZ^4

    SymCryptModAdd( FMod, peT[0], peT[1], peT[1], pbScratch, cbScratch );       // T2 := T1 + T2 = aXZ^4 + bZ^6

    SymCryptModSquare( FMod, peX, peT[0], pbScratch, cbScratch );               // T1 := X * X = X^2
    SymCryptModMul( FMod, peT[0], peX, peT[0], pbScratch, cbScratch );          // T1 := T1 * X = X^3
    SymCryptModAdd( FMod, peT[0], peT[1], peT[1], pbScratch, cbScratch );       // T2 := T1 + T2 = X^3 + aXZ^4 + bZ^6

    SymCryptModSquare( FMod, peY, peT[0], pbScratch, cbScratch );               // T1 := Y * Y = Y^2

    // Left-hand side (T1) against right-hand side (T2) of the curve equation
    return SymCryptModElementIsEqual( FMod, peT[0], peT[1] );
}

//
// based on dbl-2007-bl formula
// but tweaked by saml to
//  a) remove overeager conversions from modular multiplication to modular squaring which introduce
//      more addition/subtraction. With current implementations (based on montgomery reduction),
//      the cost of [a square and an add/sub] is greater than the cost of [a multiplication]
//  b) share intermediate results of producing 8YYYY. [add/sub] is ~10% of cost of mul, so reducing
//      count of these operation has a real impact
//
//      2Y = 2*Y1
//      2YY = 2Y*Y1
//      4YY = 2*2YY
//      8YYYY = 2YY*4YY
//      S = X1*4YY
//      XX = X1^2
//      ZZ = Z1^2
//      ZZZZ = ZZ^2
//      M = 3*XX+a*ZZZZ
//      T = M^2-2*S
//      X3 = T
//      Y3 = M*(S-T)-8YYYY
//      Z3 = Z1*2Y
//
// Total cost:
//      6 Mul (1 by a)
//      4 Sqr
//      2 Add
//      4 Sub
//      3 Dbl
//
// Special Case:
//      If the source point is equal to the identity
//      point of the curve (i.e.
//      Z1 = 0 in Jacobian
//      coordinates) then the resulting point has
//      Z3 = Z1*2Y1 = 0. Thus, this formula is
//      complete (it works for all points).
//
// Parameters:
//      poSrc           point to double (Jacobian coordinates X1, Y1, Z1)
//      poDst           receives the doubled point (X3, Y3, Z3)
//      flags           not used by this function
//      pbScratch       the first 3 * cbModElement bytes hold the temporaries, the rest is
//      cbScratch       scratch for the modular operations
//
// Note on statement order: in the sequence below each destination coordinate is written only
// after the last read of the same coordinate of the source (Z1 is last read when Z3 is
// produced, Y1 when Y3 is first written, X1 before X3 is written). Reordering statements
// can break that property, which matters if poDst is the same object as poSrc.
//
VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassDouble(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    PCSYMCRYPT_MODULUS FMod = pCurve->FMod;
    PSYMCRYPT_MODELEMENT peT[3] = { 0 };    // Temporaries

    PCSYMCRYPT_MODELEMENT peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc );
    PCSYMCRYPT_MODELEMENT peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc );
    PCSYMCRYPT_MODELEMENT peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc );

    PSYMCRYPT_MODELEMENT peX3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst );
    PSYMCRYPT_MODELEMENT peY3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst );
    PSYMCRYPT_MODELEMENT peZ3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst );

    // This variant is only for the general short Weierstrass type (arbitrary a)
    SYMCRYPT_ASSERT( pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) );
    SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 3 * pCurve->cbModElement );

    UNREFERENCED_PARAMETER( flags );

    // Creating temporaries (each one consumes cbModElement bytes from the front of the scratch)
    for (UINT32 i=0; i<3; i++)
    {
        peT[i] = SymCryptModElementCreate(
                    pbScratch,
                    pCurve->cbModElement,
                    FMod );

        SYMCRYPT_ASSERT( peT[i] != NULL);

        pbScratch += pCurve->cbModElement;
    }

    // Fixing remaining scratch space size
    cbScratch -= 3*pCurve->cbModElement;

    // Calculate the points
    SymCryptModAdd( FMod, peY1, peY1, peT[0], pbScratch, cbScratch );           /* T0 := Y1 + Y1 = 2Y */
    SymCryptModSquare( FMod, peZ1, peT[1], pbScratch, cbScratch );              /* T1 := Z1 * Z1 = ZZ */
    SymCryptModMul( FMod, peT[0], peZ1, peZ3, pbScratch, cbScratch );           /* Z3 := 2Y * Z1 = 2YZ */

    SymCryptModMul( FMod, peY1, peT[0], peY3, pbScratch, cbScratch );           /* Y3 := 2Y * Y1 = 2YY */
    SymCryptModAdd( FMod, peY3, peY3, peT[0], pbScratch, cbScratch );           /* T0 := 2YY + 2YY = 4YY */
    SymCryptModMul( FMod, peT[0], peY3, peY3, pbScratch, cbScratch );           /* Y3 := 2YY * 4YY = 8YYYY */

    SymCryptModMul( FMod, peT[0], peX1, peT[0], pbScratch, cbScratch );         /* T0 := X1 * 4YY = 4XYY = S */

    SymCryptModSquare( FMod, peT[1], peT[1], pbScratch, cbScratch );            /* T1 := T1 * T1 = ZZZZ */
    SymCryptModSquare( FMod, peX1, peT[2], pbScratch, cbScratch );              /* T2 := X1 * X1 = XX */
    SymCryptModMul( FMod, peT[1], pCurve->A, peT[1], pbScratch, cbScratch );    /* T1 := T1 * a = a*ZZZZ */
    SymCryptModAdd( FMod, peT[2], peT[1], peT[1], pbScratch, cbScratch );       /* T1 := T2 + T1 = XX + a*ZZZZ */
    SymCryptModAdd( FMod, peT[2], peT[2], peT[2], pbScratch, cbScratch );       /* T2 := T2 + T2 = 2*XX */
    SymCryptModAdd( FMod, peT[0], peT[0], peX3, pbScratch, cbScratch );         /* X3 := 2*S */
    SymCryptModAdd( FMod, peT[2], peT[1], peT[1], pbScratch, cbScratch );       /* T1 := T2 + T1 = 3*XX + a*ZZZZ = M */

    SymCryptModSquare( FMod, peT[1], peT[2], pbScratch, cbScratch );            /* T2 := M^2 */
    SymCryptModSub( FMod, peT[2], peX3, peX3, pbScratch, cbScratch );           /* X3 := M^2 - 2*S = T */

    SymCryptModSub( FMod, peT[0], peX3, peT[0], pbScratch, cbScratch );         /* T0 := S - T */
    SymCryptModMul( FMod, peT[1], peT[0], peT[0], pbScratch, cbScratch );       /* T0 := M * (S - T) */
    SymCryptModSub( FMod, peT[0], peY3, peY3, pbScratch, cbScratch );           /* Y3 := M * (S - T) - 8*YYYY */
}


//
// based on dbl-2007-bl / dbl-2001-b formulae
// but tweaked by saml to
//  a) remove overeager conversions from modular multiplication to modular squaring which introduce
//      more addition/subtraction. With current implementations (based on montgomery reduction),
//      the cost of [a square and an add/sub] is greater than the cost of [a multiplication]
//  b) share intermediate results of producing 8YYYY.
//      [add/sub] is ~10% of cost of mul, so reducing
//      count of these operation has a real impact
//  c) make use of knowledge that curve has a == -3, so M can be calculated more efficiently
//
//      2Y = 2*Y1
//      2YY = 2Y*Y1
//      4YY = 2*2YY
//      8YYYY = 2YY*4YY
//      ZZ = Z1^2
//      S = X1*4YY
//      M = 3*(X1+ZZ)*(X1-ZZ) = 3*(XX - ZZZZ)
//      T = M^2-2*S
//      X3 = T
//      Y3 = M*(S-T)-8YYYY
//      Z3 = 2Y*Z1
//
// Total cost:
//      6 Mul
//      2 Sqr
//      2 Add
//      4 Sub
//      4 Dbl
//
// Special Case:
//      If the source point is equal to the identity
//      point of the curve (i.e. Z1 = 0 in Jacobian
//      coordinates) then the resulting point has
//      Z3 = Z1*2Y1 = 0. Thus, this formula is
//      complete (it works for all points).
//
// Parameters:
//      poSrc           point to double (Jacobian coordinates X1, Y1, Z1)
//      poDst           receives the doubled point (X3, Y3, Z3)
//      flags           not used by this function
//      pbScratch       the first 3 * cbModElement bytes hold the temporaries, the rest is
//      cbScratch       scratch for the modular operations
//
// Note on statement order: each destination coordinate is written only after the last read
// of the same coordinate of the source (Y1 before Y3, Z1 when Z3 is produced, X1 before X3).
// Reordering statements can break that property, which matters if poDst is the same object
// as poSrc.
//
VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassDoubleSpecializedAm3(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    PCSYMCRYPT_MODULUS FMod = pCurve->FMod;
    PSYMCRYPT_MODELEMENT peT[3] = { 0 };    // Temporaries

    PCSYMCRYPT_MODELEMENT peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc );
    PCSYMCRYPT_MODELEMENT peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc );
    PCSYMCRYPT_MODELEMENT peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc );

    PSYMCRYPT_MODELEMENT peX3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst );
    PSYMCRYPT_MODELEMENT peY3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst );
    PSYMCRYPT_MODELEMENT peZ3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst );

    // This variant is only for curves of the "a == -3" type; pCurve->A is never read here
    SYMCRYPT_ASSERT( pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS_AM3 );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) );
    SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 3 * pCurve->cbModElement );

    UNREFERENCED_PARAMETER( flags );

    // Creating temporaries (each one consumes cbModElement bytes from the front of the scratch)
    for (UINT32 i=0; i<3; i++)
    {
        peT[i] = SymCryptModElementCreate(
                    pbScratch,
                    pCurve->cbModElement,
                    FMod );

        SYMCRYPT_ASSERT( peT[i] != NULL);

        pbScratch += pCurve->cbModElement;
    }

    // Fixing remaining scratch space size
    cbScratch -= 3*pCurve->cbModElement;

    // Calculate the points
    SymCryptModAdd( FMod, peY1, peY1, peT[0], pbScratch, cbScratch );           /* T0 := Y1 + Y1 = 2Y */
    SymCryptModSquare( FMod, peZ1, peT[1], pbScratch, cbScratch );              /* T1 := Z1 * Z1 = ZZ */
    SymCryptModMul( FMod, peY1, peT[0], peY3, pbScratch, cbScratch );           /* Y3 := 2Y * Y1 = 2YY */

    SymCryptModMul( FMod, peT[0], peZ1, peZ3, pbScratch, cbScratch );           /* Z3 := 2Y * Z1 = 2YZ */

    SymCryptModAdd( FMod, peY3, peY3, peT[0], pbScratch, cbScratch );           /* T0 := 2YY + 2YY = 4YY */
    SymCryptModAdd( FMod, peX1, peT[1], peT[2], pbScratch, cbScratch );         /* T2 := X1 + ZZ */
    SymCryptModMul( FMod, peT[0], peY3, peY3, pbScratch, cbScratch );           /* Y3 := 2YY * 4YY = 8YYYY */

    SymCryptModSub( FMod, peX1, peT[1], peT[1], pbScratch, cbScratch );         /* T1 := X1 - ZZ */
    SymCryptModMul( FMod, peT[0], peX1, peT[0], pbScratch, cbScratch );         /* T0 := X1 * 4YY = 4XYY = S */

    SymCryptModMul( FMod, peT[2], peT[1], peT[2], pbScratch, cbScratch );       /* T2 := (X1 + ZZ)*(X1 - ZZ) = XX - ZZZZ */
    SymCryptModAdd( FMod, peT[2], peT[2], peT[1], pbScratch, cbScratch );       /* T1 := 2*(XX - ZZZZ) */
    SymCryptModAdd( FMod, peT[0], peT[0], peX3, pbScratch, cbScratch );         /* X3 := 2*S */
    SymCryptModAdd( FMod, peT[1], peT[2], peT[1], pbScratch, cbScratch );       /* T1 := 3*(XX - ZZZZ) = M */

    SymCryptModSquare( FMod, peT[1], peT[2], pbScratch, cbScratch );            /* T2 := M^2 */
    SymCryptModSub( FMod, peT[2], peX3, peX3, pbScratch, cbScratch );           /* X3 := M^2 - 2*S = T */

    SymCryptModSub( FMod, peT[0], peX3, peT[0], pbScratch, cbScratch );         /* T0 := S - T */
    SymCryptModMul( FMod, peT[1], peT[0], peT[0], pbScratch, cbScratch );       /* T0 := M * (S - T) */
    SymCryptModSub( FMod, peT[0], peY3, peY3, pbScratch, cbScratch );           /* Y3 := M * (S - T) - 8*YYYY */
}

//
// based on add-2007-bl formula
// but tweaked by saml to
//  remove overeager conversions from modular multiplication to modular squaring which introduce
//  more addition/subtraction.
//
//      Z1Z1 = Z1^2
//      Z2Z2 = Z2^2
//      U1 = X1*Z2Z2
//      U2 = X2*Z1Z1
//      S1 = Y1*Z2*Z2Z2
//      S2 = Y2*Z1*Z1Z1
//      H = U2-U1
//      2H = 2*H
//      I = (2H)^2
//      J = H*I
//      r = 2*(S2-S1)
//      V = U1*I
//      X3 = r^2-J-2*V
//      Y3 = r*(V-X3)-2*S1*J
//      Z3 = (Z1*Z2)*2H
//
// Total cost:
//      12 Mul
//       4 Sqr
//       0 Add
//       7 Sub
//       3 Dbl
//
// Special Case:
//      If the two source points are opposite (X1 / Z1^2 == X2 / Z2^2),
//      then H = U2-U1 = 0. Thus Z3 = 0 and the result is correct.
//
// As the name says, this routine is for two different, non-zero source points; it performs
// no checks for equal or zero inputs itself.
//
// Parameters:
//      poSrc1, poSrc2  points to add (Jacobian coordinates)
//      poDst           receives the sum (X3, Y3, Z3)
//      pbScratch       the first 7 * cbModElement bytes hold the temporaries, the rest is
//      cbScratch       scratch for the modular operations
//
// Note on statement order: each destination coordinate is written only after the last read
// of the same coordinate of either source (Z1/Z2 before Z3, X1/X2 before X3, Y1/Y2 before Y3).
// Reordering statements can break that property, which matters if poDst is the same object
// as one of the sources.
//
VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassAddDiffNonZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    PCSYMCRYPT_MODULUS FMod = pCurve->FMod;

    PCSYMCRYPT_MODELEMENT peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 );
    PCSYMCRYPT_MODELEMENT peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 );
    PCSYMCRYPT_MODELEMENT peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 );

    PCSYMCRYPT_MODELEMENT peX2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 );
    PCSYMCRYPT_MODELEMENT peY2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 );
    PCSYMCRYPT_MODELEMENT peZ2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 );

    PSYMCRYPT_MODELEMENT peX3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst );
    PSYMCRYPT_MODELEMENT peY3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst );
    PSYMCRYPT_MODELEMENT peZ3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst );

    PSYMCRYPT_MODELEMENT peT[7] = { 0 };    // Temporaries

    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) );
    SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 7 * pCurve->cbModElement );

    // Creating temporaries (each one consumes cbModElement bytes from the front of the scratch)
    for (UINT32 i=0; i<7; i++)
    {
        peT[i] = SymCryptModElementCreate(
                    pbScratch,
                    pCurve->cbModElement,
                    FMod );

        SYMCRYPT_ASSERT( peT[i] != NULL);

        pbScratch += pCurve->cbModElement;
    }

    // Fixing remaining scratch space size
    cbScratch -= 7*pCurve->cbModElement;

    // Calculation

    SymCryptModSquare( FMod, peZ1, peT[0], pbScratch, cbScratch );              /* T0 := Z1 * Z1 = Z1Z1 */
    SymCryptModMul( FMod, peZ1, peT[0], peT[1], pbScratch, cbScratch );         /* T1 := Z1*Z1Z1 */

    SymCryptModSquare( FMod, peZ2, peT[6], pbScratch, cbScratch );              /* T6 := Z2 * Z2 = Z2Z2 */
    SymCryptModMul( FMod, peX1, peT[6], peT[2], pbScratch, cbScratch );         /* T2 := X1 * T6 = X1*Z2Z2 = U1 */
    SymCryptModMul( FMod, peX2, peT[0], peT[3], pbScratch, cbScratch );         /* T3 := X2 * Z1Z1 = U2 */
    SymCryptModSub( FMod, peT[3], peT[2], peT[5], pbScratch, cbScratch );       /* T5 := T3 - T2 = U2 - U1 = H */
    SymCryptModAdd( FMod, peT[5], peT[5], peT[3], pbScratch, cbScratch );       /* T3 := T5 + T5 = 2H */

    SymCryptModMul( FMod, peZ1, peZ2, peT[4], pbScratch, cbScratch );           /* T4 := Z1 * Z2 */

    SymCryptModMul( FMod, peZ2, peT[6], peT[6], pbScratch, cbScratch );         /* T6 := Z2 * T6 = Z2*Z2Z2 */
    SymCryptModMul( FMod, peT[4], peT[3], peZ3, pbScratch, cbScratch );         /* Z3 := T4 * T3 = Z1*Z2*2H */

    SymCryptModMul( FMod, peY1, peT[6], peT[6], pbScratch, cbScratch );         /* T6 := Y1 * T6 = Y1*Z2*Z2Z2 = S1 */
    SymCryptModMul( FMod, peY2, peT[1], peT[4], pbScratch, cbScratch );         /* T4 := Y2*Z1*Z1Z1 = S2 */
    SymCryptModSub( FMod, peT[4], peT[6], peT[4], pbScratch, cbScratch );       /* T4 := T4 - T6 = S2-S1 */
    SymCryptModAdd( FMod, peT[4], peT[4], peT[4], pbScratch, cbScratch );       /* T4 := T4 + T4 = 2*(S2-S1) = r */

    SymCryptModSquare( FMod, peT[3], peT[3], pbScratch, cbScratch );            /* T3 := T3 * T3 = (2*H)^2 = I */
    SymCryptModMul( FMod, peT[3], peT[5], peT[5], pbScratch, cbScratch );       /* T5 := T3 * T5 = H*I = J */
    SymCryptModMul( FMod, peT[2], peT[3], peT[3], pbScratch, cbScratch );       /* T3 := T2 * T3 = U1*I = V */

    SymCryptModSquare( FMod, peT[4], peT[2], pbScratch, cbScratch );            /* T2 := T4 * T4 = r^2 */
    SymCryptModSub( FMod, peT[2], peT[5], peT[2], pbScratch, cbScratch );       /* T2 := T2 - T5 = r^2 - J */
    SymCryptModSub( FMod, peT[2], peT[3], peT[2], pbScratch, cbScratch );       /* T2 := T2 - T3 = r^2 - J - V */
    SymCryptModSub( FMod, peT[2], peT[3], peX3, pbScratch, cbScratch );         /* X3 := T2 - T3 = r^2 - J - 2*V */

    SymCryptModSub( FMod, peT[3], peX3, peT[3], pbScratch, cbScratch );         /* T3 := T3 - X3 = V - X3 */
    SymCryptModMul( FMod, peT[3], peT[4], peT[3], pbScratch, cbScratch );       /* T3 := T3 * T4 = r*(V-X3) */
    SymCryptModMul( FMod, peT[6], peT[5], peT[6], pbScratch, cbScratch );       /* T6 := T6 * T5 = S1*J */
    SymCryptModAdd( FMod, peT[6], peT[6], peT[6], pbScratch, cbScratch );       /* T6 := T6 + T6 = 2*S1*J */
    SymCryptModSub( FMod, peT[3], peT[6], peY3, pbScratch, cbScratch );         /* Y3 := T3 - T6 = r*(V-X3) - 2*S1*J */
}

//
// The following function is a complete **SIDE-CHANNEL-UNSAFE**
// addition of points that detects as fast as possible the special cases
// and merges the two previous calls.
+// +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassAddSideChannelUnsafe( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + + PCSYMCRYPT_MODELEMENT peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 ); + PCSYMCRYPT_MODELEMENT peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 ); + PCSYMCRYPT_MODELEMENT peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 ); + + PCSYMCRYPT_MODELEMENT peX2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 ); + PCSYMCRYPT_MODELEMENT peY2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 ); + PCSYMCRYPT_MODELEMENT peZ2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 ); + + PSYMCRYPT_MODELEMENT peT[8] = { 0 }; // Temporaries + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 8 * pCurve->cbModElement ); + + // Check if one of the points is zero + if (SymCryptModElementIsZero( FMod, peZ1 )) + { + SymCryptEcpointCopy( pCurve, poSrc2, poDst); + return; + } + + if (SymCryptModElementIsZero( FMod, peZ2 )) + { + SymCryptEcpointCopy( pCurve, poSrc1, poDst); + return; + } + + // Creating temporaries + for (UINT32 i=0; i<8; i++) + { + peT[i] = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + + SYMCRYPT_ASSERT( peT[i] != NULL); + + pbScratch += pCurve->cbModElement; + } + + // Fixing remaining scratch space size + cbScratch -= 8*pCurve->cbModElement; + + // Calculation + + SymCryptModSquare( FMod, peZ1, peT[0], pbScratch, cbScratch ); /* T0 := Z1 * Z1 = Z1Z1 */ + SymCryptModMul( 
FMod, peZ1, peT[0], peT[1], pbScratch, cbScratch ); /* T1 := Z1*Z1Z1 */ + + SymCryptModSquare( FMod, peZ2, peT[6], pbScratch, cbScratch ); /* T6 := Z2 * Z2 = Z2Z2 */ + SymCryptModMul( FMod, peX1, peT[6], peT[2], pbScratch, cbScratch ); /* T2 := X1 * T6 = X1*Z2Z2 = U1 */ + SymCryptModMul( FMod, peX2, peT[0], peT[3], pbScratch, cbScratch ); /* T3 := X2 * Z1Z1 = U2 */ + SymCryptModSub( FMod, peT[3], peT[2], peT[5], pbScratch, cbScratch ); /* T5 := T3 - T2 = U2 - U1 = H */ + + SymCryptModMul( FMod, peY2, peT[1], peT[7], pbScratch, cbScratch ); /* T7 := Y2 * T1 = Y2*Z1*Z1Z1 = S2 */ + SymCryptModMul( FMod, peZ2, peT[6], peT[1], pbScratch, cbScratch ); /* T1 := Z2 * T6 = Z2*Z2Z2 */ + SymCryptModMul( FMod, peY1, peT[1], peT[1], pbScratch, cbScratch ); /* T1 := Y1 * T1 = Y1*Z2*Z2Z2 = S1 */ + SymCryptModSub( FMod, peT[7], peT[1], peT[7], pbScratch, cbScratch ); /* T7 := T7 - T1 = S2-S1 */ + + if (SymCryptModElementIsZero( FMod, peT[5] ) & SymCryptModElementIsZero( FMod, peT[7] )) + { + // Points are equal - run double on poSrc1 + + SymCryptModElementCopy( FMod, peT[0], peT[4] ); /* Move Z1Z1 for later */ + + SymCryptModSquare( FMod, peX1, peT[0], pbScratch, cbScratch ); /* T0 := X1 * X1 = XX */ + SymCryptModSquare( FMod, peY1, peT[3], pbScratch, cbScratch ); /* T3 := Y1 * Y1 = YY */ + SymCryptModSquare( FMod, peT[3], peT[5], pbScratch, cbScratch ); /* T5 := T3 * T3 = YYYY */ + + SymCryptModAdd( FMod, peX1, peT[3], peT[1], pbScratch, cbScratch ); /* T1 := X1 + T3 = X + YY */ + SymCryptModSquare( FMod, peT[1], peT[1], pbScratch, cbScratch ); /* T1 := T1 * T1 = (X + YY)^2 */ + SymCryptModSub( FMod, peT[1], peT[0], peT[1], pbScratch, cbScratch ); /* T1 := T1 - T0 = (X + YY)^2 - XX */ + SymCryptModSub( FMod, peT[1], peT[5], peT[1], pbScratch, cbScratch ); /* T1 := T1 - T5 = (X + YY)^2 - XX - YYYY */ + SymCryptModAdd( FMod, peT[1], peT[1], peT[1], pbScratch, cbScratch ); /* T1 := T1 + T1 = 2*((X + YY)^2 - XX - YYYY) = S */ + + //SymCryptModSquare( FMod, peZ1, peT[4], pbScratch, 
cbScratch ); /* T4 := Z1 * Z1 = ZZ */ + + SymCryptModSquare( FMod, peT[4], peT[2], pbScratch, cbScratch ); /* T2 := T4 * T4 = ZZ^2 */ + SymCryptModMul( FMod, peT[2], pCurve->A, peT[2], pbScratch, cbScratch ); /* T2 := T2 * a = a*ZZ^2 */ + SymCryptModAdd( FMod, peT[2], peT[0], peT[2], pbScratch, cbScratch ); /* T2 := T2 + T0 = XX + a*ZZ^2 */ + SymCryptModAdd( FMod, peT[0], peT[0], peT[0], pbScratch, cbScratch ); /* T0 := T0 + T0 = 2*XX */ + SymCryptModAdd( FMod, peT[2], peT[0], peT[2], pbScratch, cbScratch ); /* T2 := T2 + T0 = 3*XX + a*ZZ^2 = M */ + + SymCryptModSquare( FMod, peT[2], peT[0], pbScratch, cbScratch ); /* T0 := T2 * T2 = M^2 */ + SymCryptModSub( FMod, peT[0], peT[1], peT[0], pbScratch, cbScratch ); /* T0 := T0 - T1 = M^2 - S */ + SymCryptModSub( FMod, peT[0], peT[1], peT[0], pbScratch, cbScratch ); /* T0 := T0 - T1 = M^2 - 2*S = T = X3 */ + + SymCryptModSub( FMod, peT[1], peT[0], peT[1], pbScratch, cbScratch ); /* T1 := T1 - T0 = S - T */ + SymCryptModMul( FMod, peT[2], peT[1], peT[1], pbScratch, cbScratch ); /* T1 := T2 * T1 = M * (S - T) */ + SymCryptModAdd( FMod, peT[5], peT[5], peT[5], pbScratch, cbScratch ); /* T5 := T5 + T5 = 2*YYYY */ + SymCryptModAdd( FMod, peT[5], peT[5], peT[5], pbScratch, cbScratch ); /* T5 := T5 + T5 = 4*YYYY */ + SymCryptModAdd( FMod, peT[5], peT[5], peT[5], pbScratch, cbScratch ); /* T5 := T5 + T5 = 8*YYYY */ + SymCryptModSub( FMod, peT[1], peT[5], peT[1], pbScratch, cbScratch ); /* T1 := T1 - T5 = M * (S - T) - 8*YYYY = Y3 */ + + SymCryptModAdd( FMod, peY1, peZ1, peT[2], pbScratch, cbScratch ); /* T2 := Y1 + Z1 */ + SymCryptModSquare( FMod, peT[2], peT[2], pbScratch, cbScratch ); /* T2 := T2 * T2 = (Y + Z )^2 */ + SymCryptModSub( FMod, peT[2], peT[3], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T3 = (Y + Z )^2 - YY */ + SymCryptModSub( FMod, peT[2], peT[4], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T4 = (Y + Z )^2 - YY - ZZ = Z3 */ + + // Setting the result + SymCryptModElementCopy( FMod, peT[0], 
SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ) ); + SymCryptModElementCopy( FMod, peT[1], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ) ); + SymCryptModElementCopy( FMod, peT[2], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ) ); + } + else + { + // Continue the addition + + SymCryptModAdd( FMod, peZ1, peZ2, peT[4], pbScratch, cbScratch ); /* T4 := Z1 + Z2 */ + SymCryptModSquare( FMod, peT[4], peT[4], pbScratch, cbScratch ); /* T4 := T4 * T4 = (Z1 + Z2)^2 */ + SymCryptModSub( FMod, peT[4], peT[0], peT[4], pbScratch, cbScratch ); /* T4 := T4 - Z1Z1 = (Z1 + Z2)^2 - Z1Z1 */ + SymCryptModSub( FMod, peT[4], peT[6], peT[4], pbScratch, cbScratch ); /* T4 := T4 - T6 = (Z1 + Z2)^2 - Z1Z1 - Z2Z2 */ + SymCryptModMul( FMod, peT[4], peT[5], peT[4], pbScratch, cbScratch ); /* T4 := T4 * T5 = ((Z1 + Z2)^2 - Z1Z1 - Z2Z2)*H = Z3 */ + + SymCryptModAdd( FMod, peT[7], peT[7], peT[7], pbScratch, cbScratch ); /* T7 := T7 + T7 = 2*(S2-S1) = r */ + + SymCryptModAdd( FMod, peT[5], peT[5], peT[3], pbScratch, cbScratch ); /* T3 := T5 + T5 = 2*H */ + SymCryptModSquare( FMod, peT[3], peT[3], pbScratch, cbScratch ); /* T3 := T3 * T3 = (2*H)^2 = I */ + SymCryptModMul( FMod, peT[3], peT[5], peT[5], pbScratch, cbScratch ); /* T5 := T3 * T5 = H*I = J */ + SymCryptModMul( FMod, peT[2], peT[3], peT[3], pbScratch, cbScratch ); /* T3 := T2 * T3 = U1*I = V */ + + SymCryptModSquare( FMod, peT[7], peT[2], pbScratch, cbScratch ); /* T2 := T7 * T7 = r^2 */ + SymCryptModSub( FMod, peT[2], peT[5], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T5 = r^2 - J */ + SymCryptModSub( FMod, peT[2], peT[3], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T3 = r^2 - J - V */ + SymCryptModSub( FMod, peT[2], peT[3], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T3 = r^2 - J - 2*V = X3 */ + + SymCryptModSub( FMod, peT[3], peT[2], peT[3], pbScratch, cbScratch ); /* T3 := T3 - T2 = V - X3 */ + SymCryptModMul( FMod, peT[3], peT[7], peT[3], pbScratch, cbScratch ); /* T3 := T3 * T7 = r*(V-X3) */ + 
SymCryptModMul( FMod, peT[1], peT[5], peT[6], pbScratch, cbScratch ); /* T6 := T1 * T5 = S1*J */ + SymCryptModAdd( FMod, peT[6], peT[6], peT[6], pbScratch, cbScratch ); /* T6 := T6 + T6 = 2*S1*J */ + SymCryptModSub( FMod, peT[3], peT[6], peT[3], pbScratch, cbScratch ); /* T3 := T3 - T6 = r*(V-X3) - 2*S1*J = Y3 */ + + // Setting the result + SymCryptModElementCopy( FMod, peT[2], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ) ); + SymCryptModElementCopy( FMod, peT[3], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ) ); + SymCryptModElementCopy( FMod, peT[4], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ) ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassAdd( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 dSrc1Zero = 0; + UINT32 dSrc2Zero = 0; + UINT32 dSrcEqual = 0; + + // Temporary points + PSYMCRYPT_ECPOINT poQ0 = NULL; + PSYMCRYPT_ECPOINT poQ1 = NULL; + + SIZE_T cbEcpoint = SymCryptSizeofEcpointFromCurve( pCurve ); + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); // We will need the entire scratch space + + SYMCRYPT_ASSERT( cbScratch > 2*cbEcpoint ); + + if ((flags & SYMCRYPT_FLAG_DATA_PUBLIC) != 0) + { + SymCryptShortWeierstrassAddSideChannelUnsafe( pCurve, poSrc1, poSrc2, poDst, pbScratch, cbScratch ); + } + else + { + // Creating temporary points + poQ0 = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poQ0 != NULL); + pbScratch += cbEcpoint; + + poQ1 = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( 
poQ1 != NULL); + pbScratch += cbEcpoint; + + // Fixing remaining scratch space size + cbScratch -= 2*cbEcpoint; + + // Calculate the masks + dSrc1Zero = SymCryptShortWeierstrassIsZero( pCurve, poSrc1, pbScratch, cbScratch ); + dSrc2Zero = SymCryptShortWeierstrassIsZero( pCurve, poSrc2, pbScratch, cbScratch ); + dSrcEqual = SymCryptShortWeierstrassIsEqual( pCurve, poSrc1, poSrc2, SYMCRYPT_FLAG_ECPOINT_EQUAL, pbScratch, cbScratch ); + + // Side-channel safe computations + SymCryptShortWeierstrassAddDiffNonZero( pCurve, poSrc1, poSrc2, poQ0, pbScratch, cbScratch ); // This covers the cases where Src1 != Src2 or Src1 = -Src2 + + SymCryptEcpointDouble( pCurve, poSrc1, poQ1, 0, pbScratch, cbScratch ); // Dispatch to Double function; enables type assertion on SymCryptShortWeierstrassDouble to be specific + SymCryptEcpointMaskedCopy( pCurve, poQ1, poQ0, dSrcEqual ); // (Masked) copy if the points are equal + + SymCryptEcpointMaskedCopy( pCurve, poSrc1, poQ0, dSrc2Zero ); // (Masked) copy if Src2 = 0 + SymCryptEcpointMaskedCopy( pCurve, poSrc2, poQ0, dSrc1Zero ); // (Masked) copy if Src1 = 0 + + SymCryptEcpointCopy( pCurve, poQ0, poDst ); // Copy the final result to destination + } +} + +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassNegate( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Inout_ PSYMCRYPT_ECPOINT poSrc, + UINT32 mask, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + PSYMCRYPT_MODELEMENT peY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc); + + PSYMCRYPT_MODELEMENT peTmp = NULL; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + pCurve->cbModElement); + + peTmp = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + SYMCRYPT_ASSERT( peTmp != NULL); + + pbScratch += 
pCurve->cbModElement; + cbScratch -= pCurve->cbModElement; + + SymCryptModNeg( FMod, peY, peTmp, pbScratch, cbScratch ); + SymCryptModElementMaskedCopy( FMod, peTmp, peY, mask ); +} diff --git a/libs/symcrypt/lib/ec_twisted_edwards.c b/libs/symcrypt/lib/ec_twisted_edwards.c new file mode 100644 index 00000000000..9d4371c40d0 --- /dev/null +++ b/libs/symcrypt/lib/ec_twisted_edwards.c @@ -0,0 +1,575 @@ +// +// ec_twisted_edwards.c Twisted Edwards Curve Implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve ) +{ + UINT32 nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize ); + UINT32 cbModElement = pCurve->cbModElement; + UINT32 nDigitsFieldLength = pCurve->FModDigits; + + // + // All the scratch space computations are upper bounded by the SizeofXXX bound (2^19) and + // the SCRATCH_BYTES_FOR_XXX bound (2^24) (see symcrypt_internal.h). + // + // One caveat is SymCryptSizeofEcpointFromCurve and SymCryptSizeofEcpointEx which calculate + // the size of EcPoint with 4 coordinates (each one a modelement of max size 2^17). Thus upper + // bounded by 2^20. + // + // Another is the precomp points computation where the nPrecompPoints are up to + // 2^SYMCRYPT_ECURVE_SW_DEF_WINDOW = 2^6 and the nRecodedDigits are equal to the + // GOrd bitsize < 2^20. + // + // Thus cbScratchScalarMulti is upper bounded by 2^6*2^20 + 2*2^20*2^4 ~ 2^26. 
+ // + + pCurve->cbScratchCommon = 8 * cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ); + + pCurve->cbScratchScalar = + (pCurve->cbModElement) + + 2 * SymCryptSizeofEcpointFromCurve( pCurve ) + + 2 * SymCryptSizeofIntFromDigits( pCurve->GOrdDigits ) + + SYMCRYPT_MAX( pCurve->cbScratchCommon, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->GOrdDigits )); + + pCurve->cbScratchScalarMulti = + pCurve->info.sw.nPrecompPoints * SymCryptSizeofEcpointFromCurve( pCurve ) + + ((2*pCurve->info.sw.nRecodedDigits * sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1 )/SYMCRYPT_ASYM_ALIGN_VALUE) * SYMCRYPT_ASYM_ALIGN_VALUE; + + pCurve->cbScratchGetSetValue = + SymCryptSizeofEcpointEx(cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH) + + 2 * cbModElement + + SYMCRYPT_MAX(SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nDigitsFieldLength), + SYMCRYPT_SCRATCH_BYTES_FOR_MODINV(nDigitsFieldLength)); + + pCurve->cbScratchGetSetValue = SYMCRYPT_MAX( pCurve->cbScratchGetSetValue, SymCryptSizeofIntFromDigits( nDigits ) ); + + pCurve->cbScratchEckey = + SYMCRYPT_MAX( pCurve->cbModElement + SymCryptSizeofIntFromDigits(SymCryptEcurveDigitsofScalarMultiplier(pCurve)), + SymCryptSizeofEcpointFromCurve( pCurve ) ) + + SYMCRYPT_MAX( pCurve->cbScratchScalar + pCurve->cbScratchScalarMulti, pCurve->cbScratchGetSetValue ); +} + +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsSetDistinguished( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptEcpointCopy( pCurve, pCurve->G, poDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptTwistedEdwardsIsZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, 
+ SIZE_T cbScratch) +{ + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + UINT32 dResX = 0, dResY = 0; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + PSYMCRYPT_MODELEMENT peSrcX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + dResX = SymCryptModElementIsZero( pmMod, peSrcX ); + dResY = SymCryptModElementIsEqual( pmMod, peSrcY, peSrcZ ); + + return ( dResX & dResY ); +} + +// +// Verify that +// a * x^2 + y^2 = 1 + d * x^2 * y^2 +// x = X/Z, y = Y/Z, +// To avoid mod inv calculation which is expensive, +// we verify Z^2(aX^2 + Y^2) = Z^4 + d * X^2 * Y^2 +// +UINT32 +SYMCRYPT_CALL +SymCryptTwistedEdwardsOnCurve( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + PSYMCRYPT_MODELEMENT peTemp[4]; + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + SIZE_T nBytes; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 4*nBytes ); + + for (UINT32 i = 0; i < 4; ++i) + { + peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= nBytes; + } + + PSYMCRYPT_MODELEMENT peSrcX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + // 
peTemp[0] = X^2 + SymCryptModSquare( pmMod, peSrcX, peTemp[0], pbScratch, cbScratch); + + // peTemp[1] = Y^2 + SymCryptModSquare( pmMod, peSrcY, peTemp[1], pbScratch, cbScratch); + + // peTemp[2] = Z^2 + SymCryptModSquare( pmMod, peSrcZ, peTemp[2], pbScratch, cbScratch); + + // peTemp[3] = a * X^2 + SymCryptModMul( pmMod, pCurve->A, peTemp[0], peTemp[3], pbScratch, cbScratch ); + + // peTemp[3] = a * X^2 + Y^2 + SymCryptModAdd( pmMod, peTemp[3], peTemp[1], peTemp[3], pbScratch, cbScratch ); + + // peTemp[3] = Z^2 (a * X^2 + Y^2) + SymCryptModMul( pmMod, peTemp[3], peTemp[2], peTemp[3], pbScratch, cbScratch ); + + // peTemp[1] = X^2 * Y^2 + SymCryptModMul( pmMod, peTemp[0], peTemp[1], peTemp[1], pbScratch, cbScratch ); + + // peTemp[1] = d * X^2 *Y^2 + SymCryptModMul( pmMod, pCurve->B, peTemp[1], peTemp[1], pbScratch, cbScratch ); + + // peTemp[2] = Z^4 + SymCryptModMul( pmMod, peTemp[2], peTemp[2], peTemp[2], pbScratch, cbScratch ); + + // peTemp[1] = Z^4 + d * X^2 * Y^2 + SymCryptModAdd( pmMod, peTemp[2], peTemp[1], peTemp[1], pbScratch, cbScratch ); + + return SymCryptModElementIsEqual( pmMod, peTemp[1], peTemp[3] ); +} + +// +// Point doubling: dbl-2008-hwcd, 5Mul + 4Square + 2Add + 5Sub +// +// poDst (X, Y, Z, T) = 2 * poSrc(X, Y, Z, T) +// 1. A = X1 ^ 2 +// 2. B = Y1 ^ 2 +// 3. C = 2 * Z1 ^ 2 +// 4. D = a * A +// 5. E = (X1 + Y1) ^ 2 - A - B +// 6. G = D + B +// 7. F = G - C +// 8. H = D - B +// 9. X3 = E * F +// 10. Y3 = G * H +// 11. T3 = E * H +// 12. 
Z3 = F * G +// +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsDouble( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + PSYMCRYPT_MODELEMENT peTemp[8]; + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + SIZE_T nBytes; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + UNREFERENCED_PARAMETER( flags ); + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 8*nBytes ); + + for (UINT32 i = 0; i < 8; ++i) + { + peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= nBytes; + } + + PSYMCRYPT_MODELEMENT peSrcX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + PSYMCRYPT_MODELEMENT peDstX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstT = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poDst ); + + PSYMCRYPT_MODELEMENT peA = peTemp[0]; + PSYMCRYPT_MODELEMENT peB = peTemp[1]; + PSYMCRYPT_MODELEMENT peC = peTemp[2]; + PSYMCRYPT_MODELEMENT peD = peTemp[3]; + PSYMCRYPT_MODELEMENT peE = peTemp[4]; + PSYMCRYPT_MODELEMENT peF = peTemp[5]; + PSYMCRYPT_MODELEMENT peG = peTemp[6]; + PSYMCRYPT_MODELEMENT peH = peTemp[7]; + + + // A = X1^2 + SymCryptModSquare( pmMod, peSrcX, peA, pbScratch, cbScratch ); + + // B = Y1^2 + 
SymCryptModSquare( pmMod, peSrcY, peB, pbScratch, cbScratch ); + + // C1 = Z1^2 + SymCryptModSquare( pmMod, peSrcZ, peC, pbScratch, cbScratch ); + + // C = C1 + C1 = Z1^2 + Z1^2 = 2 * Z1^2 + SymCryptModAdd( pmMod, peC, peC, peC, pbScratch, cbScratch ); + + // D = a * A + SymCryptModMul( pmMod, pCurve->A, peA, peD, pbScratch, cbScratch ); + + // E1 = X1 + Y1 + SymCryptModAdd( pmMod, peSrcX, peSrcY, peE, pbScratch, cbScratch ); + + // E2 = E1^2 = (X1 + Y1)^2 + SymCryptModSquare( pmMod, peE, peE, pbScratch, cbScratch ); + + // E3 = E2 - A = (X1 + Y1)^2 - A + SymCryptModSub( pmMod, peE, peA, peE, pbScratch, cbScratch ); + + // E = E3 - B = (X1 + Y1)^2 - A - B + SymCryptModSub( pmMod, peE, peB, peE, pbScratch, cbScratch ); + + // G = D + B + SymCryptModAdd( pmMod, peD, peB, peG, pbScratch, cbScratch ); + + // F = G - C + SymCryptModSub( pmMod, peG, peC, peF, pbScratch, cbScratch ); + + // H = D - B + SymCryptModSub( pmMod, peD, peB, peH, pbScratch, cbScratch ); + + // X3 = E * F + SymCryptModMul( pmMod, peE, peF, peDstX, pbScratch, cbScratch ); + + // Y3 = G * H + SymCryptModMul( pmMod, peG, peH, peDstY, pbScratch, cbScratch ); + + // T3 = E * H + SymCryptModMul( pmMod, peE, peH, peDstT, pbScratch, cbScratch ); + + // Z3 = F * G + SymCryptModMul( pmMod, peF, peG, peDstZ, pbScratch, cbScratch ); +} + + +// +// Point addition: add-2008-hwcd 11Mul + 3add + 4sub +// +// poDst(X, Y, Z, T) = poSrc(X, Y, Z, T) + poSrc2(X, Y, Z, T) +// 1. A = X1 * X2 +// 2. B = Y1 * Y2 +// 3. C = d * T1 * T2 +// 4. D = Z1 * Z2 +// 5. E = (X1 + Y1) * (X2 + Y2) - A - B +// 6. F = D - C +// 7. G = D + C +// 8. H = B - a * A +// 9. X3 = E * F +// 10. Y3 = G * H +// 11. T3 = E * H +// 12. 
Z3 = F * G +// +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsAdd( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PSYMCRYPT_MODELEMENT peTemp[8]; + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + SIZE_T nBytes; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + UNREFERENCED_PARAMETER( flags ); + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 8*nBytes ); + + for (UINT32 i = 0; i < 8; ++i) + { + peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= nBytes; + } + + PSYMCRYPT_MODELEMENT peSrc1X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 ); + PSYMCRYPT_MODELEMENT peSrc1Y = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 ); + PSYMCRYPT_MODELEMENT peSrc1Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 ); + PSYMCRYPT_MODELEMENT peSrc1T = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poSrc1 ); + + PSYMCRYPT_MODELEMENT peSrc2X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 ); + PSYMCRYPT_MODELEMENT peSrc2Y = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 ); + PSYMCRYPT_MODELEMENT peSrc2Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 ); + PSYMCRYPT_MODELEMENT peSrc2T = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poSrc2 ); + + PSYMCRYPT_MODELEMENT peDstX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstZ = 
SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstT = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poDst ); + + PSYMCRYPT_MODELEMENT peA = peTemp[0]; + PSYMCRYPT_MODELEMENT peB = peTemp[1]; + PSYMCRYPT_MODELEMENT peC = peTemp[2]; + PSYMCRYPT_MODELEMENT peD = peTemp[3]; + PSYMCRYPT_MODELEMENT peE = peTemp[4]; + PSYMCRYPT_MODELEMENT peF = peTemp[5]; + PSYMCRYPT_MODELEMENT peG = peTemp[6]; + PSYMCRYPT_MODELEMENT peH = peTemp[7]; + + // A = X1 * X2 + SymCryptModMul( pmMod, peSrc1X, peSrc2X, peA, pbScratch, cbScratch ); + + // B = Y1 * Y2 + SymCryptModMul( pmMod, peSrc1Y, peSrc2Y, peB, pbScratch, cbScratch ); + + // C1 = T1 * T2 + SymCryptModMul( pmMod, peSrc1T, peSrc2T, peC, pbScratch, cbScratch ); + + // C = d * C1 = d * T1 * T2 + SymCryptModMul( pmMod, pCurve->B, peC, peC, pbScratch, cbScratch ); + + // D = Z1 * Z2 + SymCryptModMul( pmMod, peSrc1Z, peSrc2Z, peD, pbScratch, cbScratch ); + + // E1 = X1 + Y1 + SymCryptModAdd( pmMod, peSrc1X, peSrc1Y, peE, pbScratch, cbScratch ); + + // E2 = X2 + Y2 + SymCryptModAdd( pmMod, peSrc2X, peSrc2Y, peF, pbScratch, cbScratch ); + + // E = E * F + SymCryptModMul( pmMod, peE, peF, peE, pbScratch, cbScratch ); + + // E = E - A + SymCryptModSub( pmMod, peE, peA, peE, pbScratch, cbScratch ); + + // E = E - B + SymCryptModSub( pmMod, peE, peB, peE, pbScratch, cbScratch ); + + // F = D - C + SymCryptModSub( pmMod, peD, peC, peF, pbScratch, cbScratch ); + + // G = D + C + SymCryptModAdd( pmMod, peD, peC, peG, pbScratch, cbScratch ); + + // H = a * A + SymCryptModMul( pmMod, pCurve->A, peA, peH, pbScratch, cbScratch ); + + // H = B - a * A + SymCryptModSub( pmMod, peB, peH, peH, pbScratch, cbScratch ); + + // X3 = E * F + SymCryptModMul( pmMod, peE, peF, peDstX, pbScratch, cbScratch ); + + // Y3 = G * H + SymCryptModMul( pmMod, peG, peH, peDstY, pbScratch, cbScratch ); + + // T3 = E * H + SymCryptModMul( pmMod, peE, peH, peDstT, pbScratch, cbScratch ); + + // Z3 = F * G + SymCryptModMul( pmMod, 
peF, peG, peDstZ, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsAddDiffNonZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_(cbScratch) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptTwistedEdwardsAdd( pCurve, poSrc1, poSrc2, poDst, 0, pbScratch, cbScratch ); +} + +// +// Verify poSrc1(X1, Y1, Z1, T1) = poSrc2(X2, Y2, Z2, T2) +// To avoid ModInv for 1/Z, we do +// X1 * Z2 = X2 * Z1, and +// Y1 * Z2 = Y2 * Z1 +// +// This function also performs the poSrc1 = -1 * poSrc2 check when the flags request it +// +UINT32 +SYMCRYPT_CALL +SymCryptTwistedEdwardsIsEqual( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + UINT32 flags, + _Out_writes_bytes_(cbScratch) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + PSYMCRYPT_MODELEMENT peTemp[2]; + PSYMCRYPT_MODELEMENT peSrc1X, peSrc1Y, peSrc1Z; + PSYMCRYPT_MODELEMENT peSrc2X, peSrc2Y, peSrc2Z; + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + SIZE_T nBytes; + UINT32 dResX = 0; + UINT32 dResXN = 0; + UINT32 dResY = 0; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 2*nBytes ); + + for (UINT32 i = 0; i < 2; ++i) + { + peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= nBytes; + } + + peSrc1X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 ); + peSrc1Y = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 ); + peSrc1Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 ); + + peSrc2X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 ); + peSrc2Y = 
SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 ); + peSrc2Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 ); + + // Setting the default flag if flags == 0 + flags |= (SYMCRYPT_MASK32_ZERO(flags) & SYMCRYPT_FLAG_ECPOINT_EQUAL); + + // peTemp[0] = X1 * Z2 + SymCryptModMul( pmMod, peSrc1X, peSrc2Z, peTemp[0], pbScratch, cbScratch ); + + // peTemp[1] = X2 * Z1 + SymCryptModMul( pmMod, peSrc2X, peSrc1Z, peTemp[1], pbScratch, cbScratch ); + + dResX = SymCryptModElementIsEqual( pmMod, peTemp[0], peTemp[1] ); + + // Neg peTemp[1] + SymCryptModNeg(pmMod, peTemp[1], peTemp[1], pbScratch, cbScratch); + dResXN = SymCryptModElementIsEqual(pmMod, peTemp[0], peTemp[1]); + + // peTemp[0] = Y1 * Z2 + SymCryptModMul( pmMod, peSrc1Y, peSrc2Z, peTemp[0], pbScratch, cbScratch ); + + // peTemp[1] = Y2 * Z1 + SymCryptModMul( pmMod, peSrc2Y, peSrc1Z, peTemp[1], pbScratch, cbScratch ); + + dResY = SymCryptModElementIsEqual( pmMod, peTemp[0], peTemp[1] ); + + return (SYMCRYPT_MASK32_NONZERO( flags & SYMCRYPT_FLAG_ECPOINT_EQUAL ) & dResX & dResY ) | + (SYMCRYPT_MASK32_NONZERO( flags & SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL ) & dResXN & dResY ); +} + +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsSetZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_(cbScratch) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + + PSYMCRYPT_MODELEMENT peDstX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstT = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poDst ); + + 
SymCryptModElementSetValueUint32( 0, pmMod, peDstX, pbScratch, cbScratch ); + SymCryptModElementSetValueUint32( 1, pmMod, peDstY, pbScratch, cbScratch ); + SymCryptModElementSetValueUint32( 1, pmMod, peDstZ, pbScratch, cbScratch ); + SymCryptModElementSetValueUint32( 0, pmMod, peDstT, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsNegate( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Inout_ PSYMCRYPT_ECPOINT poSrc, + UINT32 mask, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + PSYMCRYPT_MODELEMENT peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0,pCurve, poSrc); + PSYMCRYPT_MODELEMENT peT = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3,pCurve, poSrc); + + PSYMCRYPT_MODELEMENT peTmp = NULL; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + pCurve->cbModElement ); + + peTmp = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + SYMCRYPT_ASSERT( peTmp != NULL); + + pbScratch += pCurve->cbModElement; + cbScratch -= pCurve->cbModElement; + + SymCryptModNeg( FMod, peX, peTmp, pbScratch, cbScratch ); + SymCryptModElementMaskedCopy( FMod, peTmp, peX, mask ); + + SymCryptModNeg( FMod, peT, peTmp, pbScratch, cbScratch ); + SymCryptModElementMaskedCopy( FMod, peTmp, peT, mask ); +} diff --git a/libs/symcrypt/lib/eckey.c b/libs/symcrypt/lib/eckey.c new file mode 100644 index 00000000000..0e05518614e --- /dev/null +++ b/libs/symcrypt/lib/eckey.c @@ -0,0 +1,996 @@ +// +// eckey.c Functions for the ECKEY object +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +PSYMCRYPT_ECKEY +SYMCRYPT_CALL +SymCryptEckeyAllocate( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + PVOID p; + SIZE_T cb; + PSYMCRYPT_ECKEY res = NULL; + + cb = SymCryptSizeofEckeyFromCurve( pCurve ); + + p = SymCryptCallbackAlloc( cb ); + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptEckeyCreate( p, cb, pCurve ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptEckeyFree( _Out_ PSYMCRYPT_ECKEY pkObj ) +{ + SYMCRYPT_CHECK_MAGIC( pkObj ); + SymCryptEckeyWipe( pkObj ); + SymCryptCallbackFree( pkObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEckeyFromCurve( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - SymCryptSizeofEcpointFromCurve outputs the size of up to 4 modelements + some overhead + // Thus the following calculation does not overflow the result. + // + return sizeof(SYMCRYPT_ECKEY) + SymCryptSizeofEcpointFromCurve( pCurve ) + SymCryptSizeofIntFromDigits(SymCryptEcurveDigitsofScalarMultiplier(pCurve)); +} + +PSYMCRYPT_ECKEY +SYMCRYPT_CALL +SymCryptEckeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_ECURVE pCurve ) +{ + PSYMCRYPT_ECKEY pkObj = NULL; + UINT32 privateKeyDigits = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + SIZE_T cbPublicKey = SymCryptSizeofEcpointFromCurve( pCurve ); + SIZE_T cbPrivateKey = SymCryptSizeofIntFromDigits( privateKeyDigits ); + + UNREFERENCED_PARAMETER( cbBuffer ); // only referenced in ASSERTs... 
+ + SYMCRYPT_ASSERT( pCurve != NULL ); + SYMCRYPT_ASSERT( cbBuffer >= SymCryptSizeofEckeyFromCurve( pCurve ) ); + + SYMCRYPT_ASSERT( cbBuffer >= sizeof(SYMCRYPT_ECKEY) + + cbPublicKey + + cbPrivateKey ); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + pkObj = (PSYMCRYPT_ECKEY) pbBuffer; + + pkObj->fAlgorithmInfo = 0; + pkObj->hasPrivateKey = FALSE; + pkObj->pCurve = pCurve; + + pkObj->poPublicKey = SymCryptEcpointCreate( + pbBuffer + sizeof(SYMCRYPT_ECKEY), + cbPublicKey, + pCurve ); + SYMCRYPT_ASSERT( pkObj->poPublicKey != NULL ); + + pkObj->piPrivateKey = SymCryptIntCreate( + pbBuffer + sizeof(SYMCRYPT_ECKEY) + cbPublicKey, + cbPrivateKey, + privateKeyDigits ); + SYMCRYPT_ASSERT( pkObj->piPrivateKey ); + + // Setting the magic + SYMCRYPT_SET_MAGIC( pkObj ); + + return pkObj; +} + +VOID +SYMCRYPT_CALL +SymCryptEckeyWipePrivateState( + _Inout_ PSYMCRYPT_ECKEY pkEckey ) +{ + SymCryptIntSetValueUint32( 0, pkEckey->piPrivateKey ); + pkEckey->hasPrivateKey = FALSE; +} + +VOID +SYMCRYPT_CALL +SymCryptEckeyWipe( _Out_ PSYMCRYPT_ECKEY pkDst ) +{ + // Wipe the whole structure in one go. + SymCryptWipe( pkDst, SymCryptSizeofEckeyFromCurve( pkDst->pCurve ) ); +} + +VOID +SymCryptEckeyCopy( + _In_ PCSYMCRYPT_ECKEY pkSrc, + _Out_ PSYMCRYPT_ECKEY pkDst ) +{ + // + // in-place copy is somewhat common... 
+ // + if( pkSrc != pkDst ) + { + // Copy the fAlgorithmInfo flags + pkDst->fAlgorithmInfo = pkSrc->fAlgorithmInfo; + + // Copy the hasPrivateKey flag + pkDst->hasPrivateKey = pkSrc->hasPrivateKey; + + // Copy the public key + SymCryptEcpointCopy( pkSrc->pCurve, pkSrc->poPublicKey, pkDst->poPublicKey ); + + // Copy the private key + SymCryptIntCopy( pkSrc->piPrivateKey, pkDst->piPrivateKey ); + } +} + +UINT32 +SYMCRYPT_CALL +SymCryptEckeySizeofPublicKey( + _In_ PCSYMCRYPT_ECKEY pkEckey, + _In_ SYMCRYPT_ECPOINT_FORMAT ecPointFormat ) +{ + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - SymCryptEcpointFormatNumberofElements returns up to 4 elements. + // + // Thus the following calculation does not overflow the result. + // + return SymCryptEcpointFormatNumberofElements[ecPointFormat] * SymCryptEcurveSizeofFieldElement( pkEckey->pCurve ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptEckeySizeofPrivateKey( _In_ PCSYMCRYPT_ECKEY pkEckey ) +{ + return SymCryptEcurveSizeofScalarMultiplier( pkEckey->pCurve ); +} + +BOOLEAN +SYMCRYPT_CALL +SymCryptEckeyHasPrivateKey( _In_ PCSYMCRYPT_ECKEY pkEckey ) +{ + return pkEckey->hasPrivateKey; +} + +#define SYMCRYPT_FLAG_ECKEY_PUBLIC_KEY_ORDER_VALIDATION (0x1) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyPerformPublicKeyValidation( + _In_ PCSYMCRYPT_ECKEY pEckey, + _In_ UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_ECURVE pCurve = pEckey->pCurve; + + PSYMCRYPT_ECPOINT poNPub = NULL; + UINT32 cbNPub = SymCryptSizeofEcpointFromCurve( pCurve ); + + // This is an excessive amount of space to require, but all callers can currently provide it, and it's easy to phrase + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( pCurve ) ); + + SYMCRYPT_ASSERT( cbScratch >= 
cbNPub ); + + // Check if Public key is O + if ( SymCryptEcpointIsZero( pCurve, pEckey->poPublicKey, pbScratch, cbScratch ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // Public key is represented by Modelements of the underlying finite field for the curve + // If we have reached this point we have either: + // Constructed the Public key to have coordinates in the field (Generate case), or + // Verified the Public key has coordinates in the field (SetValue case) + + // Check that Public key is on the curve + // Skip check for Montgomery curves as we do not have an EcpointOnCurve function for them + if ( !SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) && + !SymCryptEcpointOnCurve( pCurve, pEckey->poPublicKey, pbScratch, cbScratch ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // Perform validation that Public key is in a subgroup of order GOrd. + if ( (flags & SYMCRYPT_FLAG_ECKEY_PUBLIC_KEY_ORDER_VALIDATION) != 0 ) + { + if ( SymCryptIntIsEqualUint32( pCurve->H, 1 ) ) + { + // If cofactor is 1 then to validate that Public key has order GOrd + // it is sufficient to validate Public key is on the curve + // We just performed this check - so we are done. + } + else + { + // Ensure GOrd*(Public key) == O + poNPub = SymCryptEcpointCreate( pbScratch, cbNPub, pCurve ); + pbScratch += cbNPub; + cbScratch -= cbNPub; + + SYMCRYPT_ASSERT( poNPub != NULL ); + + // Do the multiplication + scError = SymCryptEcpointScalarMul( + pCurve, + SymCryptIntFromModulus( pCurve->GOrd ), + pEckey->poPublicKey, + 0, // Do not multiply by cofactor! 
+ poNPub, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + return scError; + } + + if ( !SymCryptEcpointIsZero( pCurve, poNPub, pbScratch, cbScratch ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + } + } + + return SYMCRYPT_NO_ERROR; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeySetValue( + _In_reads_bytes_( cbPrivateKey ) + PCBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _In_reads_bytes_( cbPublicKey ) + PCBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + SYMCRYPT_ECPOINT_FORMAT ecPointFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_ECKEY pEckey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE pbScratchInternal = NULL; + UINT32 cbScratchInternal = 0; + + PCSYMCRYPT_ECURVE pCurve = pEckey->pCurve; + + PSYMCRYPT_ECPOINT poTmp = NULL; + UINT32 cbTmp = 0; + + PSYMCRYPT_INT piTmpInteger = NULL; + UINT32 cbTmpInteger = 0; + PSYMCRYPT_MODELEMENT peTmpModElement = NULL; + UINT32 cbTmpModElement = pCurve->cbModElement; + + UINT32 privateKeyDigits = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + UINT32 fValidatePublicKeyOrder = SYMCRYPT_FLAG_ECKEY_PUBLIC_KEY_ORDER_VALIDATION; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_ECKEY_ECDSA | SYMCRYPT_FLAG_ECKEY_ECDH; + // Make sure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION | algorithmFlags; + + if ( ( ( flags & ~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check that minimal validation flag only specified with no fips + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) != 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) != 0 ) + { + fValidatePublicKeyOrder 
= 0; + } + + if ( ( ( cbPrivateKey == 0 ) && ( cbPublicKey == 0 ) ) || + ( ( cbPrivateKey != 0 ) && ( cbPrivateKey != SymCryptEcurveSizeofScalarMultiplier( pEckey->pCurve ) ) ) || + ( ( cbPublicKey != 0 ) && ( cbPublicKey != SymCryptEckeySizeofPublicKey( pEckey, ecPointFormat ) ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Allocate scratch space + cbScratch = SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( pCurve ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + if ( pbPrivateKey != NULL ) + { + // + // Private key calculations + // + + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + // Allocate the integer + cbTmpInteger = SymCryptSizeofIntFromDigits( privateKeyDigits ); + piTmpInteger = SymCryptIntCreate( pbScratchInternal, cbTmpInteger, privateKeyDigits ); + SYMCRYPT_ASSERT( piTmpInteger != NULL ); + + pbScratchInternal += cbTmpInteger; + cbScratchInternal -= cbTmpInteger; + + // Allocate the modelement + peTmpModElement = SymCryptModElementCreate( pbScratchInternal, cbTmpModElement, pCurve->GOrd ); + SYMCRYPT_ASSERT( peTmpModElement != NULL ); + + pbScratchInternal += cbTmpModElement; + cbScratchInternal -= cbTmpModElement; + + // Get the "raw" private key + scError = SymCryptIntSetValue( pbPrivateKey, cbPrivateKey, numFormat, piTmpInteger ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Validation steps + if ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) == 0 ) + { + // Perform range validation on imported Private key if it is in canonical format + if ( pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL ) + { + // Check if Private key is greater than or equal to GOrd + if ( !SymCryptIntIsLessThan( piTmpInteger, SymCryptIntFromModulus( pCurve->GOrd ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // "TimesH" formats + 
// IntGetBits requirements: + // We know that coFactorPower is up to SYMCRYPT_ECURVE_MAX_COFACTOR_POWER. Thus + // less than 32 and less than the digits size in bits. + if ( (pCurve->coFactorPower>0) && + (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH) && + (SymCryptIntGetBits( piTmpInteger, 0, pCurve->coFactorPower) != 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + + // High bit restrictions + // IntGetBits requirements: + // Satisfied by asserting that + // HighBitRestrictionPosition + HighBitRestrictionNumOfBits <= GOrdBitsize + coFactorPower + // during EcurveAllocate. + if ( (pCurve->HighBitRestrictionNumOfBits>0) && + (SymCryptIntGetBits( + piTmpInteger, + pCurve->HighBitRestrictionPosition, + pCurve->HighBitRestrictionNumOfBits) != pCurve->HighBitRestrictionValue) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Convert the private key to "DivH" format + if (pCurve->coFactorPower>0) + { + // "TimesH" format: Divide the input private key with the cofactor + // by shifting right the appropriate number of bits + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH) + { + SymCryptIntDivPow2( piTmpInteger, pCurve->coFactorPower, piTmpInteger ); + } + + // "Canonical" format: Divide by h modulo GOrd + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL) + { + SymCryptIntToModElement( piTmpInteger, pCurve->GOrd, peTmpModElement, pbScratchInternal, cbScratchInternal ); + SymCryptModDivPow2( pCurve->GOrd, peTmpModElement, pCurve->coFactorPower, peTmpModElement, pbScratchInternal, cbScratchInternal ); + SymCryptModElementToInt( pCurve->GOrd, peTmpModElement, piTmpInteger, pbScratchInternal, cbScratchInternal ); + } + } + + // Divide the input private key since it could be larger than subgroup order + SymCryptIntDivMod( + piTmpInteger, + SymCryptDivisorFromModulus(pCurve->GOrd), + NULL, + piTmpInteger, + pbScratchInternal, + 
cbScratchInternal ); + + // Check if Private key is 0 after dividing it by the subgroup order + // Other part of range validation - perform unconditionally as it is cheap + // and it never makes sense for private key to be 0 intentionally + if (SymCryptIntIsEqualUint32( piTmpInteger, 0 )) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Copy into the ECKEY + SymCryptIntCopy( piTmpInteger, pEckey->piPrivateKey ); + + pEckey->hasPrivateKey = TRUE; + } + + if ( pbPublicKey != NULL ) + { + scError = SymCryptEcpointSetValue( + pCurve, + pbPublicKey, + cbPublicKey, + numFormat, + ecPointFormat, + pEckey->poPublicKey, + SYMCRYPT_FLAG_DATA_PUBLIC, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Perform Public key validation on imported Public key. + if ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) == 0 ) + { + scError = SymCryptEckeyPerformPublicKeyValidation( + pEckey, + fValidatePublicKeyOrder, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + // Calculating the public key if no key was provided + // or if needed for keypair regeneration validation + if ( (pbPublicKey==NULL) || + ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + (pbPrivateKey!=NULL) && (pbPublicKey!=NULL) ) ) + { + // Calculate the public key from the private key + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + // By default calculate the Public key directly where it will be persisted + poTmp = pEckey->poPublicKey; + + if ( pbPublicKey != NULL ) + { + // If doing regeneration validation calculate the Public key in scratch + cbTmp = SymCryptSizeofEcpointFromCurve( pCurve ); + poTmp = SymCryptEcpointCreate( pbScratchInternal, cbTmp, pCurve ); + pbScratchInternal += cbTmp; + cbScratchInternal -= cbTmp; + } + + SYMCRYPT_ASSERT( poTmp != NULL ); + + // Always multiply by the cofactor since the internal format is "DIVH" + scError = SymCryptEcpointScalarMul( + 
pCurve, + pEckey->piPrivateKey, + NULL, + SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL, + poTmp, + pbScratchInternal, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + if ( pbPublicKey != NULL ) + { + if ( !SymCryptEcpointIsEqual( pCurve, poTmp, pEckey->poPublicKey, 0, pbScratchInternal, cbScratchInternal ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + else if ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) == 0 ) + { + // Perform Public key validation on generated Public key. + scError = SymCryptEckeyPerformPublicKeyValidation( + pEckey, + fValidatePublicKeyOrder, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + pEckey->fAlgorithmInfo = flags; // We want to track all of the flags in the Eckey + + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + if ( ( flags & SYMCRYPT_FLAG_ECKEY_ECDSA ) != 0 ) + { + // Ensure ECDSA algorithm selftest is run before first use of ECDSA algorithm + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptEcDsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_ECDSA ); + + // PCT does not need to be run on import - mark it as done + pEckey->fAlgorithmInfo |= SYMCRYPT_PCT_ECDSA; + } + + if ( ( flags & SYMCRYPT_FLAG_ECKEY_ECDH ) != 0 ) + { + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptEcDhSecretAgreementSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_ECDH ); + } + } + +cleanup: + + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyGetValue( + _In_ PCSYMCRYPT_ECKEY pEckey, + _Out_writes_bytes_( cbPrivateKey ) + PBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _Out_writes_bytes_( cbPublicKey ) + PBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + SYMCRYPT_ECPOINT_FORMAT ecPointFormat, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE 
pbScratchInternal = NULL; + UINT32 cbScratchInternal = 0; + + PCSYMCRYPT_ECURVE pCurve = pEckey->pCurve; + + PSYMCRYPT_INT piTmpInteger = NULL; + UINT32 cbTmpInteger = 0; + PSYMCRYPT_MODELEMENT peTmpModElement = NULL; + UINT32 cbTmpModElement = pCurve->cbModElement; + + UINT32 privateKeyDigits = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + SYMCRYPT_ASSERT( (cbPrivateKey==0) || (cbPrivateKey == SymCryptEcurveSizeofScalarMultiplier( pEckey->pCurve )) ); + SYMCRYPT_ASSERT( (cbPublicKey==0) || (cbPublicKey == SymCryptEckeySizeofPublicKey( pEckey, ecPointFormat)) ); + + // Make sure we only specify the correct flags + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Allocate scratch space + cbScratch = SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( pCurve ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + // Allocate the integer + cbTmpInteger = SymCryptSizeofIntFromDigits( privateKeyDigits ); + piTmpInteger = SymCryptIntCreate( pbScratchInternal, cbTmpInteger, privateKeyDigits ); + SYMCRYPT_ASSERT( piTmpInteger != NULL ); + + pbScratchInternal += cbTmpInteger; + cbScratchInternal -= cbTmpInteger; + + // Allocate the modelement + peTmpModElement = SymCryptModElementCreate( pbScratchInternal, cbTmpModElement, pCurve->GOrd ); + SYMCRYPT_ASSERT( peTmpModElement != NULL ); + + pbScratchInternal += cbTmpModElement; + cbScratchInternal -= cbTmpModElement; + + if ((cbPrivateKey == 0) && (cbPublicKey == 0)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbPrivateKey != 0) + { + if (!pEckey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // If this keypair may be used in ECDSA, and does not have the no FIPS flag, run the PCT if + // it has not already been run + if ( ((pEckey->fAlgorithmInfo & 
SYMCRYPT_FLAG_ECKEY_ECDSA) != 0) && + ((pEckey->fAlgorithmInfo & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0) ) + { + SYMCRYPT_RUN_KEY_GEN_PCT( + SymCryptEcDsaPct, + pEckey, + SYMCRYPT_PCT_ECDSA ); + } + + // Copy the key into the temporary integer + SymCryptIntCopy( pEckey->piPrivateKey, piTmpInteger ); + + // Convert the "DivH" format into the external format + if (pCurve->coFactorPower>0) + { + // For the "Canonical" format: Multiply the integer by h + // and then take the result modulo GOrd + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL) + { + SymCryptIntMulPow2( piTmpInteger, pCurve->coFactorPower, piTmpInteger ); + SymCryptIntDivMod( + piTmpInteger, + SymCryptDivisorFromModulus(pCurve->GOrd), + NULL, + piTmpInteger, + pbScratchInternal, + cbScratchInternal ); + } + + // For the "TimesH" format: Multiply the integer by h again by shifting + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH) + { + SymCryptIntMulPow2( piTmpInteger, pCurve->coFactorPower, piTmpInteger ); + } + } + + scError = SymCryptIntGetValue( piTmpInteger, pbPrivateKey, cbPrivateKey, numFormat ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (cbPublicKey != 0) + { + scError = SymCryptEcpointGetValue( + pCurve, + pEckey->poPublicKey, + numFormat, + ecPointFormat, + pbPublicKey, + cbPublicKey, + SYMCRYPT_FLAG_DATA_PUBLIC, + pbScratch, + cbScratch ); + } + +cleanup: + + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} + +#define SYMCRYPT_ECPOINT_SET_RANDOM_MAX_TRIES (1000) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeySetRandom( + _In_ UINT32 flags, + _Inout_ PSYMCRYPT_ECKEY pEckey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE pbScratchInternal = NULL; + UINT32 cbScratchInternal = 0; + + PCSYMCRYPT_ECURVE pCurve = pEckey->pCurve; + + PSYMCRYPT_ECPOINT poTmp = NULL; + UINT32 
cbTmp = 0; + + INT32 cntr = SYMCRYPT_ECPOINT_SET_RANDOM_MAX_TRIES; + + PSYMCRYPT_MODELEMENT peScalar = NULL; + PSYMCRYPT_INT piScalar = NULL; + UINT32 cbScalar = 0; + + UINT32 highBitRestrictionPosition = pCurve->HighBitRestrictionPosition; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_ECKEY_ECDSA | SYMCRYPT_FLAG_ECKEY_ECDH; + // Make sure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | algorithmFlags; + + if ( ( ( flags & ~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. + // + cbScratch = SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( pCurve ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Allocating temporaries + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + peScalar = SymCryptModElementCreate( pbScratchInternal, pCurve->cbModElement, pCurve->GOrd ); + SYMCRYPT_ASSERT( peScalar != NULL ); + + pbScratchInternal += pCurve->cbModElement; + cbScratchInternal -= pCurve->cbModElement; + + cbScalar = SymCryptSizeofIntFromDigits( SymCryptEcurveDigitsofScalarMultiplier(pCurve) ); + piScalar = SymCryptIntCreate( pbScratchInternal, cbScalar, SymCryptEcurveDigitsofScalarMultiplier(pCurve) ); + + pbScratchInternal += cbScalar; + cbScratchInternal -= cbScalar; + + // Shift the high bit position if the format is "TIMESH" + // Note: Do not actually multiply the integer as we will check if it is + // less than the group order + if (pCurve->PrivateKeyDefaultFormat == 
SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH) + { + highBitRestrictionPosition -= pCurve->coFactorPower; + } + + // Main loop + do + { + // We perform Private key range validation by construction + // Setting a random mod element in the [1, SubgroupOrder-1] set + // This will be the "DivH" format of the private key. This means + // that PublicKey = h * PrivateKey * G + SymCryptModSetRandom( + pCurve->GOrd, + peScalar, + (SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE|SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE), + pbScratchInternal, + cbScratchInternal ); + + // Converting to "canonical" format + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL) + { + for (UINT32 i=0; i<pCurve->coFactorPower; i++) + { + SymCryptModAdd( pCurve->GOrd, peScalar, peScalar, peScalar, pbScratchInternal, cbScratchInternal ); + } + } + + // Set the temporary scalar to verify the format + SymCryptModElementToInt( pCurve->GOrd, peScalar, piScalar, pbScratchInternal, cbScratchInternal ); + + if (pCurve->HighBitRestrictionNumOfBits > 0) + { + // Set the desired bits + SymCryptIntSetBits( + piScalar, + pCurve->HighBitRestrictionValue, + highBitRestrictionPosition, + pCurve->HighBitRestrictionNumOfBits ); + + // Make sure we didn't exceed the group order + if ( SymCryptIntIsLessThan( + piScalar, + SymCryptIntFromModulus( pCurve->GOrd )) ) + { + break; + } + } + else + { + // No high bit restriction was specified + break; + } + + cntr--; + } + while (cntr>0); + + if (cntr <= 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Here piScalar has a private key that satisfies the restriction(s) + // Move it to the modelement + SymCryptIntToModElement( piScalar, pCurve->GOrd, peScalar, pbScratchInternal, cbScratchInternal ); + + // Convert the private key back to "DIVH" format + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL) + { + SymCryptModDivPow2( pCurve->GOrd, peScalar, pCurve->coFactorPower, peScalar, pbScratchInternal, 
cbScratchInternal ); + } + + // Set the private key + SymCryptModElementToInt( pCurve->GOrd, peScalar, pEckey->piPrivateKey, pbScratchInternal, cbScratchInternal ); + + // Do the multiplication (pass over the entire scratch space as it is not needed anymore) + scError = SymCryptEcpointScalarMul( + pCurve, + pEckey->piPrivateKey, + NULL, + SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL, + pEckey->poPublicKey, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Perform range and public key order validation on generated Public key. + if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) + { + // Perform Public key validation. + // Always perform range validation and validation that Public key is in subgroup of order GOrd + scError = SymCryptEckeyPerformPublicKeyValidation( + pEckey, + SYMCRYPT_FLAG_ECKEY_PUBLIC_KEY_ORDER_VALIDATION, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + pEckey->hasPrivateKey = TRUE; + + pEckey->fAlgorithmInfo = flags; // We want to track all of the flags in the Eckey + + if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) + { + if( ( flags & SYMCRYPT_FLAG_ECKEY_ECDSA ) != 0 ) + { + // Ensure ECDSA algorithm selftest is run before first use of ECDSA algorithm + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptEcDsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_ECDSA ); + } + + if( ( flags & SYMCRYPT_FLAG_ECKEY_ECDH ) != 0 ) + { + // Ensure we have run the algorithm selftest at least once. 
+ SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptEcDhSecretAgreementSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_ECDH ); + + // Run PCT eagerly so it only needs to be defined here + // The important case for performance is ECDH key generation + + // ECDH PCT per SP80056a-rev3 5.6.2.1.4 b) + // Recompute the public key from the private key + // Option a) appears to be explicitly overruled by 140-3 IG + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + cbTmp = SymCryptSizeofEcpointFromCurve( pCurve ); + poTmp = SymCryptEcpointCreate( pbScratchInternal, cbTmp, pCurve ); + pbScratchInternal += cbTmp; + cbScratchInternal -= cbTmp; + + SYMCRYPT_ASSERT( poTmp != NULL ); + + // Always multiply by the cofactor since the internal format is "DIVH" + scError = SymCryptEcpointScalarMul( + pCurve, + pEckey->piPrivateKey, + NULL, + SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL, + poTmp, + pbScratchInternal, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SYMCRYPT_FIPS_ASSERT( SymCryptEcpointIsEqual( pCurve, poTmp, pEckey->poPublicKey, 0, pbScratchInternal, cbScratchInternal ) ); + } + } + +cleanup: + + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyExtendKeyUsage( + _Inout_ PSYMCRYPT_ECKEY pEckey, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_ECKEY_ECDSA | SYMCRYPT_FLAG_ECKEY_ECDH; + + if ( ( ( flags & ~algorithmFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pEckey->fAlgorithmInfo |= flags; + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/ecpoint.c b/libs/symcrypt/lib/ecpoint.c new file mode 100644 index 00000000000..d2f36fa57e5 --- /dev/null +++ b/libs/symcrypt/lib/ecpoint.c @@ -0,0 +1,785 
@@ +// +// ecpoint.c Ecpoint functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// Table with the number of field elements for each point format +const UINT32 SymCryptEcpointFormatNumberofElements[] = { + 0, + 1, // SYMCRYPT_ECPOINT_FORMAT_X + 2, // SYMCRYPT_ECPOINT_FORMAT_XY +}; + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEcpointEx( + UINT32 cbModElement, + UINT32 numOfCoordinates ) +{ + SYMCRYPT_ASSERT(numOfCoordinates > 0); + SYMCRYPT_ASSERT(numOfCoordinates <= SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH); + + // Callers should never specify numOfCoordinates equal to 0 or greater than + // SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH + // Return 0 to indicate failure if a caller does specify invalid numOfCoordinates + if( (numOfCoordinates == 0) || (numOfCoordinates > SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH) ) + { + return 0; + } + + // Since the maximum number of coordinates is 4 this result is bounded + // by 4*2^17 + overhead ~ 2^20 + return sizeof(SYMCRYPT_ECPOINT) + numOfCoordinates * cbModElement; +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEcpointFromCurve( PCSYMCRYPT_ECURVE pCurve ) +{ + // Same bound as SymCryptSizeofEcpointEx + return SymCryptSizeofEcpointEx( pCurve->cbModElement, SYMCRYPT_INTERNAL_NUMOF_COORDINATES(pCurve->eCoordinates) ); +} + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointAllocate( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + PVOID p = NULL; + SIZE_T cb; + PSYMCRYPT_ECPOINT res = NULL; + + cb = SymCryptSizeofEcpointFromCurve( pCurve ); + + if ( cb != 0 ) + { + p = SymCryptCallbackAlloc( cb ); + } + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptEcpointCreate( p, cb, pCurve ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptEcpointFree( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst ) +{ + SYMCRYPT_CHECK_MAGIC( poDst ); + SymCryptEcpointWipe( pCurve, poDst ); + SymCryptCallbackFree( poDst ); +} + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointCreateEx( 
+ _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_ECURVE pCurve, + UINT32 numOfCoordinates ) +{ + PSYMCRYPT_ECPOINT poPoint = NULL; + + PSYMCRYPT_MODELEMENT pmTmp = NULL; + UINT32 cbModElement = pCurve->cbModElement; + + PBYTE pbBufferEnd = pbBuffer + cbBuffer; + UNREFERENCED_PARAMETER( pbBufferEnd ); // only referenced in an ASSERT... + + SYMCRYPT_ASSERT( pCurve->FMod != 0 ); + SYMCRYPT_ASSERT( pCurve->cbModElement != 0 ); + SYMCRYPT_ASSERT( cbBuffer >= SymCryptSizeofEcpointEx( pCurve->cbModElement, numOfCoordinates ) ); + if ( cbBuffer == 0 || numOfCoordinates == 0 ) + { + goto cleanup; + } + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + poPoint = (PSYMCRYPT_ECPOINT) pbBuffer; + + pbBuffer += sizeof(SYMCRYPT_ECPOINT); + + // Setting the point coordinates + for (UINT32 i=0; i<numOfCoordinates; i++) + { + SYMCRYPT_ASSERT( pbBuffer + cbModElement <= pbBufferEnd ); + pmTmp = SymCryptModElementCreate( pbBuffer, cbModElement, pCurve->FMod ); + if ( pmTmp == NULL ) + { + poPoint = NULL; + goto cleanup; + } + pbBuffer += cbModElement; + } + + // Setting the normalized flag + poPoint->normalized = FALSE; + + // Setting the curve + poPoint->pCurve = pCurve; + + // Setting the magic + SYMCRYPT_SET_MAGIC( poPoint ); + +cleanup: + return poPoint; +} + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + + SYMCRYPT_ASSERT( pCurve->eCoordinates != 0 ); + + return SymCryptEcpointCreateEx( pbBuffer, cbBuffer, pCurve, SYMCRYPT_INTERNAL_NUMOF_COORDINATES(pCurve->eCoordinates) ); +} + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointRetrieveHandle( _In_ PBYTE pbBuffer ) +{ + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + return (PSYMCRYPT_ECPOINT) pbBuffer; +} + +VOID +SYMCRYPT_CALL +SymCryptEcpointWipe( _In_ PCSYMCRYPT_ECURVE pCurve, _Out_ PSYMCRYPT_ECPOINT poDst ) +{ + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + // 
Wipe the whole structure in one go. + SymCryptWipe( poDst, SymCryptSizeofEcpointFromCurve( pCurve ) ); +} + +VOID +SymCryptEcpointCopy( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst ) +{ + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + if( poSrc != poDst ) + { + // Unconditionally set the normalization state of destination to source + poDst->normalized = poSrc->normalized; + + memcpy(poDst + 1, poSrc + 1, SYMCRYPT_INTERNAL_NUMOF_COORDINATES(pCurve->eCoordinates) * pCurve->FModDigits * SYMCRYPT_FDEF_DIGIT_SIZE); + } +} + +VOID +SymCryptEcpointMaskedCopy( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 mask ) +{ + SYMCRYPT_ASSERT( (mask == 0) || (mask == 0xffffffff) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + // Unconditionally combine the normalization state of source and destination to avoid potential for + // leak of mask. Normalized is a non-secret value and is permitted to be leaked by side-channels + poDst->normalized &= poSrc->normalized; + + // dcl - this looks like the equivalent of memcpy + // should be proven that arguments cannot be the result of an integer overflow + SymCryptFdefMaskedCopy((PCBYTE)poSrc + sizeof(SYMCRYPT_ECPOINT), (PBYTE)poDst + sizeof(SYMCRYPT_ECPOINT), SYMCRYPT_INTERNAL_NUMOF_COORDINATES(pCurve->eCoordinates) * pCurve->FModDigits, mask ); +} + +// +// SymCryptEcpointTransform: Internal function to transform an ECPOINT +// from one coordinate representation to another. One point has the default +// format of the curve. The other point has a format large enough for the external +// SYMCRYPT_ECPOINT_FORMAT. +// +// When the boolean setValue is set to TRUE, the source point is the one with +// the external format eformat, and the destination point has the default +// format of the curve. 
If setValue = FALSE the roles are reversed. +// This function is only called by the Get / Set Value functions. +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointTransform( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + SYMCRYPT_ECPOINT_FORMAT eformat, + BOOLEAN setValue, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_MODELEMENT peSrc = NULL; + PSYMCRYPT_MODELEMENT peDst = NULL; + PSYMCRYPT_MODELEMENT peX = NULL; + PSYMCRYPT_MODELEMENT peY = NULL; + + SYMCRYPT_ECPOINT_COORDINATES coFrom = SYMCRYPT_ECPOINT_COORDINATES_INVALID; + SYMCRYPT_ECPOINT_COORDINATES coTo = SYMCRYPT_ECPOINT_COORDINATES_INVALID; + + PSYMCRYPT_MODELEMENT peT[2] = { 0 }; // Temporaries + + SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pCurve->FModDigits )) + + 2 * pCurve->cbModElement ); + + // Get the assumed representation from the external format + switch (eformat) + { + case (SYMCRYPT_ECPOINT_FORMAT_X): + coFrom = SYMCRYPT_ECPOINT_COORDINATES_SINGLE; + break; + case (SYMCRYPT_ECPOINT_FORMAT_XY): + coFrom = SYMCRYPT_ECPOINT_COORDINATES_AFFINE; + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Find out whether we are setting or getting the value of the ECPOINT + if (setValue) + { + coTo = pCurve->eCoordinates; + } + else + { + coTo = coFrom; + coFrom = pCurve->eCoordinates; + } + + // Take all the possible supported transformations: + // - From SYMCRYPT_ECPOINT_COORDINATES_SINGLE to + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE (identity transformation) + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE (** Set all zeros to the Y 
coordinate **) + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE + // - From SYMCRYPT_ECPOINT_COORDINATES_AFFINE to + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE (** Ignore Y coordinate **) + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE (identity transformation) + // * SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN + // * SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE (** Ignore Y coordinate **) + // - From SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN to + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE + // * SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN (identity transformation) + // - From SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE to + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE + // * SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE (identity transformation) + // - From SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE (** Set all zeros to the Y coordinate **) + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE (identity transformation) + + // dcl - this appears that it might be a candidate for refactoring. Lots of code that looks + // duplicated across sections. Maybe some number of small functions would make it less fragile? + if ( coFrom == coTo ) + { + SymCryptEcpointCopy( pCurve, poSrc, poDst ); // All the identity transformations. 
+ } + else if (coFrom == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) + { + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + // Copy X + peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peX != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementCopy( pCurve->FMod, peX, peDst ); + + // Set Y to 0 + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 0, pCurve->FMod, peDst, pbScratch, cbScratch ); + } + else if (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE) + { + // Copy X + peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peX != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementCopy( pCurve->FMod, peX, peDst ); + + // Set Y to 1 + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 1, pCurve->FMod, peDst, pbScratch, cbScratch ); + + // Setting the normalized flag + poDst->normalized = TRUE; + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + } + else if (coFrom == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE) + ) + { + // Copy X + peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peX != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementCopy( pCurve->FMod, peX, peDst ); + + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE) ) 
+ { + // Copy Y + peY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + SYMCRYPT_ASSERT( peY != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementCopy( pCurve->FMod, peY, peDst ); + + // Set Z to 1 + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 1, pCurve->FMod, peDst, pbScratch, cbScratch ); + + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE) + { + // T = x * y * z + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModMul( pCurve->FMod, peX, peY, peDst, pbScratch, cbScratch ); + } + + // Setting the normalized flag + poDst->normalized = TRUE; + } + else if (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE) + { + // Set Y to 1 (Ignore the second coordinate of the source point) + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 1, pCurve->FMod, peDst, pbScratch, cbScratch ); + + // Setting the normalized flag + poDst->normalized = TRUE; + } + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + } + else if (coFrom == SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN) + { + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) ) + { + // Creating temporaries + for (UINT32 i=0; i<2; i++) + { + peT[i] = SymCryptModElementCreate( pbScratch, pCurve->cbModElement, pCurve->FMod ); + SYMCRYPT_ASSERT( peT[i] != NULL); + + pbScratch += pCurve->cbModElement; + } + + cbScratch -= 2*pCurve->cbModElement; + + // Get the Z coordinate of the source point + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + // Check if Z is equal to 0 (i.e. 
the point is the point at infinity) + if (SymCryptModElementIsZero(pCurve->FMod, peSrc)) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + // Calculation + // T0 := 1 / Z + scError = SymCryptModInv( pCurve->FMod, peSrc, peT[0], flags, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptModMul( pCurve->FMod, peT[0], peT[0], peT[1], pbScratch, cbScratch ); // T1 := T0 * T0 = 1/Z^2 + + // Get the X coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // Set the new X + SymCryptModMul( pCurve->FMod, peSrc, peT[1], peDst, pbScratch, cbScratch ); // X2 := X * T1 = X/Z^2 + + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + SymCryptModMul( pCurve->FMod, peT[0], peT[1], peT[1], pbScratch, cbScratch ); // T1 := T0 * T1 = 1/Z^3 + + // Get the Y coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // Set the new Y + SymCryptModMul( pCurve->FMod, peSrc, peT[1], peDst, pbScratch, cbScratch ); // Y2 := Y * T1 = Y/Z^3 + } + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + } + else if ( coFrom == SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE ) + { + + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) ) + { + // Creating temporary + peT[0] = SymCryptModElementCreate( pbScratch, pCurve->cbModElement, pCurve->FMod ); + SYMCRYPT_ASSERT( peT[0] != NULL); + pbScratch += pCurve->cbModElement; + cbScratch -= 2*pCurve->cbModElement; + + // Get the Z coordinate of the source point + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + // Check if Z is 
equal to 0 (i.e. the point is the point at infinity) + if (SymCryptModElementIsZero(pCurve->FMod, peSrc)) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + // peT[0] = 1 / Z + scError = SymCryptModInv( pCurve->FMod, peSrc, peT[0], flags, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the X coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // x = X * (1 / Z) + SymCryptModMul( pCurve->FMod, peSrc, peT[0], peDst, pbScratch, cbScratch ); + + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + // Get the Y coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // y = Y * (1 / Z) + SymCryptModMul( pCurve->FMod, peSrc, peT[0], peDst, pbScratch, cbScratch ); + } + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + } + else if (coFrom == SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE) + { + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) ) + { + // Creating temporary + peT[0] = SymCryptModElementCreate( pbScratch, pCurve->cbModElement, pCurve->FMod ); + SYMCRYPT_ASSERT( peT[0] != NULL); + + pbScratch += pCurve->cbModElement; + cbScratch -= pCurve->cbModElement; + + // Get the Y coordinate of the source point + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + // Check if Y is equal to 0 (i.e. 
the point is the point at infinity) + if (SymCryptModElementIsZero(pCurve->FMod, peSrc)) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + // Calculation + scError = SymCryptModInv( pCurve->FMod, peSrc, peT[0], flags, pbScratch, cbScratch ); // T0 := 1 / Y + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the X coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // Set the new X + SymCryptModMul( pCurve->FMod, peSrc, peT[0], peDst, pbScratch, cbScratch ); // X2 := X * T0 = X/Y + + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + // Set Y to 0 + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 0, pCurve->FMod, peDst, pbScratch, cbScratch ); + } + } + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointSetValue( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nformat, + SYMCRYPT_ECPOINT_FORMAT eformat, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NOT_IMPLEMENTED; + PSYMCRYPT_MODELEMENT peTmp = NULL; // Temporary MODELEMENT handle + PSYMCRYPT_ECPOINT poLarge = NULL; // ECPOINT with the largest format available + UINT32 cbLarge = 0; + PSYMCRYPT_INT piTemp = NULL; + UINT32 cbTemp = 0; + UINT32 publicKeyDigits = SymCryptEcurveDigitsofFieldElement( pCurve ); + + SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ); + + SYMCRYPT_ASSERT( pCurve->FMod != 0 ); + SYMCRYPT_ASSERT( pCurve->eCoordinates != 0 ); + SYMCRYPT_ASSERT( pCurve->cbModElement != 0 ); + + SYMCRYPT_ASSERT( 
cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ); + + // Check that the buffer is of correct size + if ( cbSrc != SymCryptEcpointFormatNumberofElements[ eformat ] * SymCryptEcurveSizeofFieldElement( pCurve ) ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + cbSrc = cbSrc / SymCryptEcpointFormatNumberofElements[ eformat ]; + + cbTemp = SymCryptSizeofIntFromDigits( publicKeyDigits ); + SYMCRYPT_ASSERT( cbScratch > cbTemp ); + + piTemp = SymCryptIntCreate( pbScratch, cbTemp, publicKeyDigits ); + + // Validate the coordinate of the input public key is less than the field modulus + for ( UINT32 i = 0; i < SymCryptEcpointFormatNumberofElements[eformat]; i++ ) + { + scError = SymCryptIntSetValue( pbSrc + i * cbSrc, cbSrc, nformat, piTemp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if ( !SymCryptIntIsLessThan( piTemp, SymCryptIntFromModulus( pCurve->FMod ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Create the large point + cbLarge = SymCryptSizeofEcpointEx( pCurve->cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ); + SYMCRYPT_ASSERT( cbScratch > cbLarge ); + poLarge = SymCryptEcpointCreateEx( pbScratch, cbLarge, pCurve, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ); + if ( poLarge == NULL ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Setting the point coordinates into the big point + for (UINT32 i=0; i<SymCryptEcpointFormatNumberofElements[eformat]; i++) + { + peTmp = (PSYMCRYPT_MODELEMENT)((PBYTE)poLarge + SYMCRYPT_INTERNAL_ECPOINT_COORDINATE_OFFSET( pCurve, i )); + if ( peTmp == NULL ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + scError = SymCryptModElementSetValue( + pbSrc, + cbSrc, + nformat, + pCurve->FMod, + peTmp, + pbScratch + cbLarge, + cbScratch - cbLarge ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbSrc += cbSrc; + } + + // Transform the big point into the destination point + scError = 
SymCryptEcpointTransform( pCurve, poLarge, poDst, eformat, TRUE, flags, pbScratch + cbLarge, cbScratch - cbLarge); + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointGetValue( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + SYMCRYPT_NUMBER_FORMAT nformat, + SYMCRYPT_ECPOINT_FORMAT eformat, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NOT_IMPLEMENTED; + PSYMCRYPT_MODELEMENT peTmp = NULL; // Temporary MODELEMENT handle + PSYMCRYPT_ECPOINT poLarge = NULL; // ECPOINT with the largest format available + UINT32 cbLarge = 0; + SIZE_T cbDstElem; + + SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ); + SYMCRYPT_ASSERT( pCurve->FMod != 0 ); + SYMCRYPT_ASSERT( pCurve->eCoordinates != 0 ); + SYMCRYPT_ASSERT( pCurve->cbModElement != 0 ); + + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ); + + // Check that the buffer is of correct size + if ( cbDst != SymCryptEcpointFormatNumberofElements[ eformat ] * SymCryptEcurveSizeofFieldElement( pCurve ) ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + SYMCRYPT_ASSERT( SymCryptEcpointFormatNumberofElements[ eformat ] > 0 ); + cbDstElem = cbDst / SymCryptEcpointFormatNumberofElements[ eformat ]; + + // Create the big point + cbLarge = SymCryptSizeofEcpointEx( pCurve->cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ); + SYMCRYPT_ASSERT( cbScratch > cbLarge ); + poLarge = SymCryptEcpointCreateEx( pbScratch, cbLarge, pCurve, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ); + if ( poLarge == NULL ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Transform the source point into the big point if needed + scError = SymCryptEcpointTransform( pCurve, poSrc, poLarge, eformat, FALSE, flags, pbScratch + cbLarge, cbScratch - cbLarge); + if (scError != SYMCRYPT_NO_ERROR) + { + 
goto cleanup; + } + + // Getting the point coordinates into the destination buffer + for (UINT32 i=0; i<SymCryptEcpointFormatNumberofElements[eformat]; i++) + { + SYMCRYPT_ASSERT( cbDst >= cbDstElem ); + peTmp = (PSYMCRYPT_MODELEMENT)( (PBYTE)poLarge + SYMCRYPT_INTERNAL_ECPOINT_COORDINATE_OFFSET( pCurve, i ) ); + if ( peTmp == NULL ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + scError = SymCryptModElementGetValue( + pCurve->FMod, + peTmp, + pbDst, + cbDstElem, + nformat, + pbScratch + cbLarge, + cbScratch - cbLarge ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbDst += cbDstElem; + cbDst -= cbDstElem; + } + +cleanup: + + return scError; +} diff --git a/libs/symcrypt/lib/ecurve.c b/libs/symcrypt/lib/ecurve.c new file mode 100644 index 00000000000..7e5cc37f313 --- /dev/null +++ b/libs/symcrypt/lib/ecurve.c @@ -0,0 +1,771 @@ +// +// ecurve.c Ecurve functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// Approximate number of consecutive operations with the modulus and the +// (sub)group order of the curve. These numbers can trigger special optimizations +// on the underlying code, e.g. use of Montgomery multiplication or not. +#define SYMCRYPT_INTERNAL_ECURVE_MODULUS_NUMOF_OPERATIONS( _bitsize ) ( 100 * (_bitsize) ) +#define SYMCRYPT_INTERNAL_ECURVE_GROUP_ORDER_NUMOF_OPERATIONS ( 1 ) + +// We limit the max size of the elliptic curve to avoid denial-of-service attacks when +// an attacker sends a curve specification. +// Elliptic curve operations are O(n^3) in the curve size. Theoretically SymCrypt supports +// values up to 2^20 bits at the moment, so that is 2^12 times more than a typical curve size +// of 256 bits. Operations are then 2^36 times slower, and a single operation could take months. +// Our largest curve is 521 bits, and we won't see curves > 1024 bits for a while yet. 
+#define SYMCRYPT_INTERNAL_MAX_ECURVE_SIZE (1024)
+
+// Internal record of the digit counts and byte sizes of every component of a curve
+// object. It is filled in by SymCryptEcurveValidateAndComputeSizes() and consumed by
+// the functions that create a curve.
+typedef struct _SYMCRYPT_ECURVE_SIZES {
+    UINT32 nDigitsFieldLength;      // Digits needed for cbFieldLength * 8 bits
+    UINT32 nDigitsSubgroupOrder;    // Digits needed for cbSubgroupOrder * 8 bits
+    UINT32 nDigitsCoFactor;         // Digits needed for cbCofactor * 8 bits
+    UINT32 cbAlloc;                 // Length of the whole curve buffer
+    UINT32 cbModulus;               // Size of the field modulus object
+    UINT32 cbModElement;            // Size of one element modulo the field modulus
+    UINT32 cbEcpoint;               // Size of one point in the curve's default coordinates
+    UINT32 cbSubgroupOrder;         // Size of the subgroup order modulus object
+    UINT32 cbCoFactor;              // Size of the cofactor integer object
+    UINT32 cbScratch;               // Scratch space needed while building the curve
+    SYMCRYPT_ECPOINT_COORDINATES eCoordinates;  // Default coordinates for the curve type
+} SYMCRYPT_ECURVE_SIZES, *PSYMCRYPT_ECURVE_SIZES;
+typedef const SYMCRYPT_ECURVE_SIZES * PCSYMCRYPT_ECURVE_SIZES;
+
+// Validates the header of a curve parameter blob and derives all object sizes from it.
+//
+//  pParams     Curve parameters; only the fixed header fields are read here.
+//  pSizes      Receives the computed sizes. Fully populated only when TRUE is returned.
+//
+// Returns FALSE when a length field exceeds the supported maximum or when the curve
+// type is not one of the three known types, and TRUE otherwise.
+static
+BOOLEAN
+SymCryptEcurveValidateAndComputeSizes(
+    _In_ PCSYMCRYPT_ECURVE_PARAMS pParams,
+    _Out_ PSYMCRYPT_ECURVE_SIZES pSizes )
+{
+    UINT32 nStoredPoints = 0;       // Number of ECPOINTs stored at the end of the curve buffer
+    UINT32 cbScratchNeeded = 0;     // Running maximum of the scratch space requirements
+
+    // Well-formedness of the parameters (checked builds only)
+    SYMCRYPT_ASSERT( pParams != NULL );
+    SYMCRYPT_ASSERT( (pParams->version == 1) || (pParams->version == 2) );
+    SYMCRYPT_ASSERT( pParams->cbFieldLength != 0 );
+    SYMCRYPT_ASSERT( pParams->cbSubgroupOrder != 0 );
+    SYMCRYPT_ASSERT( pParams->cbCofactor != 0 );
+    SYMCRYPT_ASSERT( (pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS) ||
+                     (pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS) ||
+                     (pParams->type == SYMCRYPT_ECURVE_TYPE_MONTGOMERY) );
+
+    // Refuse wildly oversized inputs up front; this is the denial-of-service guard.
+    if ( pParams->cbFieldLength > SYMCRYPT_INTERNAL_MAX_ECURVE_SIZE/8 )
+    {
+        return FALSE;
+    }
+    if ( pParams->cbSubgroupOrder > SYMCRYPT_INTERNAL_MAX_ECURVE_SIZE / 8 + 1 )    // subgroup can be > field prime
+    {
+        return FALSE;
+    }
+    if ( pParams->cbCofactor > 2 )      // We support co-factor = 256
+    {
+        return FALSE;
+    }
+    if ( pParams->cbSeed > 256 )
+    {
+        return FALSE;
+    }
+
+    // Field: digits, modulus object and mod element
+    pSizes->nDigitsFieldLength = SymCryptDigitsFromBits( pParams->cbFieldLength * 8 );
+    pSizes->cbModulus = SymCryptSizeofModulusFromDigits( pSizes->nDigitsFieldLength );
+    pSizes->cbModElement = SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pParams->cbFieldLength * 8 );
+
+    // Subgroup order: digits and modulus object
+    pSizes->nDigitsSubgroupOrder = SymCryptDigitsFromBits( pParams->cbSubgroupOrder * 8 );
+    pSizes->cbSubgroupOrder = SymCryptSizeofModulusFromDigits( pSizes->nDigitsSubgroupOrder );
+
+    // Cofactor: digits and integer object
+    pSizes->nDigitsCoFactor = SymCryptDigitsFromBits( pParams->cbCofactor * 8 );
+    pSizes->cbCoFactor = SymCryptSizeofIntFromDigits( pSizes->nDigitsCoFactor );
+
+    // The curve type selects the default point coordinates and how many points are
+    // stored behind the fixed part of the curve. Short Weierstrass and twisted Edwards
+    // curves keep a table of 2^(w-2) points, where w is the (currently fixed) window
+    // width; a Montgomery curve keeps only the distinguished point.
+    //
+    // SYMCRYPT_ASSERT( (1 << (SYMCRYPT_ECURVE_SW_DEF_WINDOW-2)) <= SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS );
+    if ( pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS )
+    {
+        pSizes->eCoordinates = SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN;
+        nStoredPoints = (1 << (SYMCRYPT_ECURVE_SW_DEF_WINDOW-2));
+    }
+    else if ( pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS )
+    {
+        pSizes->eCoordinates = SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE;
+        nStoredPoints = (1 << (SYMCRYPT_ECURVE_SW_DEF_WINDOW-2));
+    }
+    else if ( pParams->type == SYMCRYPT_ECURVE_TYPE_MONTGOMERY )
+    {
+        pSizes->eCoordinates = SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE;
+        nStoredPoints = 1;
+    }
+    else
+    {
+        return FALSE;
+    }
+
+    // The curve is not initialized yet, so the point size comes from the helper function.
+    pSizes->cbEcpoint = SymCryptSizeofEcpointEx( pSizes->cbModElement, SYMCRYPT_INTERNAL_NUMOF_COORDINATES( pSizes->eCoordinates ) );
+
+    // Memory needed for the curve: fixed structure, field modulus, the A and B
+    // constants, subgroup order, cofactor, and finally the stored points.
+    //
+    // From symcrypt_internal.h we have:
+    //      - sizeof results are upper bounded by 2^19
+    // Thus the following calculation does not overflow cbAlloc.
+    //
+    pSizes->cbAlloc = sizeof( SYMCRYPT_ECURVE ) +
+                      pSizes->cbModulus +
+                      2 * pSizes->cbModElement +
+                      pSizes->cbSubgroupOrder +
+                      pSizes->cbCoFactor;
+    pSizes->cbAlloc += nStoredPoints * pSizes->cbEcpoint;
+
+    // Memory needed for internal scratch space: the maximum over
+    //      - EcpointSetValue and SymCryptOfflinePrecomputation
+    //      - IntToModulus( FMod and GOrd )
+    //      - ModElementSetValue( FMod )
+    //
+    // From symcrypt_internal.h we have:
+    //      - sizeof results are upper bounded by 2^19
+    //      - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE)
+    //      - SymCryptSizeofEcpointEx is bounded by 2^20
+    // Thus the following calculation does not overflow cbScratch.
+    //
+    cbScratchNeeded = SymCryptSizeofEcpointEx( pSizes->cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ) +
+                      8 * pSizes->cbModElement +
+                      SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pSizes->nDigitsFieldLength ),
+                                    SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pSizes->nDigitsFieldLength ) );
+    cbScratchNeeded = SYMCRYPT_MAX( cbScratchNeeded,
+                                    SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( SYMCRYPT_MAX(pSizes->nDigitsFieldLength, pSizes->nDigitsSubgroupOrder) ) );
+    cbScratchNeeded = SYMCRYPT_MAX( cbScratchNeeded,
+                                    SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pSizes->nDigitsFieldLength ) );
+    pSizes->cbScratch = cbScratchNeeded;
+
+    return TRUE;
+}
+
+// Reports the curve buffer size and scratch buffer size required for the given
+// parameters. Returns FALSE, leaving both outputs untouched, when the parameters are
+// rejected by SymCryptEcurveValidateAndComputeSizes().
+BOOLEAN
+SYMCRYPT_CALL
+SymCryptEcurveBufferSizesFromParams(
+    _In_ PCSYMCRYPT_ECURVE_PARAMS pParams,
+    _Out_ SIZE_T * pcbCurve,
+    _Out_ SIZE_T * pcbScratch )
+{
+    SYMCRYPT_ECURVE_SIZES sizes;
+
+    if ( !SymCryptEcurveValidateAndComputeSizes( pParams, &sizes ) )
+    {
+        return FALSE;
+    }
+
+    *pcbCurve = sizes.cbAlloc;
+    *pcbScratch = sizes.cbScratch;
+
+    return TRUE;
+}
+
+// Internal function which actually computes and writes curve into the given buffer.
+//
+// This is called internally by both SymCryptEcurveCreate() and SymCryptEcurveAllocate().
+//
+//  pParams     Parameter header. The curve data is read from the bytes that follow it,
+//              in this order: field modulus, A, B, generator (2 * cbFieldLength bytes),
+//              subgroup order, cofactor, and for version 2 the seed and the V2 extension.
+//  flags       Not used (marked UNREFERENCED_PARAMETER).
+//  pSizes      Sizes previously computed for pParams.
+//  pbCurve     Buffer of pSizes->cbAlloc bytes that receives the curve. Objects are laid
+//              out after the SYMCRYPT_ECURVE structure in this order: field modulus, A, B,
+//              subgroup order, cofactor, generator, remaining precomputed points.
+//  pbScratch   Scratch buffer of pSizes->cbScratch bytes.
+//
+// Returns the curve (the start of pbCurve) on success. On any failure the whole curve
+// buffer is wiped and NULL is returned.
+static
+PSYMCRYPT_ECURVE
+SymCryptEcurveInitialize(
+    _In_ PCSYMCRYPT_ECURVE_PARAMS pParams,
+    _In_ UINT32 flags,
+    _In_ PCSYMCRYPT_ECURVE_SIZES pSizes,
+    _Out_writes_bytes_( pSizes->cbAlloc ) PBYTE pbCurve,
+    _Out_writes_bytes_( pSizes->cbScratch) PBYTE pbScratch )
+{
+    BOOLEAN fSuccess = FALSE;
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PSYMCRYPT_ECURVE pCurve = (PSYMCRYPT_ECURVE)pbCurve;
+    PBYTE pDst = NULL; // Destination pointer
+    PBYTE pSrc = NULL; // Source pointer
+
+    PBYTE pSrcGenerator = NULL; // We have to set the generator point
+                                // only after we have fully initialized the curve
+
+    PSYMCRYPT_INT pTempInt = 0;
+
+    PSYMCRYPT_MODELEMENT peTemp = NULL;
+
+    PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION pcParamsV2Ext = NULL;
+
+    UNREFERENCED_PARAMETER( flags );
+
+    // -----------------------------------------------
+    // Populating the fields of the curve object
+    // -----------------------------------------------
+
+    // Version of curve structure
+    pCurve->version = SYMCRYPT_INTERNAL_ECURVE_VERSION_LATEST;
+
+    // Type of curve
+    pCurve->type = (int) pParams->type;
+
+    // Curve point format
+    pCurve->eCoordinates = pSizes->eCoordinates;
+
+    // Number of digits of the field modulus
+    pCurve->FModDigits = pSizes->nDigitsFieldLength;
+
+    // Number of digits of the group order
+    pCurve->GOrdDigits = pSizes->nDigitsSubgroupOrder;
+
+    // Byte size of field elements
+    pCurve->FModBytesize = (UINT32)pParams->cbFieldLength;
+
+    // Byte size of group elements
+    SYMCRYPT_ASSERT( pParams->cbSubgroupOrder < UINT32_MAX );
+    pCurve->GOrdBytesize = (UINT32)pParams->cbSubgroupOrder;
+
+    // Byte size of mod elements
+    pCurve->cbModElement = pSizes->cbModElement;
+
+    // Total bytesize of the curve (used to free the curve object)
+    pCurve->cbAlloc = pSizes->cbAlloc;
+
+    // Set destination and source pointers
+    // (pDst walks the curve buffer, pSrc walks the parameter blob; both start right
+    // after their fixed-size header structure)
+    pDst = ((PBYTE) pCurve) + sizeof( SYMCRYPT_ECURVE );
+    pSrc = ((PBYTE) pParams) + sizeof( SYMCRYPT_ECURVE_PARAMS );
+
+    // Field Modulus
+    pCurve->FMod = SymCryptModulusCreate( pDst, pSizes->cbModulus, pSizes->nDigitsFieldLength );
+    if ( pCurve->FMod == NULL )
+    {
+        goto cleanup;
+    }
+
+    pTempInt = SymCryptIntFromModulus( pCurve->FMod );
+    if ( pTempInt == NULL)
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptIntSetValue( pSrc, pParams->cbFieldLength, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pTempInt );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Field Modulus Bitsize
+    pCurve->FModBitsize = SymCryptIntBitsizeOfValue( pTempInt );
+    if (pCurve->FModBitsize < SYMCRYPT_ECURVE_MIN_BITSIZE_FMOD)
+    {
+        scError = SYMCRYPT_WRONG_KEY_SIZE;
+        goto cleanup;
+    }
+
+    if( (SymCryptIntGetValueLsbits32( pTempInt ) & 1) == 0 )
+    {
+        // 'Prime' must be odd to avoid errors in conversion to modulus
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // IntToModulus requirement:
+    //      FModBitsize >= SYMCRYPT_ECURVE_MIN_BITSIZE_FMOD --> pTempInt > 0
+    SymCryptIntToModulus(
+        pTempInt,
+        pCurve->FMod,
+        SYMCRYPT_INTERNAL_ECURVE_MODULUS_NUMOF_OPERATIONS( 8 * pParams->cbFieldLength ),
+        SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME,
+        pbScratch,
+        pSizes->cbScratch );
+
+    pDst += pSizes->cbModulus;
+    pSrc += pParams->cbFieldLength;
+
+    // A constant
+    pCurve->A = SymCryptModElementCreate( pDst, pSizes->cbModElement, pCurve->FMod );
+    if ( pCurve->A == NULL )
+    {
+        goto cleanup;
+    }
+    scError = SymCryptModElementSetValue(
+        pSrc,
+        pParams->cbFieldLength,
+        SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,
+        pCurve->FMod,
+        pCurve->A,
+        pbScratch,
+        pSizes->cbScratch );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+    pDst += pSizes->cbModElement;
+    pSrc += pParams->cbFieldLength;
+
+    // B constant
+    pCurve->B = SymCryptModElementCreate( pDst, pSizes->cbModElement, pCurve->FMod );
+    if ( pCurve->B == NULL )
+    {
+        goto cleanup;
+    }
+
+    // Detect Short-Weierstrass curves with A == -3 (NIST prime curves are all of this form)
+    // Use B's ModElement space for check
+    if( pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS )
+    {
+        SymCryptModElementSetValueNegUint32(
+            3,
+            pCurve->FMod,
+            pCurve->B,
+            pbScratch,
+            pSizes->cbScratch );
+        // NOTE(review): the call above does not assign scError, so this test only
+        // re-examines the value that was already checked after A was set.
+        if ( scError != SYMCRYPT_NO_ERROR )
+        {
+            goto cleanup;
+        }
+        if( SymCryptModElementIsEqual( pCurve->FMod, pCurve->A, pCurve->B ) )
+        {
+            pCurve->type = SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS_AM3;
+        }
+    }
+
+    // Set B to the correct value
+    // (this overwrites the temporary -3 stored in B by the check above)
+    scError = SymCryptModElementSetValue(
+        pSrc,
+        pParams->cbFieldLength,
+        SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,
+        pCurve->FMod,
+        pCurve->B,
+        pbScratch,
+        pSizes->cbScratch );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+    pDst += pSizes->cbModElement;
+    pSrc += pParams->cbFieldLength;
+
+    // Skip over the distinguished point until we fix all the parameters and scratch space sizes
+    pSrcGenerator = pSrc;
+    pSrc += pParams->cbFieldLength * 2;
+
+    // Subgroup Order
+    pCurve->GOrd = SymCryptModulusCreate( pDst, pSizes->cbSubgroupOrder, pSizes->nDigitsSubgroupOrder );
+    if ( pCurve->GOrd == NULL )
+    {
+        goto cleanup;
+    }
+
+    pTempInt = SymCryptIntFromModulus( pCurve->GOrd );
+    if ( pTempInt == NULL)
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptIntSetValue( pSrc, pParams->cbSubgroupOrder, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pTempInt );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Subgroup Order Bitsize
+    pCurve->GOrdBitsize = SymCryptIntBitsizeOfValue( pTempInt );
+    if (pCurve->GOrdBitsize < SYMCRYPT_ECURVE_MIN_BITSIZE_GORD)
+    {
+        scError = SYMCRYPT_WRONG_KEY_SIZE;
+        goto cleanup;
+    }
+
+    if( (SymCryptIntGetValueLsbits32( pTempInt ) & 1) == 0 )
+    {
+        // 'Prime' must be odd to avoid errors in conversion to modulus
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // IntToModulus requirement:
+    //      GOrdBitsize >= SYMCRYPT_ECURVE_MIN_BITSIZE_GORD --> pTempInt > 0
+    SymCryptIntToModulus(
+        pTempInt,
+        pCurve->GOrd,
+        SYMCRYPT_INTERNAL_ECURVE_GROUP_ORDER_NUMOF_OPERATIONS,
+        SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME,
+        pbScratch,
+        pSizes->cbScratch );
+
+    pDst += pSizes->cbSubgroupOrder;
+    pSrc += pParams->cbSubgroupOrder;
+
+    // Cofactor
+    pCurve->H = SymCryptIntCreate( pDst, pSizes->cbCoFactor, pSizes->nDigitsCoFactor );
+    if ( pCurve->H == NULL )
+    {
+        goto cleanup;
+    }
+    scError = SymCryptIntSetValue(
+        pSrc,
+        pParams->cbCofactor,
+        SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,
+        pCurve->H );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+
+    // Make sure that the cofactor is not zero or too big
+    // (a zero cofactor has bitsize 0, so the subtraction wraps to (UINT32)-1)
+    pCurve->coFactorPower = SymCryptIntBitsizeOfValue( pCurve->H ) - 1;
+    if (pCurve->coFactorPower == (UINT32)-1 || pCurve->coFactorPower > SYMCRYPT_ECURVE_MAX_COFACTOR_POWER)
+    {
+        goto cleanup;
+    }
+
+    // Validate that the cofactor is a power of two
+    if (!SymCryptIntIsEqualUint32( pCurve->H, 1<<(pCurve->coFactorPower) ))
+    {
+        goto cleanup;
+    }
+
+    pDst += pSizes->cbCoFactor;
+    pSrc += pParams->cbCofactor;
+
+    // Calculate scratch spaces' sizes
+    // (twisted Edwards curves store their window data in the same info.sw fields)
+    if (pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS)
+    {
+        pCurve->info.sw.window = SYMCRYPT_ECURVE_SW_DEF_WINDOW;
+        pCurve->info.sw.nPrecompPoints = (1 << (SYMCRYPT_ECURVE_SW_DEF_WINDOW-2));
+        pCurve->info.sw.nRecodedDigits = pCurve->GOrdBitsize + 1; // This is the maximum - used by the wNAF Interleaving method
+    }
+    else if ( pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS )
+    {
+        pCurve->info.sw.window = SYMCRYPT_ECURVE_SW_DEF_WINDOW;
+        pCurve->info.sw.nPrecompPoints = (1 << (SYMCRYPT_ECURVE_SW_DEF_WINDOW-2));
+        pCurve->info.sw.nRecodedDigits = pCurve->GOrdBitsize + 1; // This is the maximum - used by the wNAF Interleaving method
+    }
+
+    SymCryptEcurveFillScratchSpaces(pCurve);
+
+    // Now set the distinguished point
+    pCurve->G = SymCryptEcpointCreate( pDst, pSizes->cbEcpoint, pCurve );
+    if ( pCurve->G == NULL )
+    {
+        goto cleanup;
+    }
+    scError = SymCryptEcpointSetValue(
+        pCurve,
+        pSrcGenerator,
+        pParams->cbFieldLength * 2,
+        SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,
+        SYMCRYPT_ECPOINT_FORMAT_XY,
+        pCurve->G,
+        SYMCRYPT_FLAG_DATA_PUBLIC,
+        pbScratch,
+        pSizes->cbScratch );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+    pDst += pSizes->cbEcpoint;
+
+    // Fill the precomputed table
+    if ( (pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS) ||
+         (pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS) )
+    {
+        // The first point of the table is the generator
+        pCurve->info.sw.poPrecompPoints[0] = pCurve->G;
+
+        // The remaining table entries are created back to back after the generator
+        for (UINT32 i=1; i<pCurve->info.sw.nPrecompPoints; i++)
+        {
+            pCurve->info.sw.poPrecompPoints[i] = SymCryptEcpointCreate( pDst, pSizes->cbEcpoint, pCurve );
+            if ( pCurve->info.sw.poPrecompPoints[i] == NULL )
+            {
+                goto cleanup;
+            }
+            pDst += pSizes->cbEcpoint;
+        }
+
+        SymCryptOfflinePrecomputation( pCurve, pbScratch, pSizes->cbScratch );
+    }
+
+    // For Montgomery curve, we calculate A = (A + 2) / 4
+    if (pParams->type == SYMCRYPT_ECURVE_TYPE_MONTGOMERY)
+    {
+        // The temporary lives at the start of the scratch buffer; the calls below are
+        // handed the remainder of the buffer as their scratch space.
+        // NOTE(review): unlike the other Create calls in this function, the result is
+        // not checked for NULL before use.
+        peTemp = SymCryptModElementCreate( pbScratch, pSizes->cbModElement, pCurve->FMod );
+
+        // SetValueUint32 requirements:
+        //  FMod > 2 since it has more than SYMCRYPT_ECURVE_MIN_BITSIZE_FMOD bits
+        SymCryptModElementSetValueUint32( 2, pCurve->FMod, peTemp, pbScratch + pSizes->cbModElement, pSizes->cbScratch - pSizes->cbModElement );
+        SymCryptModAdd (pCurve->FMod, pCurve->A, peTemp, pCurve->A, pbScratch + pSizes->cbModElement, pSizes->cbScratch - pSizes->cbModElement ); // A = A + 2;
+        SymCryptModDivPow2( pCurve->FMod, pCurve->A, 2, pCurve->A, pbScratch + pSizes->cbModElement, pSizes->cbScratch - pSizes->cbModElement ); // A = (A + 2) / 4
+    }
+
+    // Set the default curve policy for parameters of version 2
+    if (pParams->version == 2)
+    {
+        // Skip over the seed (if any)
+        pSrc += pParams->cbSeed;
+
+        // Copy the extension info (it can be unaligned)
+        pcParamsV2Ext = (PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION) pSrc;
+    }
+    else
+    {
+        // Set the defaults for version 1
+        if (pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS)
+        {
+            pcParamsV2Ext = SymCryptEcurveParamsV2ExtensionShortWeierstrass;
+        }
+        else if ( pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS )
+        {
+            pcParamsV2Ext = SymCryptEcurveParamsV2ExtensionTwistedEdwards;
+        }
+        else if ( pParams->type == SYMCRYPT_ECURVE_TYPE_MONTGOMERY )
+        {
+            pcParamsV2Ext = SymCryptEcurveParamsV2ExtensionMontgomery;
+        }
+    }
+
+    // NOTE(review): pcParamsV2Ext is still NULL here if a version 1 blob has a type
+    // matching none of the branches above; the reads below rely on the type having
+    // been rejected earlier by SymCryptEcurveValidateAndComputeSizes() - confirm.
+    pCurve->PrivateKeyDefaultFormat = pcParamsV2Ext->PrivateKeyDefaultFormat;
+    pCurve->HighBitRestrictionNumOfBits = pcParamsV2Ext->HighBitRestrictionNumOfBits;
+    pCurve->HighBitRestrictionPosition = pcParamsV2Ext->HighBitRestrictionPosition;
+    pCurve->HighBitRestrictionValue = pcParamsV2Ext->HighBitRestrictionValue;
+
+    // Make sure that the HighBitRestrictions make sense
+    // (see SymCryptIntGet/SetBits)
+    if ( (pCurve->HighBitRestrictionNumOfBits>32) ||
+         ((pCurve->HighBitRestrictionNumOfBits>0) &&
+          (pCurve->HighBitRestrictionPosition + pCurve->HighBitRestrictionNumOfBits > pCurve->GOrdBitsize + pCurve->coFactorPower)) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Setting the magic
+    SYMCRYPT_SET_MAGIC( pCurve );
+
+    fSuccess = TRUE;
+
+cleanup:
+    // scError is only used for control flow inside this function; callers just see
+    // NULL, and the partially written curve buffer is wiped.
+    if (!fSuccess)
+    {
+        SymCryptWipe( pbCurve, pSizes->cbAlloc );
+        pCurve = NULL;
+    }
+
+    return pCurve;
+}
+
+// Builds a curve inside caller-provided buffers.
+//
+// Returns NULL when the parameters are rejected, when cbCurve or cbScratch is smaller
+// than the sizes computed from pParams, or when initialization fails.
+PSYMCRYPT_ECURVE
+SYMCRYPT_CALL
+SymCryptEcurveCreate(
+    _In_ PSYMCRYPT_ECURVE_PARAMS pParams,
+    _In_ UINT32 flags,
+    _Out_writes_bytes_( cbCurve ) PBYTE pbCurve,
+    SIZE_T cbCurve,
+    _Out_writes_bytes_( cbScratch ) PBYTE pbScratch,
+    SIZE_T cbScratch)
+{
+    SYMCRYPT_ECURVE_SIZES sizes;
+
+    PSYMCRYPT_ECURVE pCurve = NULL;
+
+    if ( !SymCryptEcurveValidateAndComputeSizes(pParams, &sizes) )
+    {
+        goto cleanup;
+    }
+
+    if ( cbCurve < sizes.cbAlloc )
+    {
+        goto cleanup;
+    }
+
+    if ( cbScratch < sizes.cbScratch )
+    {
+        goto cleanup;
+    }
+
+    pCurve = SymCryptEcurveInitialize( pParams, flags, &sizes, pbCurve, pbScratch );
+
+cleanup:
+    return pCurve;
+}
+
+// Builds a curve in memory obtained from SymCryptCallbackAlloc().
+//
+// The scratch buffer is always wiped and freed before returning. The curve buffer is
+// freed here only when initialization did not succeed. Returns NULL on any failure.
+PSYMCRYPT_ECURVE
+SYMCRYPT_CALL
+SymCryptEcurveAllocate(
+    _In_ PCSYMCRYPT_ECURVE_PARAMS pParams,
+    _In_ UINT32 flags )
+{
+    SYMCRYPT_ECURVE_SIZES sizes;
+
+    PBYTE pbCurve = NULL;
+    PBYTE pbScratch = NULL;
+
+    PSYMCRYPT_ECURVE pCurve = NULL;
+
+    if ( !SymCryptEcurveValidateAndComputeSizes(pParams, &sizes) )
+    {
+        goto cleanup;
+    }
+
+    pbCurve = SymCryptCallbackAlloc( sizes.cbAlloc );
+    if ( pbCurve == NULL )
+    {
+        goto cleanup;
+    }
+
+    pbScratch = SymCryptCallbackAlloc( sizes.cbScratch );
+    if ( pbScratch == NULL )
+    {
+        goto cleanup;
+    }
+
+    pCurve = SymCryptEcurveInitialize( pParams, flags, &sizes, pbCurve, pbScratch );
+    if ( pCurve != NULL )
+    {
+        // Ownership of the buffer has passed to the returned curve; clearing the
+        // local pointer keeps the cleanup code below from freeing it.
+        pbCurve = NULL;
+    }
+
+cleanup:
+    if ( pbScratch != NULL )
+    {
+        SymCryptWipe( pbScratch, sizes.cbScratch );
+        SymCryptCallbackFree( pbScratch );
+    }
+
+    if ( pbCurve != NULL )
+    {
+        SymCryptCallbackFree( pbCurve );
+    }
+
+    return pCurve;
+}
+
+// Wipes the cbAlloc bytes of the curve object and releases it with SymCryptCallbackFree().
+VOID
+SYMCRYPT_CALL
+SymCryptEcurveFree( _Out_ PSYMCRYPT_ECURVE pCurve )
+{
+    SYMCRYPT_CHECK_MAGIC( pCurve );
+
+    SymCryptWipe( (PBYTE) pCurve, pCurve->cbAlloc );
+
+    SymCryptCallbackFree( pCurve );
+}
+
+// Returns the stored bit size of the field modulus value.
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurveBitsizeofFieldModulus( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->FModBitsize;
+}
+
+// Returns the stored bit size of the subgroup order value.
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurveBitsizeofGroupOrder( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->GOrdBitsize;
+}
+
+// Returns the number of digits of the field modulus.
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurveDigitsofFieldElement( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->FModDigits;
+}
+
+// Returns the byte size of a field element (the cbFieldLength the curve was built from).
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurveSizeofFieldElement( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->FModBytesize;
+}
+
+// Returns the byte size of the subgroup order (the cbSubgroupOrder the curve was built from).
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurveSizeofScalarMultiplier( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->GOrdBytesize;
+}
+
+// Returns the subgroup order modulus of the curve.
+PCSYMCRYPT_MODULUS
+SYMCRYPT_CALL
+SymCryptEcurveGroupOrder( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->GOrd;
+}
+
+// Returns the number of digits needed for GOrdBitsize + coFactorPower bits.
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurveDigitsofScalarMultiplier( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return SymCryptDigitsFromBits( pCurve->GOrdBitsize + pCurve->coFactorPower );
+}
+
+// Returns the default private key format recorded in the curve.
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurvePrivateKeyDefaultFormat( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->PrivateKeyDefaultFormat;
+}
+
+// Returns the number of bits covered by the curve's high bit restriction.
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurveHighBitRestrictionNumOfBits( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->HighBitRestrictionNumOfBits;
+}
+
+// Returns the position of the curve's high bit restriction.
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurveHighBitRestrictionPosition( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->HighBitRestrictionPosition;
+}
+
+// Returns the value of the curve's high bit restriction.
+UINT32
+SYMCRYPT_CALL
+SymCryptEcurveHighBitRestrictionValue( _In_ PCSYMCRYPT_ECURVE pCurve )
+{
+    return pCurve->HighBitRestrictionValue;
+}
+
+// Returns TRUE when both pointers refer to the same object, or when the two curves have
+// the same type, field modulus, A and B. The generator, subgroup order and cofactor are
+// not part of the comparison.
+BOOLEAN
+SYMCRYPT_CALL
+SymCryptEcurveIsSame(
+    _In_ PCSYMCRYPT_ECURVE pCurve1,
+    _In_ PCSYMCRYPT_ECURVE pCurve2)
+{
+    BOOLEAN fIsSameCurve = FALSE;
+
+    if ( pCurve1 == pCurve2 )
+    {
+        fIsSameCurve = TRUE;
+        goto cleanup;
+    }
+
+    // A and B of both curves are compared using the first curve's field modulus;
+    // the moduli themselves are compared first in this short-circuit chain.
+    if ( (pCurve1->type != pCurve2->type) ||
+         !SymCryptIntIsEqual (
+            SymCryptIntFromModulus( pCurve1->FMod ),
+            SymCryptIntFromModulus( pCurve2->FMod ) ) ||
+         !SymCryptModElementIsEqual ( pCurve1->FMod, pCurve1->A, pCurve2->A ) ||
+         !SymCryptModElementIsEqual ( pCurve1->FMod, pCurve1->B, pCurve2->B ) )
+    {
+        goto cleanup;
+    }
+
+    fIsSameCurve = TRUE;
+
+cleanup:
+    return fIsSameCurve;
+}
diff --git a/libs/symcrypt/lib/env_windowsUserModeWin8_1.c b/libs/symcrypt/lib/env_windowsUserModeWin8_1.c
new file mode 100644
index 00000000000..ae66963b264
--- /dev/null
+++ b/libs/symcrypt/lib/env_windowsUserModeWin8_1.c
@@ -0,0 +1,187 @@
+//
+// env_windowsUserMode.c
+// Platform-specific code for windows user mode.
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//

//#include "precomp.h"

#pragma warning(push)
#pragma warning(disable: 5103)  // Arm64's wdm.h included below currently generates a lot of 5103 warnings
#include <windows.h>
#pragma warning(pop)
#include "symcrypt.h"
#include "sc_lib.h"

//
// Returns the CPU feature mask that this environment reports as never present.
// For this environment the mask is always 0.
//
SYMCRYPT_CPU_FEATURES SYMCRYPT_CALL SymCryptCpuFeaturesNeverPresentEnvWindowsUsermodeWin8_1nLater()
{
    return 0;
}

//
// One-time library initialization for this environment.
//
// Does nothing when SYMCRYPT_FLAG_LIB_INITIALIZED is already set in g_SymCryptFlags.
// Otherwise it fills in g_SymCryptCpuFeaturesNotPresent using the detection path
// selected at compile time for the target CPU, and then hands 'version' to
// SymCryptInitEnvCommon.
//
VOID
SYMCRYPT_CALL
SymCryptInitEnvWindowsUsermodeWin8_1nLater( UINT32 version )
{
    if( g_SymCryptFlags & SYMCRYPT_FLAG_LIB_INITIALIZED )
    {
        return;
    }

#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
    //
    // First we detect what the CPU has
    //
    SymCryptDetectCpuFeaturesByCpuid( SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM );

    //
    // We also need to be sure that the OS supports the extended registers.
    // If the OS does not report the AVX / AVX512 state components as enabled,
    // the corresponding features are marked as not present.
    //
    {
        ULONGLONG FeatureMask = GetEnabledXStateFeatures();

        if( !(FeatureMask & XSTATE_MASK_AVX) )
        {
            g_SymCryptCpuFeaturesNotPresent |= SYMCRYPT_CPU_FEATURE_AVX2;
        }

        if( !(FeatureMask & XSTATE_MASK_AVX512) )
        {
            g_SymCryptCpuFeaturesNotPresent |= SYMCRYPT_CPU_FEATURE_AVX512;
        }
    }

    //
    // Our SaveXmm function never fails because it doesn't have to do anything in User mode.
    // Clearing the bit in the "not present" mask marks SAVEXMM_NOFAIL as available.
    //
    g_SymCryptCpuFeaturesNotPresent &= ~SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL;

#elif SYMCRYPT_CPU_ARM

    // Every feature except NEON is marked as not present.
    g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~SYMCRYPT_CPU_FEATURE_NEON;

#elif SYMCRYPT_CPU_ARM64

    SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent();

#endif

    SymCryptInitEnvCommon( version );
}

//
// Fatal error handler for this environment.
//
// Calls SymCryptFatalIntercept first, then tries progressively more drastic ways
// to stop the process. Each later step only matters if the previous one somehow
// returned; the order of the steps is deliberate.
//
_Analysis_noreturn_
VOID
SYMCRYPT_CALL
SymCryptFatalEnvWindowsUsermodeWin8_1nLater( UINT32 fatalCode )
{
    UINT32 fatalCodeVar;

    SymCryptFatalIntercept( fatalCode );

    //
    // Put the fatal code in a location where it shows up in the dump
    //
    SYMCRYPT_FORCE_WRITE32( &fatalCodeVar, fatalCode );

    //
    // Our first preference is to fastfail,
    // the second to create an AV, which triggers a Watson report so that we get to
    // see what is going wrong.
    //
    __fastfail( FAST_FAIL_CRYPTO_LIBRARY );

    //
    // Next we write to the NULL pointer, this causes an AV
    //
    SYMCRYPT_FORCE_WRITE32( (volatile UINT32 *)NULL, fatalCode );

    //
    // If that fails, we terminate the process. (This function call also ensures that this environment is actually
    // used in user mode and not some other environment.)
    // (During testing we had the TerminateProcess as the first option, but that makes debugging very hard as
    // it leaves no traces of what went wrong.)
    //
    TerminateProcess( GetCurrentProcess(), fatalCode );

    SymCryptFatalHang( fatalCode );
}

#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86

//
// XMM save hook: a no-op in this environment that always reports success.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSaveXmmEnvWindowsUsermodeWin8_1nLater( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea )
{
    //
    // In usermode there is no need to save XMM registers.
    // The compiler should inline this function and optimize it away.
    //

    UNREFERENCED_PARAMETER( pSaveArea );

    return SYMCRYPT_NO_ERROR;
}

//
// XMM restore hook: a no-op in this environment.
//
VOID
SYMCRYPT_CALL
SymCryptRestoreXmmEnvWindowsUsermodeWin8_1nLater( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea )
{
    //
    // In usermode there is no need to restore XMM registers.
    // The compiler should inline this function and optimize it away.
    //

    UNREFERENCED_PARAMETER( pSaveArea );
}


//
// YMM save hook: a no-op in this environment that always reports success.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSaveYmmEnvWindowsUsermodeWin8_1nLater( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea )
{
    //
    // In usermode there is no need to save the extended registers, so there is nothing to do.
    // The compiler should inline this function and optimize it away.
    //

    UNREFERENCED_PARAMETER( pSaveArea );

    return SYMCRYPT_NO_ERROR;
}

//
// YMM restore hook: a no-op in this environment.
//
VOID
SYMCRYPT_CALL
SymCryptRestoreYmmEnvWindowsUsermodeWin8_1nLater( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea )
{
    //
    // In usermode there is no need to restore the extended registers, so there is nothing to do.
    // The compiler should inline this function and optimize it away.
    //

    UNREFERENCED_PARAMETER( pSaveArea );
}

#endif

//
// Error-injection hook: intentionally empty in this environment; both arguments are ignored.
//
VOID
SYMCRYPT_CALL
SymCryptTestInjectErrorEnvWindowsUsermodeWin8_1nLater( PBYTE pbBuf, SIZE_T cbBuf )
{
    //
    // This feature is only used during testing. In production it is always
    // an empty function that the compiler can optimize away.
    //
    UNREFERENCED_PARAMETER( pbBuf );
    UNREFERENCED_PARAMETER( cbBuf );
}

#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86

//
// Thin wrapper that forwards to the __cpuidex intrinsic; the result is written to cpuInfo[0..3].
//
VOID
SYMCRYPT_CALL
SymCryptCpuidExFuncEnvWindowsUsermodeWin8_1nLater( int cpuInfo[4], int function_id, int subfunction_id )
{
    __cpuidex( cpuInfo, function_id, subfunction_id );
}

#endif

/*
Patch metadata for the next file in this series, preserved verbatim:

diff --git a/libs/symcrypt/lib/equal.c b/libs/symcrypt/lib/equal.c
new file mode 100644
index 00000000000..eec4c804fb1
--- /dev/null
+++ b/libs/symcrypt/lib/equal.c
@@ -0,0 +1,48 @@
*/
//
// equal.c   Memory comparison routine that is safe against side channels.
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//

#include "precomp.h"

//
// Compares two buffers of cbBytes bytes each without an early exit.
//
//  pbSrc1, pbSrc2  - buffers to compare; each must be readable for cbBytes bytes.
//  cbBytes         - number of bytes to compare. With cbBytes == 0 neither loop
//                    runs and the buffers are reported as equal.
//
// Returns nonzero when every byte matches, 0 otherwise.
//
// The loop trip counts depend only on cbBytes; differences are OR-ed into an
// accumulator and inspected once at the end, so no branch depends on the
// buffer contents.
// NOTE(review): the 4-byte path reads through pointers cast from byte pointers;
// this presumably relies on SYMCRYPT_FORCE_READ32 tolerating unaligned
// addresses - confirm against the macro definition.
//
BOOLEAN
SYMCRYPT_CALL
SymCryptEqual( _In_reads_( cbBytes )   PCBYTE pbSrc1,
               _In_reads_( cbBytes )   PCBYTE pbSrc2,
                                       SIZE_T cbBytes )
{
    UINT32 neq = 0;     // accumulates the XOR of every compared word/byte; stays 0 only if all match
    BYTE b;
    volatile BYTE * p1 = (volatile BYTE *) pbSrc1;
    volatile BYTE * p2 = (volatile BYTE *) pbSrc2;

    //
    // We use forced-access memory reads to ensure that the compiler doesn't get
    // smart and implement an early-out solution.
    //

    // Bulk of the data: 4 bytes per iteration.
    while( cbBytes >= 4 )
    {
        neq |= SYMCRYPT_FORCE_READ32( (volatile UINT32 *) p1 ) ^ SYMCRYPT_FORCE_READ32( (volatile UINT32 *) p2 );
        p1 += 4;
        p2 += 4;
        cbBytes -= 4;
    }

    // We have to deal with the remaining bytes using a separate accumulator to work around an issue in the ARM64 compiler.
    // At most 3 bytes are left at this point; do not fold 'b' into the loop above.
    if( cbBytes > 0 )
    {
        b = 0;
        while( cbBytes > 0 )
        {
            b |= SYMCRYPT_FORCE_READ8( p1 ) ^ SYMCRYPT_FORCE_READ8( p2 );
            p1++;
            p2++;
            cbBytes--;
        }
        neq |= b;
    }

    return neq == 0;
}

/*
Patch metadata for the next file in this series, preserved verbatim:

diff --git a/libs/symcrypt/lib/fdef_general.c b/libs/symcrypt/lib/fdef_general.c
new file mode 100644
index 00000000000..fe18eaac5f5
--- /dev/null
+++ b/libs/symcrypt/lib/fdef_general.c
@@ -0,0 +1,1550 @@
*/
//
// fdef_general.c   General functions of the default format.
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// +// +// + +#include "precomp.h" + +#include "smallPrimes32.h" // For SymCryptTestTrialdivisionMaxSmallPrime + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 + +#define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES (16) // Measured on amd64 +#define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES (2) // Measured on amd64 +#define SYMCRYPT_RABINMILLER_DIGIT_CYCLES (43000) // Measured on amd64 + +#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM + +#define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES (18) // Measured on x86 +#define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES (16) // Measured on x86 +#define SYMCRYPT_RABINMILLER_DIGIT_CYCLES (25300) // Measured on x86 + +#else + +#define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES (18) // Measured on x86 +#define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES (16) // Measured on x86 +#define SYMCRYPT_RABINMILLER_DIGIT_CYCLES (25300) // Measured on x86 + +#endif + + +#define SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME (1<<22) // Some large limit to bound memory usage +C_ASSERT( SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME <= UINT32_MAX ); +C_ASSERT( SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME == ((UINT32) SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME) ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopyC( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ) + /* + This function is dangerous, and would create a buffer overflow if nDigits > nDigits for pbDst + It also appears that it is never called. Consider removing it if it is not needed. + */ +{ + UINT64 m64 = (UINT64)0 - (mask & 1); + PUINT64 pSrc = (PUINT64) pbSrc; // should be a const pointer to match pSrc + PUINT64 pDst = (PUINT64) pbDst; + SIZE_T i; + + // This allows 0xffffffff and 0, is that what you wanted? + // If so, ( mask == 0xffffffff || mask == 0 ) + // would be more readable. It is also odd that 1 is not valid, but it results in exactly the + // same code flow as ~0. 
+ SYMCRYPT_ASSERT( (mask + 1) < 2 ); // Check that mask is valid + + // This - nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ) + // seems to occur often. Consider a macro with a name that explains what you are doing + // A comment on the macro which explains why this multiplication is never a problem would be + // helpful - I'm fairly sure it is not a problem. + for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); i += 2 ) + { + pDst[i ] = (pSrc[i ] & m64) | (pDst[i ] & ~m64 ); + pDst[i+1] = (pSrc[i+1] & m64) | (pDst[i+1] & ~m64 ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopy( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ) +{ +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbSrc ); + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbDst ); + SymCryptFdefMaskedCopyAsm( pbSrc, pbDst, nDigits, mask ); +#else + SymCryptFdefMaskedCopyC( pbSrc, pbDst, nDigits, mask ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptFdefConditionalSwapC( + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc1, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc2, + UINT32 nDigits, + UINT32 cond ) +{ + /* + Some documentation as to what the cond argument means would be helpful. + */ + UINT64 m64 = (UINT64)0 - (cond & 1); + PUINT64 pSrc1 = (PUINT64) pbSrc1; + PUINT64 pSrc2 = (PUINT64) pbSrc2; + UINT64 tmp1 = 0; + UINT64 tmp2 = 0; + SIZE_T i; + + // Unlike the previous function, this only allows 0 and 1 why? 
+ SYMCRYPT_ASSERT( cond < 2 ); // Check that the condition is valid + + for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); i += 2 ) + { + tmp1 = (pSrc1[i ] ^ pSrc2[i ]) & m64; + tmp2 = (pSrc1[i+1] ^ pSrc2[i+1]) & m64; + + pSrc1[i ] ^= tmp1; pSrc2[i ] ^= tmp1; + pSrc1[i+1] ^= tmp2; pSrc2[i+1] ^= tmp2; + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefConditionalSwap( + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc1, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc2, + UINT32 nDigits, + UINT32 cond ) +{ + SymCryptFdefConditionalSwapC( pbSrc1, pbSrc2, nDigits, cond ); +} + + +UINT32 +SymCryptFdefDigitsFromBits( UINT32 nBits ) +{ + UINT32 res; + + if( nBits == 0 ) + { + res = 1; + } + else + { + SYMCRYPT_ASSERT( nBits <= SYMCRYPT_INT_MAX_BITS ); + + // Callers with integers larger than SYMCRYPT_INT_MAX_BITS should not occur in real use cases + // To avoid overflow issues, return the 0 digits to indicate an error which can be handled by + // callers, or flow through into object allocation which will in turn recognize the invalid + // digit count. + if( nBits > SYMCRYPT_INT_MAX_BITS ) + { + res = 0; + } else { + res = SYMCRYPT_FDEF_DIGITS_FROM_BITS( nBits ); + } + } + + return res; +} + +// Let's limit max bits to the number of bits we actually test +C_ASSERT( SYMCRYPT_INT_MAX_BITS < (1 << 30) ); // Larger values can cause overflows and sign confusion + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntAllocate( UINT32 nDigits ) +{ + PVOID p = NULL; + UINT32 cb; + PSYMCRYPT_INT res = NULL; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofIntFromDigits. Thus + // the result does not overflow and is upper bounded by 2^18. 
+ // + cb = SymCryptFdefSizeofIntFromDigits( nDigits ); + + if( cb != 0 ) + { + p = SymCryptCallbackAlloc( cb ); + } + + if( p == NULL ) + { + goto cleanup; + } + + res = SymCryptIntCreate( p, cb, nDigits ); + +cleanup: + return res; +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofIntFromDigits( UINT32 nDigits ) +{ + SYMCRYPT_ASSERT( nDigits != 0 ); + SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS ); + + // Ensure we do not overflow the following calculation when provided with invalid inputs + if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS ) + { + return 0; + } + + // Note: ti stands for 'Type-Int' and it helps catch type errors when type-casting macros are used. + return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_INT, ti ) + nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + PSYMCRYPT_INT pInt = NULL; + UINT32 cb = SymCryptFdefSizeofIntFromDigits( nDigits ); + + SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_INT) ); + SYMCRYPT_ASSERT( cbBuffer >= cb ); + if( (cb == 0) || (cbBuffer < cb) ) + { + goto cleanup; // return NULL + } + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + pInt = (PSYMCRYPT_INT) pbBuffer; + + pInt->type = 'gI' << 16; + pInt->nDigits = nDigits; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofIntFromDigits. Thus + // the result does not overflow and is upper bounded by 2^18. + // + pInt->cbSize = cb; + + SYMCRYPT_SET_MAGIC( pInt ); + +cleanup: + return pInt; +} + + +VOID +SymCryptFdefIntCopyFixup( + _In_ PCSYMCRYPT_INT pSrc, + _Out_ PSYMCRYPT_INT pDst ) +{ + UNREFERENCED_PARAMETER( pSrc ); + UNREFERENCED_PARAMETER( pDst ); // not used in FRE builds... 
+ + SYMCRYPT_SET_MAGIC( pDst ); +} + +VOID +SymCryptFdefIntCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + // + // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy. + // + if( piSrc != piDst ) + { + // This is normally considered a banned, unsafe function. A note about why it is safe in this use + // would be good. + memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), SYMCRYPT_OBJ_NBYTES( piDst )); + } +} + +VOID +SymCryptFdefIntMaskedCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 mask ) + /* + Function notes would be helpful - what is mask, what does it do? + */ +{ + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + SymCryptFdefMaskedCopy( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piDst ), piSrc->nDigits, mask ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntConditionalCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 cond ) +{ + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + SymCryptFdefMaskedCopy( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piDst ), piSrc->nDigits, SYMCRYPT_MASK32_NONZERO( cond ) ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntConditionalSwap( + _Inout_ PSYMCRYPT_INT piSrc1, + _Inout_ PSYMCRYPT_INT piSrc2, + UINT32 cond ) +{ + SYMCRYPT_CHECK_MAGIC( piSrc1 ); + SYMCRYPT_CHECK_MAGIC( piSrc2 ); + + SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits ); + + SymCryptFdefConditionalSwap( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), piSrc1->nDigits, cond ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntBitsizeOfObject( _In_ 
PCSYMCRYPT_INT piSrc ) +{ + // This does not overflow since the nDigits field is + // bounded by SYMCRYPT_FDEF_UPB_DIGITS. + return SYMCRYPT_FDEF_DIGIT_BITS * piSrc->nDigits; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefNumberofDigitsFromInt( _In_ PCSYMCRYPT_INT piSrc ) +{ + return piSrc->nDigits; +} + +SYMCRYPT_ERROR +SymCryptFdefIntCopyMixedSize( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 n; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy. + if( piSrc == piDst ) + { + goto cleanup; + } + + // + // Copy the digits that are available in both + // + n = SYMCRYPT_MIN( piSrc->nDigits, piDst->nDigits ); + memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), n * SYMCRYPT_FDEF_DIGIT_SIZE ); + + if( piDst->nDigits > n ) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[n * SYMCRYPT_FDEF_DIGIT_NUINT32], (piDst->nDigits - n) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } + + if( piSrc->nDigits > n ) + { + // Check that the rest of the source is zero + PUINT64 p = (PUINT64) &SYMCRYPT_FDEF_INT_PUINT32( piSrc )[n * SYMCRYPT_FDEF_DIGIT_NUINT32]; + UINT64 v = 0; + UINT32 i = (piSrc->nDigits - n) * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); + while( i > 0 ) + { + v |= *p++; + i--; + } + + // + // If the Src doesn't fit, we are allowed to publish that fact, so we can use an IF. + // + if( v != 0 ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + } + +cleanup: + return scError; +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefBitsizeOfUint32( UINT32 v ) +{ + UINT32 res; + UINT32 mask; + UINT32 vUpper; + UINT32 vBit1; + + // This is tricky to do side-channel safe using only defined behaviour of the C language. + + // This is very difficult to make any sense of. 
A comment containing the C code that one would normally + // write to do the same thing would be helpful. I will need to come back to this. + // Also, there is no test coverage of this function. There should be a unit test to show that it does the same thing + // as the code one would normally write. + + vUpper = v & 0xffff0000; + mask = (UINT32) ( (0 -(UINT64)(vUpper)) >> 32 ); // mask = 0 or 0xffffffff + res = mask & 16; // Why do we want the 9th bit? Also, 0x10 would be better here + v = ((v & 0xffff) & ~mask) | ((vUpper >> 16) & mask); + + vUpper = v & 0xff00; + mask = (0 - vUpper) >> 16; // mask = 0 or 0xffff + res |= mask & 8; + v = ((v & 0xff) & ~mask) | ((v >> 8) & mask); + + vUpper = v & 0xf0; + mask = (0 - vUpper) >> 16; + res |= mask & 4; + v = ((v & 0xf) & ~mask) | ((v >> 4) & mask ); + + vUpper = v & 0xc; + mask = (0 - vUpper) >> 16; + res |= mask & 2; + v = ((v & 0x3) & ~mask) | ((v >> 2) & mask); + + // + // Only 2 bits left. + // + vBit1 = (v >> 1) & 1; + res |= vBit1; + + // + // Now we have the bit number of the MSbit set in res. + // We need to increase this by one if v was nonzero, so that we + // get 0 for v==0, and the # bits needed for v > 0 + // + res += (v | vBit1) & 1; + + return res; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc ) +{ + UINT32 nUint32 = SYMCRYPT_OBJ_NUINT32( piSrc ); + + UINT32 res = 0; + UINT32 msNonzeroWord = 0; // most significant nonzero digit + UINT32 searchingMask = SYMCRYPT_MASK32_SET; // Set if still searching, 0 otherwise + UINT32 d; + UINT32 dIsNonzeroMask; + UINT32 foundMask; + + SYMCRYPT_CHECK_MAGIC( piSrc ); + + // This while loop reveals the value of nUint32, is that OK? + // If so, document why + while( nUint32 > 0 ) + { + // + // Invariant: + // If no nonzero digit has been found, res = 0 and updateMask = -1. 
+ // If a nonzero digit has been found: + // msNonzeroDigit = most significant nonzero digit in Src + // res = index where most-significant nonzero digit was found + // updateMask = 0 + // + + nUint32--; + d = SYMCRYPT_FDEF_INT_PUINT32( piSrc )[nUint32]; + + dIsNonzeroMask = SYMCRYPT_MASK32_NONZERO( d ); + foundMask = dIsNonzeroMask & searchingMask; + res |= nUint32 & foundMask; + msNonzeroWord |= d & foundMask; + searchingMask &= ~foundMask; + } + + // + // If all words are zero, then res == 0 and msNonzeroDigit == 0. + // + res = res * 8 * sizeof( UINT32 ) + SymCryptFdefBitsizeOfUint32( msNonzeroWord ); + + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetValueUint32( + UINT32 u32Src, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piDst ); + + SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_OBJ_NBYTES( piDst ) ); + SYMCRYPT_FDEF_INT_PUINT32( piDst )[0] = u32Src; +} + +C_ASSERT( SYMCRYPT_FDEF_DIGIT_SIZE >= 8 ); // Code below fails if this doesn't hold + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetValueUint64( + UINT64 u64Src, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piDst ); + + SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_OBJ_NBYTES( piDst ) ); + SYMCRYPT_FDEF_INT_PUINT32( piDst )[0] = (UINT32) u64Src; + SYMCRYPT_FDEF_INT_PUINT32( piDst )[1] = (UINT32)(u64Src >> 32); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefRawSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst, + UINT32 nDigits ) +{ + SYMCRYPT_ERROR scError; + UINT32 b; + INT32 step; + UINT32 w; + UINT32 windex; + UINT32 i; + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + // + // This is a very simple and slow generic implementation; + // We'll create optimized versions for specific CPU platforms + // (e.g. use of memcpy) + // + + // I assume the number format is public? 
+ switch( format ) + { + case SYMCRYPT_NUMBER_FORMAT_LSB_FIRST: + step = 1; + break; + case SYMCRYPT_NUMBER_FORMAT_MSB_FIRST: + step = -1; + pbSrc += cbSrc; // avoid tripping pointer overflow sanitizer with cbSrc == 0 + pbSrc--; + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + for( windex = 0; windex < nWords; windex++ ) + { + w = 0; + for( i=0; i<4; i++ ) + { + // read the next byte into b + if( cbSrc > 0 ) + { + b = *pbSrc; + cbSrc -= 1; + pbSrc += step; + w |= b << 8*i; + } + } + pDst[windex] = w; + } + + // Inspect any remaining input bytes + b = 0; + while( cbSrc > 0 ) + { + b |= *pbSrc; + pbSrc += step; + cbSrc -= 1; + } + + if( b > 0 ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefIntSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ERROR scError; + + SYMCRYPT_CHECK_MAGIC( piDst ); + + scError = SymCryptFdefRawSetValue( pbSrc, cbSrc, format, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits ); + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefRawGetValue( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ) +{ + SYMCRYPT_ERROR scError; + UINT32 b; + INT32 step; + UINT32 w; + UINT32 windex; + UINT32 i; + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + // + // This is a very simple and slow generic implementation; + // We'll create optimized versions for specific CPU platforms + // (e.g. 
use of memcpy) + // + + switch( format ) + { + case SYMCRYPT_NUMBER_FORMAT_LSB_FIRST: + step = 1; + break; + case SYMCRYPT_NUMBER_FORMAT_MSB_FIRST: + step = -1; + pbDst += cbDst; // avoid tripping pointer overflow sanitizer with cbSrc == 0 + pbDst--; + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + for( windex = 0; windex < nWords; windex++ ) + { + w = pSrc[windex]; + for( i=0; i<4; i++ ) + { + b = w & 0xff; + w >>= 8; + + // write the next byte + if( cbDst > 0 ) + { + *pbDst = (BYTE)b; + cbDst -= 1; + pbDst += step; + } else { + if( b != 0 ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + } + } + } + + // Zero any remaining output bytes + while( cbDst > 0 ) + { + *pbDst = 0; + pbDst += step; + cbDst -= 1; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefIntGetValue( + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ) +{ + SYMCRYPT_ERROR scError; + + SYMCRYPT_CHECK_MAGIC( piSrc ); + + scError = SymCryptFdefRawGetValue( &SYMCRYPT_FDEF_INT_PUINT32( piSrc )[0], piSrc->nDigits, pbDst, cbDst, format ); + + return scError; +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetValueLsbits32( _In_ PCSYMCRYPT_INT piSrc ) +{ + // nDigits cannot be zero, so we don't have to test + return SYMCRYPT_FDEF_INT_PUINT32( piSrc )[0]; +} + +UINT64 +SYMCRYPT_CALL +SymCryptFdefIntGetValueLsbits64( _In_ PCSYMCRYPT_INT piSrc ) +{ + // nDigits cannot be zero, so we don't have to test + PCUINT32 p = SYMCRYPT_FDEF_INT_PUINT32( piSrc ); + return ((UINT64)(p[1]) << 32) | p[0]; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsEqualUint32( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits, + _In_ UINT32 u32Src2 ) +{ + UINT32 d; + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + d = pSrc1[0] ^ u32Src2; + for( UINT32 i=1; i<nWords; i++) + { + d |= pSrc1[i]; + } + + return 
SYMCRYPT_MASK32_ZERO( d ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsEqualUint32( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ UINT32 u32Src2 ) +{ + return SymCryptFdefRawIsEqualUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[0], piSrc1->nDigits, u32Src2 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsEqual( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ) +{ + UINT32 d; + UINT32 n1 = SYMCRYPT_OBJ_NUINT32( piSrc1 ); + UINT32 n2 = SYMCRYPT_OBJ_NUINT32( piSrc2 ); + UINT32 i; + UINT32 n; + PCUINT32 pSrc1 = SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ); + PCUINT32 pSrc2 = SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ); + + n = SYMCRYPT_MIN( n1, n2 ); + d = 0; + for( i=0; i < n ; i++ ) + { + d |= pSrc1[i] ^ pSrc2[i]; + } + + // i == n1 or i == n2, so at most one of the 2 loops below is ever run + + while( i < n1 ) + { + d |= pSrc1[i]; + i++; + } + + while( i < n2 ) + { + d |= pSrc2[i]; + i++; + } + + return SYMCRYPT_MASK32_ZERO( d ); +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorAllocate( UINT32 nDigits ) +{ + PVOID p = NULL; + UINT32 cb; + PSYMCRYPT_DIVISOR res = NULL; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofDivisorFromDigits. Thus + // the result does not overflow and is upper bounded by 2^19. 
+ // + cb = SymCryptFdefSizeofDivisorFromDigits( nDigits ); + + if( cb != 0 ) + { + p = SymCryptCallbackAlloc( cb ); + } + + if( p == NULL ) + { + goto cleanup; + } + + res = SymCryptFdefDivisorCreate( p, cb, nDigits ); + +cleanup: + return res; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofDivisorFromDigits( UINT32 nDigits ) +{ + SYMCRYPT_ASSERT( nDigits != 0 ); + SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS ); + + // Ensure we do not overflow the following calculation when provided with invalid inputs + if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS ) + { + return 0; + } + + return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_DIVISOR, Int ) + SymCryptFdefSizeofIntFromDigits( nDigits ); +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + PSYMCRYPT_DIVISOR pdDiv = NULL; + UINT32 cb = SymCryptSizeofDivisorFromDigits( nDigits ); + + SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_DIVISOR) ); + SYMCRYPT_ASSERT( cbBuffer >= cb ); + if( (cb == 0) || (cbBuffer < cb) ) + { + goto cleanup; // return NULL + } + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + pdDiv = (PSYMCRYPT_DIVISOR) pbBuffer; + + pdDiv->type = 'gD' << 16; + pdDiv->nDigits = nDigits; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofDivisorFromDigits. Thus + // the result does not overflow and is upper bounded by 2^19. 
+ // + pdDiv->cbSize = cb; + + SYMCRYPT_SET_MAGIC( pdDiv ); + + SymCryptIntCreate( (PBYTE)&pdDiv->Int, cbBuffer - SYMCRYPT_FIELD_OFFSET( SYMCRYPT_DIVISOR, Int ), nDigits ); + +cleanup: + return pdDiv; +} + +VOID +SymCryptFdefDivisorCopyFixup( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ) +{ + UNREFERENCED_PARAMETER( pdSrc ); + UNREFERENCED_PARAMETER( pdDst ); + + SymCryptFdefIntCopyFixup( &pdSrc->Int, &pdDst->Int ); + + SYMCRYPT_SET_MAGIC( pdDst ); +} + +VOID +SymCryptFdefDivisorCopy( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ) +{ + SYMCRYPT_CHECK_MAGIC( pdSrc ); + SYMCRYPT_CHECK_MAGIC( pdDst ); + + SYMCRYPT_ASSERT( pdSrc->nDigits == pdDst->nDigits ); + + // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy. + if( pdSrc != pdDst ) + { + memcpy( pdDst, pdSrc, pdDst->cbSize ); + + SymCryptFdefDivisorCopyFixup( pdSrc, pdDst ); + } +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefClaimScratch( PBYTE pbScratch, SIZE_T cbScratch, SIZE_T cbMin ) +{ +#if SYMCRYPT_DEBUG + SYMCRYPT_ASSERT( cbScratch >= cbMin ); + SymCryptWipe( pbScratch, cbMin ); +#else + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + UNREFERENCED_PARAMETER( cbMin ); +#endif +} + +UINT32 +SymCryptTestTrialdivisionMaxSmallPrime( + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ) +{ + return pContext->maxTrialPrime; +} + +UINT64 +SymCryptInverseMod2e64( UINT64 m ) +{ + // Compute the inv64 value such that inv64 * m = 1 mod 2^64 for odd m. + // If m is even, there exists no inverse, this function will return a + // useless value in constant time. 
+ // + // We use Newton's method to search for a zero of f(x) := x^-1 - m, working modulo 2^64 + // We get the iteration formula + // x_{i+1} = x_i - f(x_i)/f'(x_i) + // = x_i - (x_i^-1 - m)/(-x_i^-2) + // = x_i + x_i^2(1/x_i - m) + // = x_i + x_i - (x_i^2 * m) + // = x_i (2 - x_i*m) + // + // Let x_i = d + 2^n * e where d = inv64 = m^-1 mod 2^64, and 2^n * e is the error term that is zero in the n least + // significant bits. We have + // x_{i+1} = (d + 2^n * e) (2 - (d + 2^n * e) * m) + // = (d + 2^n * e) (2 - d*m - 2^n * e * m) + // = (d + 2^n * e) (2 - 1 - 2^n * e * m) + // = (d + 2^n * e) (1 - 2^n * e * m) + // = d - (2^n * e * (d*m)) + (2^n * e) - (2^{2n} * e^2 * m) + // = d - (2^{2n} * e^2 * m) + // In other words, the error has been squared and multiplied by m. In our case, working modulo 2^64, the number of correct bits + // on the least significant side is doubled. + // + // To get a 4-bit correct estimate for m^-1 given odd m, we consider the least significant 4 bits of m and inv: + // m = ... m_3 m_2 m_1 m_0 + // inv = ... i_3 i_2 i_1 i_0 + // We want to directly compute i_[3..0] s.t. (m*inv) & 0xf == 1 + // working through some simple simultaneous equations it is easily shown that: + // i_0 = m_0 = 1 + // i_1 = m_1 + // i_2 = m_2 + // i_3 = m_1 ^ m_2 ^ m_3 + // Once we have 4 correct bits, we can double that multiple times using Newton's method. + // + // We use 32-bit operations for most of the iterations for speed on 32-bit platforms. 
+ // + UINT32 inv32; + UINT64 inv64; + UINT32 m32; + + m32 = (UINT32)m; + + inv32 = m32 ^ (((m32 - 1) * 0x6) & 0x8); // sets inv32 bits [3..0] + SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xf) == 1) ); + + inv32 = inv32 * (2 - inv32 * m32 ); + SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xff) == 1) ); + + inv32 = inv32 * (2 - inv32 * m32 ); + SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xffff) == 1) ); + + inv32 = inv32 * (2 - inv32 * m32 ); + SYMCRYPT_ASSERT( ((m&1) == 0) || ((inv32 * m32) == 1) ); + + inv64 = inv32; + inv64 = inv64 * (2 - inv64 * m ); + SYMCRYPT_ASSERT( ((m&1) == 0) || ((inv64 * m) == 1) ); + + return inv64; +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefInitTrialdivisionPrime( + UINT32 prime, + _Out_ PSYMCRYPT_TRIALDIVISION_PRIME pPrime ) +{ + // Compute the inverse of the prime mod 2^64 + pPrime->invMod2e64 = SymCryptInverseMod2e64( prime ); + pPrime->compareLimit = ((UINT64) -1) / prime; +} + +FORCEINLINE +UINT32 +SymCryptIsMultipleOfSmallPrime( UINT64 value, PCSYMCRYPT_TRIALDIVISION_PRIME pPrime ) +{ + return (value * pPrime->invMod2e64) <= pPrime->compareLimit; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefInitTrialDivisionGroup( PSYMCRYPT_TRIALDIVISION_GROUP pGroup, UINT32 nPrimes, UINT32 primeProd ) +{ + UINT32 f; + UINT32 r; + UINT32 i; + + pGroup->nPrimes = nPrimes; + + // These % operations are expensive; maybe we can optimize this further. + // In assembler we can do the UINT64 % UINT32 -> UINT32 + // hopefully the compiler is smart enough... + + f = (UINT32) (((UINT64)1 << 32) % primeProd); + pGroup->factor[0] = f; + r = f; + for( i=1; i<9; i++ ) + { + r = (UINT32) (SYMCRYPT_MUL32x32TO64( r, f ) % primeProd); + pGroup->factor[i] = r; + } +} + +UINT32 +SYMCRYPT_CALL +SymCryptGenerateSmallPrimes( UINT32 maxPrime, PUINT32 * ppList ) +{ + // returns a list of small primes, excluding 2, 3, 5, and 17. 
+ UINT32 nPrimes = 0; + PUINT32 pList = NULL; + + // pSieve[i] corresponds to 2*i+1 + // value X is in location X/2 + UINT32 nSieve; + PBYTE pSieve; + + UINT32 pi; + UINT32 p; + UINT32 si; + UINT32 i; + + maxPrime = SYMCRYPT_MAX( maxPrime, 32 ); // simplify error handling by always producing primes at least up to 32 + maxPrime = SYMCRYPT_MIN( maxPrime, 1 << 24 ); // Limit prime list to something sane (sieve = 8 MB, list = 4 MB or so). + + // highest index is (maxPrime - 1)/2 which encodes maxPrime if odd, or maxPrime-1 if even + nSieve = (maxPrime - 1) / 2 + 1; + + pSieve = SymCryptCallbackAlloc( nSieve ); + if( pSieve == NULL ) + { + goto cleanup; + } + + SymCryptWipe( pSieve, nSieve ); + + + pi = 1; // index of first prime 3 + p = 2*pi + 1; // prime value + for(;;) + { + si = 2*(pi*pi + pi); // index of p^2 + if( si > nSieve ) + { + break; // We're done sieving + } + while( si < nSieve ) + { + pSieve[si] = 1; + si += p; + } + // Search for the next prime + do { + pi += 1; + } while( pSieve[pi] != 0 ); + p = 2*pi + 1; + } + + // Eliminate 3, 5, and 17 + pSieve[1] = 1; + pSieve[2] = 1; + pSieve[8] = 1; + + for( i=1; i<nSieve; i++ ) + { + nPrimes += 1 - pSieve[i]; + } + + // dcl - I suspect that this is not a problem, but please document + // why this multiplication cannot overflow. I assume there is a practical limit on nPrimes, but unsure + // what that would be. 
+ pList = SymCryptCallbackAlloc( nPrimes * sizeof( UINT32 ) ); + if( pList == NULL ) + { + goto cleanup; + } + + pi = 0; + for( i=1; i<nSieve; i++ ) + { + if( pSieve[i] == 0 ) + { + pList[pi++] = 2*i+1; + } + } + + SYMCRYPT_ASSERT( pi == nPrimes ); + +cleanup: + if( pSieve != NULL ) + { + SymCryptWipe( pSieve, nSieve ); + SymCryptCallbackFree( pSieve ); + } + + *ppList = pList; + return nPrimes; +} + + +// +// SymCryptFdefCreateTrialDivisionContext +// Builds the trial-division context that SymCryptFdefIntFindSmallDivisor consumes. +// The largest trial prime is picked from a cost model (driven by nDigits) that weighs the estimated +// Rabin-Miller cost against the per-prime trial-division cost. +// The primes 3, 5 and 17 are stored separately in Primes3_5_17; the remaining primes are packed into +// groups as described by g_SymCryptSmallPrimeGroupsSpec. +// The context, the group list, the prime list and the raw prime values share a single allocation of +// nBytesAlloc bytes, and each of the three lists ends with a zero sentinel. +// Returns NULL when nDigits is 0 or when any allocation fails. +// +PCSYMCRYPT_TRIALDIVISION_CONTEXT +SYMCRYPT_CALL +SymCryptFdefCreateTrialDivisionContext( UINT32 nDigits ) +{ + PSYMCRYPT_TRIALDIVISION_CONTEXT pRes = NULL; + PBYTE pAlloc; + UINT32 nBytes; + UINT32 iPrime; + UINT32 iGroup; + UINT32 nPrimes; + UINT32 nGroups; + UINT32 M; + UINT32 iGroupSpec; + UINT32 i; + UINT32 j; + UINT64 cRabinMillerCost; + UINT64 cPerPrimeCost; + UINT64 tmp64; + UINT32 maxPrime; + UINT32 minPrime; + UINT32 nSmallPrimes = 0; + UINT32 n; + UINT32 nP; + UINT32 nG; + PUINT32 pSmallPrimeList = NULL; + + // First we estimate the largest prime we will do trial division with + // Inputs: + // - cycles/digit of reduction per group of primes + // - cycles/prime of divide test + // - cycles per digit^3 for a Rabin-Miller test + // We optimize in this model, which is pretty accurate for large inputs but underestimates the RM cost + // for smaller sizes. + + // Compute the Rabin-Miller cost estimate. We reduce it by 20% because our cost model does not take + // into account some of the trial-division cost such as memory footprint, cache pressure, + // setup cost, etc. Reducing the Rabin-Miller cost leads us to do fewer trial divisions to approximately + // balance the hidden costs. 
+ + if( nDigits <= 1000 ) + { + // nDigits is small enough to not have any overflows in this computation + if( nDigits == 0 ) + { + goto cleanup; // return NULL + } + + cRabinMillerCost = (UINT64) nDigits * nDigits * nDigits * (SYMCRYPT_RABINMILLER_DIGIT_CYCLES * 8 / 10); + i = 0; + minPrime = 0; + for(;;) + { + nPrimes = g_SymCryptSmallPrimeGroupsSpec[i].nPrimes; + maxPrime = g_SymCryptSmallPrimeGroupsSpec[i].maxPrime; + nGroups = g_SymCryptSmallPrimeGroupsSpec[i].nGroups; + cPerPrimeCost = (UINT64) nDigits * SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES / nPrimes + SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES; + + // If the last group isn't worth it, we shouldn't go to even fewer primes + if( nGroups == 0 || maxPrime * cPerPrimeCost >= cRabinMillerCost) + { + break; + } + i++; + minPrime = maxPrime; + } + + // Now we know how many primes are in the last groups, let's find out how large the largest prime should be + tmp64 = cRabinMillerCost / cPerPrimeCost; + tmp64 = SYMCRYPT_MIN( tmp64, SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME ); + maxPrime = (UINT32) tmp64; + maxPrime = SYMCRYPT_MAX( maxPrime, minPrime ); // Make sure we don't fall into the previous group size that we don't want + } + else + { + maxPrime = SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME; + } + + nSmallPrimes = SymCryptGenerateSmallPrimes( maxPrime, &pSmallPrimeList ); + if( pSmallPrimeList == NULL ) + { + // The generator could not allocate its sieve or its output list. Bail out before the + // list is copied with memcpy and before pPrimes[nP-1] is read. + goto cleanup; // return NULL + } + + // Find out how many groups we'll have, and how many actual primes we'll use + n = nSmallPrimes; + nG = 0; + nP = 0; + i = 0; + for(;;) + { + nPrimes = g_SymCryptSmallPrimeGroupsSpec[i].nPrimes; + nGroups = g_SymCryptSmallPrimeGroupsSpec[i].nGroups; + + if( n < nPrimes * nGroups || nGroups == 0 ) + { + // At the right nPrimes, compute exactly how many groups to add + n = n / nPrimes; + nG += n; + nP += n * nPrimes; + n = 0; // No primes left + break; + } + + // Use up all the groups of this size... 
+ nG += nGroups; + nP += nPrimes * nGroups; + n -= nPrimes * nGroups; + i++; + } + + // dcl - Potential integer overflow + // Need to document sizes, and limits of nG, nP, and confirm + // an overflow is not possible, also recall that size_t varies in size, but nBytes is 32-bit + nBytes = sizeof( SYMCRYPT_TRIALDIVISION_CONTEXT ) + + (nG + 1) * sizeof( SYMCRYPT_TRIALDIVISION_GROUP ) // + 1 for 0 sentinel + + (nP + 1) * sizeof( SYMCRYPT_TRIALDIVISION_PRIME ) // + 1 for 0 sentinel + + (nP + 1) * sizeof( UINT32 ); // + 1 for 0 sentinel + + pAlloc = SymCryptCallbackAlloc( nBytes ); + if( pAlloc == NULL ) + { + goto cleanup; + } + + pRes = (PSYMCRYPT_TRIALDIVISION_CONTEXT) pAlloc; + pAlloc += sizeof( *pRes ); + + pRes->nBytesAlloc = nBytes; + + pRes->pGroupList = (PSYMCRYPT_TRIALDIVISION_GROUP)pAlloc; + pAlloc += (nG + 1) * sizeof( SYMCRYPT_TRIALDIVISION_GROUP ); + + pRes->pPrimeList = (PSYMCRYPT_TRIALDIVISION_PRIME) pAlloc; + pAlloc += (nP + 1) * sizeof( SYMCRYPT_TRIALDIVISION_PRIME ); + + pRes->pPrimes = (PUINT32) pAlloc; + pAlloc += (nP + 1) * sizeof( UINT32 ); + + SYMCRYPT_ASSERT( nBytes == (SIZE_T)(pAlloc - (PBYTE)pRes) ); + + // Initialize the primes 3, 5, and 17 + SymCryptFdefInitTrialdivisionPrime( 3, &pRes->Primes3_5_17[0] ); + SymCryptFdefInitTrialdivisionPrime( 5, &pRes->Primes3_5_17[1] ); + SymCryptFdefInitTrialdivisionPrime( 17, &pRes->Primes3_5_17[2] ); + + memcpy( pRes->pPrimes, pSmallPrimeList, nP * sizeof( UINT32 ) ); + pRes->pPrimes[nP] = 0; + pRes->maxTrialPrime = pRes->pPrimes[nP-1]; + + /* + *** Old code to decrypt the nibble encoding. Keep in case we want it back later... + // Generate the other primes from the difference table. 
+ // We initialize the prime structures, and a list of the primes that is used to compute the group specs + + pNibs = &g_SymCryptSmallPrimeDifferenceNibbles[0]; + + smallPrime = 3; + nPrimes = 0; + while( smallPrime < SYMCRYPT_MAX_SMALL_PRIME ) + { + b = *pNibs++; + nib = b & 0xf; + + if( nib == 0 ) + { + smallPrime += 30; + // No check for termination here as we wouldn't encode a 0 if there wasn't another prime. + } else { + smallPrime += 2*nib; + pRes->pPrimes[nPrimes] = smallPrime; + SymCryptFdefInitTrialdivisionPrime( smallPrime, &pRes->pPrimeList[nPrimes] ); + nPrimes++; + if( smallPrime >= SYMCRYPT_MAX_SMALL_PRIME ) + { + break; + } + } + nib = b >> 4; + if( nib == 0 ) + { + smallPrime += 30; + } else { + smallPrime += 2*nib; + pRes->pPrimes[nPrimes] = smallPrime; + SymCryptFdefInitTrialdivisionPrime( smallPrime, &pRes->pPrimeList[nPrimes] ); + nPrimes++; + } + } + SYMCRYPT_ASSERT( smallPrime == SYMCRYPT_MAX_SMALL_PRIME && nPrimes == SYMCRYPT_N_SMALL_PRIMES_ENCODED ); + */ + + for( iPrime = 0; iPrime < nP; iPrime++ ) + { + SymCryptFdefInitTrialdivisionPrime( pRes->pPrimes[iPrime], &pRes->pPrimeList[iPrime] ); + } + + // Add the trailing 0s + pRes->pPrimeList[nP].invMod2e64 = 0; + pRes->pPrimeList[nP].compareLimit = 0; + + // Make sure we have the 32-bit tables, not the 64-bit ones. + // dcl - warning suppression is not portable. Also, if it is a compile time constant, shouldn't it be a compile assert? 
+#pragma warning( suppress: 4127 ) // conditional expression is constant + SYMCRYPT_ASSERT( SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT <= (UINT32)-1 ); + + iGroup = 0; + iPrime = 0; + iGroupSpec = 0; + nPrimes = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nPrimes; + nGroups = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nGroups; + while( iPrime < nP ) + { + if( nGroups == 0 ) + { + iGroupSpec +=1 ; + nPrimes = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nPrimes; + nGroups = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nGroups; + if( nGroups == 0 ) + { + nGroups = nG - iGroup; + } + } + + SYMCRYPT_ASSERT( iPrime + nPrimes <= nP ); + M = pRes->pPrimes[iPrime++]; + for( j=1; j<nPrimes; j++ ) + { + SYMCRYPT_ASSERT( M <= SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT / pRes->pPrimes[iPrime] ); + M *= pRes->pPrimes[iPrime++]; + } + SymCryptFdefInitTrialDivisionGroup( &pRes->pGroupList[iGroup], nPrimes, M ); + iGroup++; + + nGroups--; + } + + SYMCRYPT_ASSERT( iPrime == nP && iGroup == nG ); + + // Add the trailing sentinel group + pRes->pGroupList[iGroup].nPrimes = 0; + +cleanup: + if( pSmallPrimeList != NULL ) + { + SymCryptWipe( pSmallPrimeList, nSmallPrimes * sizeof( UINT32 ) ); + SymCryptCallbackFree( pSmallPrimeList ); + pSmallPrimeList = NULL; + } + return pRes; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ) +{ + // No security reason to wipe it, but our test code verifies that we wipe everything... 
+ // Perf cost is minor + SymCryptWipe( (PBYTE) pContext, pContext->nBytesAlloc ); + SymCryptCallbackFree( (PSYMCRYPT_TRIALDIVISION_CONTEXT) pContext ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntFindSmallDivisor( + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext, + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCUINT32 pSrc = SYMCRYPT_FDEF_INT_PUINT32( piSrc ); + PCUINT32 p; + UINT32 nDigits = piSrc->nDigits; + UINT32 nUint32 = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + UINT64 Acc; + PCSYMCRYPT_TRIALDIVISION_GROUP pGroup; + PCSYMCRYPT_TRIALDIVISION_PRIME pPrime; + UINT32 nPrimes; + UINT32 res; + + // Check for 2. Not really needed for prime generation, but it makes the function easier to test/document/describe. + if( (*pSrc & 1) == 0 ) + { + res = 2; + goto cleanup; + } + + // Check the factors 3, 5, 17. These are special as they divide 2^32 - 1 + // (We could also do 257 and 65537 but that doesn't seem worth the added complexity.) + Acc = 0; + p = pSrc; + do { +#if SYMCRYPT_FDEF_DIGIT_SIZE == 16 + Acc = Acc + p[0] + p[1] + p[2] + p[3]; + p += 4; +#elif (SYMCRYPT_FDEF_DIGIT_SIZE % 32) == 0 + Acc = Acc + p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7]; + p += 8; +#else + // dcl - ideally, #error would have a descriptive message so it is easily found in code if encountered, same below +#error ?? 
+#endif + } while( p < pSrc + nUint32 ); + + if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[0] ) ) + { + res = 3; + goto cleanup; + } + + if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[1] ) ) + { + res = 5; + goto cleanup; + } + + if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[2] ) ) + { + res = 17; + goto cleanup; + } + + pGroup = pContext->pGroupList; + pPrime = pContext->pPrimeList; + while( (nPrimes = pGroup->nPrimes) != 0 ) + { + // Reduce Src modulo the group product to a 64-bit value + Acc = 0; + p = pSrc + nUint32; + +#if SYMCRYPT_FDEF_DIGIT_SIZE == 16 + if( (nUint32 & 4) != 0 ) + { + // nUInt32 is 4 mod 8, process the top 4 words only + p -= 4; + Acc = + p[0] + + SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + + SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + + SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ); + } else { + // Process 8 words to start + p -= 8; + Acc = + p[0] + + SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + + SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + + SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ) + + SYMCRYPT_MUL32x32TO64( p[4], pGroup->factor[3] ) + + SYMCRYPT_MUL32x32TO64( p[5], pGroup->factor[4] ) + + SYMCRYPT_MUL32x32TO64( p[6], pGroup->factor[5] ) + + SYMCRYPT_MUL32x32TO64( p[7], pGroup->factor[6] ); + } +#elif (SYMCRYPT_FDEF_DIGIT_SIZE % 32) == 0 + + p -= 8; + Acc = + p[0] + + SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + + SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + + SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ) + + SYMCRYPT_MUL32x32TO64( p[4], pGroup->factor[3] ) + + SYMCRYPT_MUL32x32TO64( p[5], pGroup->factor[4] ) + + SYMCRYPT_MUL32x32TO64( p[6], pGroup->factor[5] ) + + SYMCRYPT_MUL32x32TO64( p[7], pGroup->factor[6] ); + +#else +#error ?? 
+#endif + while( p > pSrc ) + { + p -= 8; + Acc = + p[0] + + SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + + SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + + SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ) + + SYMCRYPT_MUL32x32TO64( p[4], pGroup->factor[3] ) + + SYMCRYPT_MUL32x32TO64( p[5], pGroup->factor[4] ) + + SYMCRYPT_MUL32x32TO64( p[6], pGroup->factor[5] ) + + SYMCRYPT_MUL32x32TO64( p[7], pGroup->factor[6] ) + + SYMCRYPT_MUL32x32TO64( (UINT32) Acc , pGroup->factor[7] ) + + SYMCRYPT_MUL32x32TO64( (UINT32)(Acc >> 32), pGroup->factor[8] ); + } + + // Now we check whether we have a multiple of one of the primes + while( nPrimes > 0 ) + { + if( SymCryptIsMultipleOfSmallPrime( Acc, pPrime ) ) + { + res = pContext->pPrimes[ (pPrime - pContext->pPrimeList) ]; // pointer subtraction auto-divides by size... + goto cleanup; + } + pPrime++; + nPrimes--; + } + + pGroup++; + } + + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + // Did not find a small factor, return zero + res = 0; + +cleanup: + return res; +} + +/* Wine hack: asm not supported yet */ + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopyAsm( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ) +{ + SymCryptFdefMaskedCopyC( pbSrc, pbDst, nDigits, mask ); +} diff --git a/libs/symcrypt/lib/fdef_int.c b/libs/symcrypt/lib/fdef_int.c new file mode 100644 index 00000000000..ba50e184802 --- /dev/null +++ b/libs/symcrypt/lib/fdef_int.c @@ -0,0 +1,1321 @@ +// +// fdef_int.c INT functions for default number format +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// Default big-number format: +// INT objects are stored in two parts: +// a SYMCRYPT_FDEF_INT structure +// an array of UINT32; the # elements in the array is a multiple of SYMCRYPT_FDEF_DIGIT_SIZE/4. 
+// +// The pointer passed points to the start of the UINT32 array, just after the SYMCRYPT_FDEF_INT structure. +// +// The generic implementation accesses the digits as an array of UINT32, but on 64-bit CPUs +// the code can also view it as an array of UINT64. +// + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddC( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + + t = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = t + pSrc1[i] + pSrc2[i]; + pDst[i] = (UINT32) t; + t >>= 32; + } + + return (UINT32) t; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAdd( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + return SymCryptFdefRawAddAsm( pSrc1, pSrc2, pDst, nDigits ); +#else + return SymCryptFdefRawAddC( pSrc1, pSrc2, pDst, nDigits ); +#endif +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddUint32( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + UINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + + t = Src2; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = t + Src1[i]; + Dst[i] = (UINT32) t; + t >>= 32; + } + + return (UINT32) t; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piSrc1 ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + SYMCRYPT_ASSERT( piSrc1->nDigits == piDst->nDigits ); + + return SymCryptFdefRawAddUint32( SYMCRYPT_FDEF_INT_PUINT32( 
piSrc1 ), u32Src2, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits && piSrc2->nDigits == piDst->nDigits ); + + return SymCryptFdefRawAdd( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), + SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), + SYMCRYPT_FDEF_INT_PUINT32( piDst ), + piDst->nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nS1 = piSrc1->nDigits; + UINT32 nS2 = piSrc2->nDigits; + UINT32 nD = piDst->nDigits; + UINT32 c; + UINT32 nW; + + SYMCRYPT_ASSERT( nD >= nS1 && nD >= nS2 ); + + if( nS1 < nS2 ) + { + c = SymCryptFdefRawAdd( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS1 ); + c = SymCryptFdefRawAddUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc2 )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS2 - nS1 ); + nW = nS2; + } else { + // nS2 < nS1 + c = SymCryptFdefRawAdd( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS2 ); + c = SymCryptFdefRawAddUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS1 - nS2 ); + nW = nS1; + } + + if( nW < nD ) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nW * SYMCRYPT_FDEF_DIGIT_NUINT32], (nD - nW) * SYMCRYPT_FDEF_DIGIT_SIZE ); + SYMCRYPT_FDEF_INT_PUINT32( piDst )[nW * SYMCRYPT_FDEF_DIGIT_NUINT32] = c; + c = 0; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubC( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + 
_Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + c = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + // c == 1 for carry, 0 for no carry + t = (UINT64) pSrc1[i] - pSrc2[i] - c; + pDst[i] = (UINT32) t; + c = (UINT32)(t >> 32) & 1; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSub( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + return SymCryptFdefRawSubAsm( pSrc1, pSrc2, pDst, nDigits ); +#else + return SymCryptFdefRawSubC( pSrc1, pSrc2, pDst, nDigits ); +#endif +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubUint32( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + c = Src2; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = (UINT64)pSrc1[i] - c; + pDst[i] = (UINT32) t; + c = (UINT32)(t >> 32) & 1; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawNeg( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 carryIn, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + c = carryIn; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = (UINT64)0 - pSrc1[i] - c; + pDst[i] = (UINT32) t; + c = (UINT32)(t >> 32) & 1; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ASSERT( piSrc1->nDigits == piDst->nDigits ); + + return SymCryptFdefRawSubUint32( 
SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), u32Src2, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits && piSrc1->nDigits == piDst->nDigits ); + + return SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nS1 = piSrc1->nDigits; + UINT32 nS2 = piSrc2->nDigits; + UINT32 nD = piDst->nDigits; + UINT32 c; + UINT32 n; + + SYMCRYPT_ASSERT( nD >= nS1 && nD >= nS2 ); + + if( nS1 < nS2 ) + { + c = SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS1 ); + c = SymCryptFdefRawNeg( &SYMCRYPT_FDEF_INT_PUINT32( piSrc2 )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS2 - nS1 ); + n = nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32; + } else { + // nS2 < nS1 + c = SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS2 ); + c = SymCryptFdefRawSubUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS1 - nS2 ); + n = nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32; + } + + // + // Set the rest of the result to 0s or 1s + // + while( n < nD * SYMCRYPT_FDEF_DIGIT_NUINT32 ) + { + SYMCRYPT_FDEF_INT_PUINT32( piDst )[n++] = 0 - c; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsLessThanC( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, 
+ UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + // We just do a subtraction without writing and return the carry + c = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + // c == 1 for carry, 0 for no carry + t = (UINT64) pSrc1[i] - pSrc2[i] - c; + c = (UINT32)(t >> 32) & 1; + } + + // All booleans are returned as masks + return 0 - c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsLessThan( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + UINT32 nDigits ) +{ +#if 0 & SYMCRYPT_CPU_AMD64 +// return SymCryptFdefRawIsLessThanAsm( pSrc1, pSrc2, nDigits ); +#else + return SymCryptFdefRawIsLessThanC( pSrc1, pSrc2, nDigits ); +#endif +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsZeroC( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 nDigits ) +{ + UINT32 i; + UINT32 c; + + c = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + c |= pSrc1[i]; + } + + // All booleans are returned as masks + return SYMCRYPT_MASK32_ZERO( c ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsZero( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 nDigits ) +{ +#if 0 & SYMCRYPT_CPU_AMD64 +// return SymCryptFdefRawIsZeroAsm( pSrc1, nDigits ); +#else + return SymCryptFdefRawIsZeroC( pSrc1, nDigits ); +#endif +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsLessThan( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ) +{ + UINT32 nD1 = piSrc1->nDigits; + UINT32 nD2 = piSrc2->nDigits; + + UINT32 res; + + if( nD1 == nD2 ) + { + res = SymCryptFdefRawIsLessThan( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nD1 ); + } else if( nD1 < nD2 ) { + res = SymCryptFdefRawIsLessThan( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nD1 ); + res |= ~SymCryptFdefRawIsZero( &SYMCRYPT_FDEF_INT_PUINT32( piSrc2 )[ nD1 * SYMCRYPT_FDEF_DIGIT_NUINT32 ], nD2 - 
nD1 ); + } else { + res = SymCryptFdefRawIsLessThan( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nD2 ); + res &= SymCryptFdefRawIsZero( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[ nD2 * SYMCRYPT_FDEF_DIGIT_NUINT32 ], nD1 - nD2 ); + } + + return res; +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntNeg( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nDigits = piDst->nDigits; + SYMCRYPT_ASSERT( piSrc->nDigits == nDigits ); + + SymCryptFdefRawNeg( SYMCRYPT_FDEF_INT_PUINT32( piSrc ), 0, SYMCRYPT_FDEF_INT_PUINT32( piDst ), nDigits ); +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T Exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + SIZE_T shiftWords = Exp / (8 * sizeof( UINT32 ) ); + SIZE_T shiftBits = Exp % (8 * sizeof( UINT32 ) ); + + UINT32 nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + if( shiftWords >= nWords ) + { + SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), nWords * sizeof( UINT32 ) ); + goto cleanup; + } + + SIZE_T i = nWords; + while( i > shiftWords ) + { + i--; + UINT64 t = (UINT64)SYMCRYPT_FDEF_INT_PUINT32( piSrc )[i - shiftWords] << 32; + if( i > shiftWords ) + { + t |= SYMCRYPT_FDEF_INT_PUINT32( piSrc )[i - shiftWords - 1]; + } + SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = (UINT32)(t >> (32 - shiftBits)); + } + + while( i > 0 ) + { + i--; + SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = 0; + } + +cleanup: + ; +} + +// In shift-based operations which we have no assembly for, and we'd like to use 32-bit words +// on 32-bit architectures and 64-bit words on 64-bit architectures. So we use NATIVE_UINT & +// friends. + +// Note that accessing the FDEF uint32 array as an array of NATIVE_UINTs relies on +// the little-endianness of the target if NATIVE_UINT is larger than 32 bits. 
+// AMD64 is little endian and ARM64 code is always expected to execute in little +// endian mode, but this is not true in general for an arbitrary 64 bit platform. +// +// If we need to support a 64 bit big endian platform, we need to either +// restrict its NATIVE_UINT to 32 bits, or introduce load and store macros. +#define SYMCRYPT_FDEF_INT_PNATIVE_UINT(p) ((NATIVE_UINT*) SYMCRYPT_FDEF_INT_PUINT32( p )) +// Ensure that sizeof(NATIVE_UINT) > 4 only when compiling for known little endian target +C_ASSERT( (NATIVE_BYTES <= 4) || SYMCRYPT_CPU_AMD64 || SYMCRYPT_CPU_ARM64 ); + +#define SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT ((NATIVE_UINT)(SYMCRYPT_FDEF_DIGIT_SIZE / NATIVE_BYTES)) + +// Ensure that digit is divisible by native word size! +C_ASSERT(SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT * NATIVE_BYTES == SYMCRYPT_FDEF_DIGIT_SIZE); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntDivPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SIZE_T shiftWords = exp / NATIVE_BITS; + SIZE_T shiftRightBits = exp % NATIVE_BITS; + SIZE_T shiftLeftBits = (NATIVE_BITS-1) - shiftRightBits; + NATIVE_UINT lowWord, highWord, highPart; + SIZE_T i = 0; + + NATIVE_UINT nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT; + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + shiftWords = SYMCRYPT_MIN(shiftWords, nWords); + if( shiftWords < nWords ) + { + lowWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[shiftWords]; + while( i+shiftWords+1 < nWords ) + { + highWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[i+shiftWords+1]; + + // We always shift highWord left by 1 to keep variable shiftLeftBits in range [0,NATIVE_BITS-1] + highPart = (highWord << shiftLeftBits)<<1; + + SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> shiftRightBits) | highPart; + + lowWord = highWord; + i++; + } + SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> shiftRightBits); + i++; + } + + SYMCRYPT_ASSERT(i + shiftWords == nWords); + + SymCryptWipe( &SYMCRYPT_FDEF_INT_PNATIVE_UINT( piDst 
)[nWords-shiftWords], shiftWords * NATIVE_BYTES ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntShr1( + UINT32 highestBit, + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT; + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + SYMCRYPT_ASSERT( highestBit < 2 ); + + SIZE_T i = 0; + NATIVE_UINT lowWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[0]; + NATIVE_UINT highWord = 0; + while( i+1 < nWords ) + { + highWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[i+1]; + + SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> 1) | (highWord << (NATIVE_BITS - 1)); + + lowWord = highWord; + i++; + } + + SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> 1) | ((NATIVE_UINT)highestBit) << (NATIVE_BITS - 1); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntModPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SIZE_T expWords = exp / 32; // index of word with the partial mask + SIZE_T expBits = exp % 32; // # bits to leave in that word + + UINT32 nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + if( piSrc != piDst ) + { + memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), nWords * sizeof( UINT32 ) ); + } + + if( expWords >= nWords ) + { + // exp is so large that Dst = Src is sufficient. 
+ goto cleanup; + } + + for( SIZE_T i=expWords + 1; i < nWords; i++ ) + { + SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = 0; + } + + if( expBits != 0 ) + { + SYMCRYPT_FDEF_INT_PUINT32( piDst )[expWords] &= ((UINT32) -1) >> (32 - expBits ); + } else { + SYMCRYPT_FDEF_INT_PUINT32( piDst )[expWords] = 0; + } + +cleanup: + ; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetBit( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit ) +{ + SYMCRYPT_ASSERT( iBit < piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_BITS ); + + return (((SYMCRYPT_FDEF_INT_PUINT32( piSrc)[iBit / 32]) >> (iBit % 32)) & 1); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetBits( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit, + UINT32 nBits ) +{ + UINT32 mainMask = 0; + UINT32 result = 0; + + SYMCRYPT_ASSERT( (nBits > 0) && + (nBits < 33) && + (iBit < piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) && + (iBit + nBits <= piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) ); + + mainMask = (UINT32)(-1) >> (32-nBits); + + // Get the lower word first (it exists since iBit is smaller than the max bit) + result = SYMCRYPT_FDEF_INT_PUINT32(piSrc)[iBit/32]; + + // Shift to the right accordingly + result >>= (iBit%32); + + // Get the upper word (if we need it) + // Note: the iBit and nBits values are public + if ((iBit%32!=0) && ( iBit/32 + 1 < piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 )) + { + result |= ( SYMCRYPT_FDEF_INT_PUINT32(piSrc)[iBit/32+1] << (32 - iBit%32) ); + } + + // Mask out the top bits + result &= mainMask; + + return result; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetBits( + _In_ PSYMCRYPT_INT piDst, + UINT32 value, + UINT32 iBit, + UINT32 nBits ) +{ + UINT32 mainMask = 0; + + UINT32 alignedVal = 0; + UINT32 alignedMask = 0; + + SYMCRYPT_ASSERT( (nBits > 0) && + (nBits < 33) && + (iBit < piDst->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) && + (iBit + nBits <= piDst->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) ); + + // Zero out the not needed bits of the value + mainMask = (UINT32)(-1) >> (32-nBits); + value &= mainMask; + + // + // Lower word + 
// + + // Create the needed mask + alignedMask = mainMask << (iBit%32); + + // Align the value + alignedVal = value << (iBit%32); + + // Set the lower word first (it exists since iBit is smaller than the max bit) + SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32] = (SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32] & ~alignedMask) | alignedVal; + + // + // Upper word + // + + if ((iBit%32!=0) && ( iBit/32 + 1 < piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 )) + { + // Create the needed mask + alignedMask = mainMask >> (32 - iBit%32); + + // Align the value + alignedVal = value >> (32 - iBit%32); + + // Set the upper word + SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32 + 1] = (SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32 + 1] & ~alignedMask) | alignedVal; + } + +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntMulUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + SYMCRYPT_ASSERT( piSrc1->nDigits == piDst->nDigits ); + + UINT64 c = 0; + for( UINT32 i=0; i<nWords; i++ ) + { + c += SYMCRYPT_MUL32x32TO64( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[i], Src2 ); + SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = (UINT32) c; + c >>= 32; + } + + return (UINT32) c; +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntMulMixedSize( piSrc1, piSrc2, piDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSquare( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nS = piSrc->nDigits; + UINT32 nD = piDst->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( piDst->nDigits ) ); + + SYMCRYPT_ASSERT( 2*nS <= nD ); + + SymCryptFdefRawSquare( SYMCRYPT_FDEF_INT_PUINT32( piSrc ), nS, 
SYMCRYPT_FDEF_INT_PUINT32( piDst ) ); + + if( 2*nS < nD ) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[2 * nS * SYMCRYPT_FDEF_DIGIT_NUINT32], (nD - 2*nS) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulC( + _In_reads_(nDigits1 * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2 * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + UINT32 nWords1 = nDigits1 * SYMCRYPT_FDEF_DIGIT_NUINT32; + UINT32 nWords2 = nDigits2 * SYMCRYPT_FDEF_DIGIT_NUINT32; + + // Set Dst to zero + SymCryptWipe( pDst, (nDigits1+nDigits2) * SYMCRYPT_FDEF_DIGIT_SIZE ); + + for( UINT32 i = 0; i < nWords1; i++ ) + { + UINT32 m = pSrc1[i]; + UINT64 c = 0; + for( UINT32 j = 0; j < nWords2; j++ ) + { + // Invariant: c < 2^32 + c += SYMCRYPT_MUL32x32TO64( pSrc2[j], m ); + c += pDst[i+j]; + // There is no overflow on C because the max value is + // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1. 
+ pDst[i+j] = (UINT32) c; + c >>= 32; + } + pDst[i + nWords2] = (UINT32) c; + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMul( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) ) + { + SymCryptFdefRawMulMulx( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); + } else { + SymCryptFdefRawMulAsm( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); + } +#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + SymCryptFdefRawMulAsm( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); +#else + SymCryptFdefRawMulC( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareC( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + UINT32 m = 0; + UINT64 c = 0; + + // Set Dst to zero + SymCryptWipe( pDst, (2*nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE ); + + // First Pass - Addition of the cross products x_i*x_j with i!=j + for( UINT32 i = 0; i < nWords; i++ ) + { + m = pSrc[i]; + c = 0; + for( UINT32 j = i+1; j < nWords; j++ ) + { + // Invariant: c < 2^32 + c += SYMCRYPT_MUL32x32TO64( pSrc[j], m ); + c += pDst[i+j]; + // There is no overflow on C because the max value is + // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1. 
+ pDst[i+j] = (UINT32) c; + c >>= 32; + } + pDst[i + nWords] = (UINT32) c; + } + + // Second Pass - Shifting all results 1 bit left + c = 0; + for( UINT32 i = 1; i < 2*nWords; i++ ) + { + c |= (((UINT64)pDst[i])<<1); + pDst[i] = (UINT32)c; + c >>= 32; + } + + // Third Pass - Adding the squares on the even columns and propagating the sum + c = 0; + for( UINT32 i = 0; i < nWords; i++ ) + { + // + // Even column + // + m = pSrc[i]; + c += SYMCRYPT_MUL32x32TO64( m, m ); + c += pDst[2*i]; + // There is no overflow on C because the max value is + // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1 + + pDst[2*i] = (UINT32) c; + c >>= 32; + + // + // Odd column + // + c += pDst[2*i+1]; + // There is no overflow on C because the max value is + // 2^32 - 1 + 2^32 - 1 = 2^33 - 2 + + pDst[2*i+1] = (UINT32) c; + c >>= 32; + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquare( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) ) + { + SymCryptFdefRawSquareMulx( pSrc, nDigits, pDst ); + } else { + SymCryptFdefRawSquareAsm( pSrc, nDigits, pDst ); + } +#elif SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + SymCryptFdefRawSquareAsm( pSrc, nDigits, pDst ); +#elif SYMCRYPT_CPU_X86 + SymCryptFdefRawMulAsm( pSrc, nDigits, pSrc, nDigits, pDst ); +#else + SymCryptFdefRawSquareC( pSrc, nDigits, pDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nS1 = piSrc1->nDigits; + UINT32 nS2 = piSrc2->nDigits; + UINT32 nD = piDst ->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( piDst->nDigits ) ); + + SYMCRYPT_ASSERT( nS1 + nS2 <= nD ); + + SymCryptFdefRawMul( 
SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), nS1, SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nS2, SYMCRYPT_FDEF_INT_PUINT32( piDst ) ); + + if( nS1 + nS2 < nD ) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[(nS1 + nS2) * SYMCRYPT_FDEF_DIGIT_NUINT32], (nD - (nS1 + nS2)) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } +} + + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntFromDivisor( _In_ PSYMCRYPT_DIVISOR pdSrc ) +{ + return &pdSrc->Int; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToDivisor( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst, + UINT32 totalOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 W; + UINT32 nBits; + UINT32 nWords; + UINT32 bitToTest; + UINT64 P; + + UNREFERENCED_PARAMETER( totalOperations ); + UNREFERENCED_PARAMETER( flags ); + + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( pdDst ); + + SYMCRYPT_ASSERT( piSrc->nDigits == pdDst->nDigits ); + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( piSrc->nDigits ) ); + + // + // Copy the Int. + // + SymCryptFdefIntCopy( piSrc, &pdDst->Int ); + + // + // For an N-bit divisor M, and D-bit divisor digit size, + // the value W is defined as + // floor( (2^{N+D} - 1) / M } - 2^D + // which is the largest W such that (W * M + 2^D * M )< 2^{N+D} + // To compute W we use a binary search. + // This can be optimized, but this is the simplest side-channel safe solution. + // We can compute the upper bits of W * M + 2^D * M in a simple loop. + // + // For now we only compute a 32-bit W for a 32-bit digit divisor size. 
+ // + + nBits = SymCryptIntBitsizeOfValue( &pdDst->Int ); + + SYMCRYPT_ASSERT( nBits != 0 ); + if( nBits == 0 ) + { + // Can't create a divisor from a Int whose value is 0 + + // We really should not have any callers which get here (it is a requirement that Src != 0) + // We assert in CHKed builds + // In release set the divisor to 1 instead + SymCryptIntSetValueUint32( 1, &pdDst->Int ); + } + + pdDst->nBits = nBits; + + nWords = (nBits + 31)/32; + bitToTest = (UINT32)1 << 31; + W = 0; + while( bitToTest > 0 ) + { + W |= bitToTest; + // Do the multiplication + P = 0; + for( UINT32 i=0; i<nWords; i++ ) + { + // Invariant: + // P <= 2^{2D} - 2 which ensures the mul-add doesn't generate an overflow + // P = floor( (W + 2^32)*M[0..i-1] / 2^{32*i} ) + P += SYMCRYPT_MUL32x32TO64( W, SYMCRYPT_FDEF_INT_PUINT32( &pdDst->Int )[i] ); + P >>= 32; + P += SYMCRYPT_FDEF_INT_PUINT32( &pdDst->Int )[i]; + } + // We are interested in bit N+D, and P[0] is bit nWords*D, this shift brings the relevant bit to position 0 + P >>= ((nBits+31) % 32) + 1; + // If the bit is 1, W*M is too large and we reset the corresponding bit in W. + W ^= bitToTest & (0 - ((UINT32)P & 1)); + bitToTest >>= 1; + } + pdDst->td.fdef.W = W; + + SYMCRYPT_SET_MAGIC( pdDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMultSubUint32( + _Inout_updates_( nUint32 + 1 ) PUINT32 pAcc, + _In_reads_( nUint32 ) PCUINT32 pSrc1, + UINT32 Src2, + UINT32 nUint32 ) +{ + // + // pAcc -= pSrc1 * Src2 + // BEWARE: this is only used by the DivMod routine, and works in Words rather than Digits + // making optimizations hard. + // + + UINT32 i; + UINT64 tmul; + UINT64 tsub; + UINT32 c; + + tmul = 0; + c = 0; + for( i=0; i<nUint32; i++ ) + { + tmul += SYMCRYPT_MUL32x32TO64( pSrc1[i], Src2 ); + tsub = (UINT64)pAcc[i] - (UINT32) tmul - c; + pAcc[i] = (UINT32) tsub; + c = (tsub >> 32) & 1; + tmul >>= 32; + } + + // Writing the last word is strictly speaking not necessary, but a really good check that things are going right. 
+ // We can remove the write, but still need the computation of c so it gains very little. + + tsub = (UINT64) pAcc[i] - (UINT32) tmul - c; + pAcc[i] = (UINT32) tsub; + c = (tsub >> 32) & 1; + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedAddSubdigit( + _Inout_updates_( nUint32 ) PUINT32 pAcc, + _In_reads_( nUint32 ) PCUINT32 pSrc, + UINT32 mask, + UINT32 nUint32 ) +{ + UINT32 i; + UINT64 t; + + t = 0; + for( i=0; i<nUint32; i++ ) + { + t = t + pAcc[i] + (mask & pSrc[i]); + pAcc[i] = (UINT32) t; + t >>= 32; + } + + return (UINT32) t; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedAdd( + _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pAcc, + _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PCUINT32 pSrc, + UINT32 mask, + UINT32 nDigits ) +{ + return SymCryptFdefRawMaskedAddSubdigit( pAcc, pSrc, mask, nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedSub( + _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pAcc, + _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PCUINT32 pSrc, + UINT32 mask, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + c = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = (UINT64) pAcc[i] - (mask & pSrc[i]) - c; + pAcc[i] = (UINT32) t; + c = (UINT32)(t >>= 32) & 1; + } + + return c; +} + + + +VOID +SYMCRYPT_CALL +SymCryptFdefRawDivMod( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pNum, + UINT32 nDigits, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_writes_opt_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pQuotient, + _Out_writes_opt_(SYMCRYPT_OBJ_NUINT32(pdDivisor)) PUINT32 pRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + UINT32 activeDivWords = (pdDivisor->nBits + 8 * sizeof(UINT32) - 1) / (8 * sizeof( UINT32 ) ); + UINT32 remainderWords = SYMCRYPT_OBJ_NUINT32( pdDivisor ); + + UINT32 cbScratchNeeded = 
(nWords+4) * sizeof( UINT32 ); + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 Qest; + UINT32 Q; + UINT32 c; + UINT32 d; + UINT32 shift; + UINT32 X0, X1; + UINT32 W; + UINT64 T; + UINT32 nQ; + + SYMCRYPT_ASSERT( cbScratch >= cbScratchNeeded ); + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbScratch ); + + if( nWords < activeDivWords ) + { + // + // input is smaller in size than the significant size of the divisor, no division to do. + // Note that both values in the if() statement are public, so this does not create a side channel. + // + + // Set quotient to zero, and the remainder to the input value + if( pQuotient != NULL ) + { + SymCryptWipe( pQuotient, nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + } + + if( pRemainder != NULL ) + { + SYMCRYPT_ASSERT( remainderWords >= nWords ); + memcpy( pRemainder, pNum, nWords * sizeof( UINT32 ) ); + SymCryptWipe( &pRemainder[nWords], (remainderWords - nWords) * sizeof( UINT32 ) ); // clear the rest of the remainder words + } + + SymCryptFdefClaimScratch( pbScratch, cbScratch, cbScratchNeeded ); + goto cleanup; + } + + // + // We have two zero words in front and two zero words behind the tmp value to allow unrestricted accesses. + // We keep the explicit offset of 2 rather than adjust the pTmp pointer to avoid negative indexes which appear + // to be buffer overflows, and cause trouble with unsigned computations of negative index values that overflow + // to 2^32 - 1 on a 64-bit CPU. 
+ // + pTmp[0] = pTmp[1] = 0; + memcpy( &pTmp[2], pNum, nWords * sizeof( UINT32 ) ); + pTmp[nWords + 2] = pTmp[nWords + 3] = 0; + shift = (0 - pdDivisor->nBits) & 31; // # bits we have to shift top words to the left to align with the W value + + // We generate the quotient words one at a time, starting at the most significant position + // The top (divWords - 1) words are always zero + + if( pQuotient != NULL ) + { + SymCryptWipe( &pQuotient[nWords - activeDivWords + 1], (activeDivWords - 1) * sizeof( UINT32 ) ); + } + + nQ = nWords - activeDivWords + 1; + + // There is always at least one word of Q to be computed, so we can use a do-while loop which + // also avoids the UINT32 underflow. + do + { + nQ--; + X0 = ( ((UINT64) pTmp[nQ + activeDivWords + 2] << 32) + pTmp[nQ + activeDivWords + 1] ) >> (32 - shift); + X1 = ( ((UINT64) pTmp[nQ + activeDivWords + 1] << 32) + pTmp[nQ + activeDivWords + 0] ) >> (32 - shift); + + W = (UINT32) pdDivisor->td.fdef.W; + T = SYMCRYPT_MUL32x32TO64( W, X0 ) + (((UINT64)X0) << 32) + X1 + ((W>>1) & ((UINT32)0 - (X1 >> 31))); + Qest = (UINT32)(T >> 32); + // At this point the estimator is correct or one too small, add one but don't overflow + Qest += 1; + Qest += SYMCRYPT_MASK32_ZERO( Qest ); + + c = SymCryptFdefRawMultSubUint32( &pTmp[nQ+2], SYMCRYPT_FDEF_INT_PUINT32( &pdDivisor->Int ), Qest, activeDivWords ); + Q = Qest - c; + d = SymCryptFdefRawMaskedAddSubdigit( &pTmp[nQ+2], SYMCRYPT_FDEF_INT_PUINT32( &pdDivisor->Int ), (0-c), activeDivWords ); + SYMCRYPT_ASSERT( c == d ); + SYMCRYPT_ASSERT( pTmp[nQ + activeDivWords+2] == (0 - c) ); + + if( pQuotient != NULL ) + { + pQuotient[nQ] = Q; + } + } while( nQ > 0 ); + + if( pRemainder != NULL ) + { + memcpy( pRemainder, pTmp+2, activeDivWords * sizeof( UINT32 ) ); + SymCryptWipe( &pRemainder[activeDivWords], (remainderWords - activeDivWords) * sizeof( UINT32 ) ); + } + +cleanup: + return; // label needs a statement to follow it... 
+} + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntDivMod( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_opt_ PSYMCRYPT_INT piQuotient, + _Out_opt_ PSYMCRYPT_INT piRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = SYMCRYPT_OBJ_NDIGITS( piSrc ); + + SYMCRYPT_ASSERT( piQuotient == NULL || piQuotient->nDigits >= piSrc->nDigits ); + SYMCRYPT_ASSERT( piRemainder == NULL || piRemainder->nDigits >= pdDivisor->nDigits ); + + SymCryptFdefRawDivMod( + SYMCRYPT_FDEF_INT_PUINT32( piSrc ), + nDigits, + pdDivisor, + piQuotient == NULL ? NULL : SYMCRYPT_FDEF_INT_PUINT32( piQuotient ), + piRemainder == NULL ? NULL : SYMCRYPT_FDEF_INT_PUINT32( piRemainder ), + pbScratch, + cbScratch + ); + + if ((piQuotient != NULL) && (piQuotient->nDigits > piSrc->nDigits)) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piQuotient )[piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32], (piQuotient->nDigits - piSrc->nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } + + if ((piRemainder != NULL) && (piRemainder->nDigits > pdDivisor->nDigits)) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piRemainder )[pdDivisor->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32], (piRemainder->nDigits - pdDivisor->nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } +} + +/* Wine hack: asm not supported yet */ + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ) +{ + return SymCryptFdefRawAddC( Src1, Src2, Dst, nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + return SymCryptFdefRawSubC( pSrc1, pSrc2, 
pDst, nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulAsm( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + SymCryptFdefRawMulC( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareAsm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + SymCryptFdefRawSquareC( pSrc, nDigits, pDst ); +} + +/* Wine hack: asm not supported yet */ + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareMulx( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + SymCryptFdefRawSquareC( pSrc, nDigits, pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulMulx( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + SymCryptFdefRawMulC( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); +} diff --git a/libs/symcrypt/lib/fdef_mod.c b/libs/symcrypt/lib/fdef_mod.c new file mode 100644 index 00000000000..3ab4c2ba438 --- /dev/null +++ b/libs/symcrypt/lib/fdef_mod.c @@ -0,0 +1,1731 @@ +// +// fdef_mod.c Modulus and ModElement functions for default number format +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusAllocate( UINT32 nDigits ) +{ + PVOID p = NULL; + UINT32 cb; + PSYMCRYPT_MODULUS res = NULL; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofModulusFromDigits. Thus + // the result does not overflow and is upper bounded by 2^19. 
+ // + cb = SymCryptFdefSizeofModulusFromDigits( nDigits ); + + if( cb != 0 ) + { + p = SymCryptCallbackAlloc( cb ); + } + + if( p == NULL ) + { + goto cleanup; + } + + res = SymCryptFdefModulusCreate( p, cb, nDigits ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusFree( _Out_ PSYMCRYPT_MODULUS pmObj ) +{ + SymCryptModulusWipe( pmObj ); + SymCryptCallbackFree( pmObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofModulusFromDigits( UINT32 nDigits ) +{ + SYMCRYPT_ASSERT( nDigits != 0 ); + SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS ); + + // Ensure we do not overflow the following calculation when provided with invalid inputs + if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS ) + { + return 0; + } + + // Room for the Modulus structure, the Divisor, the negated divisor, and the R^2 Montgomery factor + // + return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor ) + SymCryptFdefSizeofDivisorFromDigits( nDigits ) + (2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE); +} + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + PSYMCRYPT_MODULUS pmMod = NULL; + UINT32 cb = SymCryptFdefSizeofModulusFromDigits( nDigits ); + + const UINT32 offset = SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor ); + + SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_MODULUS) ); + SYMCRYPT_ASSERT( cbBuffer >= cb ); + if( (cb == 0) || (cbBuffer < cb) ) + { + goto cleanup; // return NULL + } + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + pmMod = (PSYMCRYPT_MODULUS) pbBuffer; + + pmMod->type = 'gM' << 16; + pmMod->nDigits = nDigits; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofModulusFromDigits. Thus + // the result does not overflow and is upper bounded by 2^19. 
+ // + pmMod->cbSize = cb; + pmMod->flags = 0; + + // The following is bounded by 2^17 + pmMod->cbModElement = nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; + + SymCryptFdefDivisorCreate( pbBuffer + offset, cbBuffer - offset, nDigits ); + + // We don't have a modulus value yet, so we don't create/initialize any implementation-specific things. + + SYMCRYPT_SET_MAGIC( pmMod ); + +cleanup: + return pmMod; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitGeneric( + _Inout_ PSYMCRYPT_MODULUS pmMod, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UNREFERENCED_PARAMETER( pmMod ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); +} + + +VOID +SymCryptFdefModulusCopy( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ) +{ + SYMCRYPT_ASSERT( pmSrc->nDigits == pmDst->nDigits ); + + if( pmSrc != pmDst ) + { + memcpy( pmDst, pmSrc, pmDst->cbSize ); + + SymCryptFdefDivisorCopyFixup( &pmSrc->Divisor, &pmDst->Divisor ); + + // Copy the type-specific fields + SYMCRYPT_MOD_CALL( pmSrc ) modulusCopyFixup( pmSrc, pmDst ); + + SYMCRYPT_SET_MAGIC( pmDst ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusCopyFixupGeneric( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ) +{ + // Only have to handle the type-specific fields, which we don't have any of. + UNREFERENCED_PARAMETER( pmSrc ); + UNREFERENCED_PARAMETER( pmDst ); +} + + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementAllocate( _In_ PCSYMCRYPT_MODULUS pmMod ) +{ + PVOID p; + UINT32 cb; + PSYMCRYPT_MODELEMENT res = NULL; + + // + // The nDigits requirements are enforced by the modulus object. Thus + // the result does not overflow and is upper bounded by 2^17. 
+ // + cb = SymCryptFdefSizeofModElementFromModulus( pmMod ); + + p = SymCryptCallbackAlloc( cb ); + + if( p == NULL ) + { + goto cleanup; + } + + res = SymCryptFdefModElementCreate( p, cb, pmMod ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementFree( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peObj ) +{ + SymCryptFdefModElementWipe( pmMod, peObj ); + SymCryptCallbackFree( peObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofModElementFromModulus( PCSYMCRYPT_MODULUS pmMod ) +{ + // Upper bounded by 2^17 since the modulus is up to SYMCRYPT_INT_MAXBITS = 2^20 bits. + return pmMod->cbModElement; +} + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_MODULUS pmMod ) +{ + PSYMCRYPT_MODELEMENT pDst = (PSYMCRYPT_MODELEMENT) pbBuffer; + + UNREFERENCED_PARAMETER( pmMod ); + UNREFERENCED_PARAMETER( cbBuffer ); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + SYMCRYPT_ASSERT( cbBuffer >= SymCryptFdefSizeofModElementFromModulus( pmMod ) ); + SYMCRYPT_ASSERT( cbBuffer >= pmMod->nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ); + + // + // We have various optimizations where we use only part of the last digit + // Simple and fast solution: always wipe the last digit + // +#if (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64) + UINT32 nDigits = pmMod->nDigits; + + SymCryptWipeKnownSize( pbBuffer + (nDigits-1) * SYMCRYPT_FDEF_DIGIT_SIZE, SYMCRYPT_FDEF_DIGIT_SIZE ); +#endif + + // There is nothing to initialize... 
+ + return pDst; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementWipe( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptWipe( peDst, pmMod->cbModElement ); +} + +VOID +SymCryptFdefModElementCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + if( peSrc != peDst ) + { + memcpy( peDst, peSrc, pmMod->cbModElement ); + } +} + +VOID +SymCryptFdefModElementMaskedCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 mask ) +{ + SymCryptFdefMaskedCopy( (PCBYTE) peSrc, (PBYTE) peDst, pmMod->nDigits, mask ); +} + + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ) +{ + return &pmSrc->Divisor; +} + +VOID +SymCryptFdefModElementConditionalSwap( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peData1, + _Inout_ PSYMCRYPT_MODELEMENT peData2, + _In_ UINT32 cond ) +{ + SymCryptFdefConditionalSwap( (PBYTE) &peData1->d.uint32[0], (PBYTE) &peData2->d.uint32[0], pmMod->nDigits, cond ); +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ) +{ + + return SymCryptFdefIntFromDivisor( &pmSrc->Divisor ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefDecideModulusType( PCSYMCRYPT_INT piSrc, UINT32 nDigits, UINT32 averageOperations, UINT32 flags ) +{ + UINT32 res = 0; + BOOLEAN disableMontgomery = 0; + BYTE tempBuf[64]; + PCSYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY pEntry; + + UINT32 nBitsizeOfValue = SymCryptIntBitsizeOfValue( piSrc ); + UINT32 modulusFeatures = 0; + + if( !disableMontgomery && + ( flags & (SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC)) != 0 && + (SymCryptIntGetValueLsbits32( piSrc ) & 1) == 1 && + averageOperations >= 10 ) + { + modulusFeatures |= SYMCRYPT_MODULUS_FEATURE_MONTGOMERY; + + // Specific modulus value detection + if( (flags & SYMCRYPT_FLAG_DATA_PUBLIC) != 0 ) + { + // Detect if modulus 
value is the P384 field modulus (convert piSrc to big endian and do comparison with known value of P384 modulus) + if( nBitsizeOfValue == 384 && + SymCryptFdefRawGetValue(SYMCRYPT_FDEF_INT_PUINT32(piSrc), SYMCRYPT_FDEF_DIGITS_FROM_BITS(384), tempBuf, 64, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST) == SYMCRYPT_NO_ERROR ) + { + // First 16 bytes are guaranteed to be zero because nBitsizeOfValue is 384 + if( memcmp(tempBuf+16, ((PBYTE)SymCryptEcurveParamsNistP384) + sizeof(SYMCRYPT_ECURVE_PARAMS), 48) == 0 ) + { + modulusFeatures |= SYMCRYPT_MODULUS_FEATURE_NISTP384; + } + } + + // Detect if modulus value is the P256 field modulus (not currently used) + // if( nBitsizeOfValue == 256 && + // SymCryptFdefRawGetValue(SYMCRYPT_FDEF_INT_PUINT32(piSrc), SYMCRYPT_FDEF_DIGITS_FROM_BITS(256), tempBuf, 64, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST) == SYMCRYPT_NO_ERROR ) + // { + // // First 32 bytes are guaranteed to be zero because nBitsizeOfValue is 256 + // if( memcmp(tempBuf+32, ((PBYTE)SymCryptEcurveParamsNistP256) + sizeof(SYMCRYPT_ECURVE_PARAMS), 32) == 0 ) + // { + // modulusFeatures |= SYMCRYPT_MODULUS_FEATURE_NISTP256; + // } + // } + } + } + + pEntry = SymCryptModulusTypeSelections; + + for(;;) + { + if( SYMCRYPT_CPU_FEATURES_PRESENT( pEntry->cpuFeatures ) && + (pEntry->maxBits == 0 || (nDigits <= SymCryptDigitsFromBits( pEntry->maxBits ) && nBitsizeOfValue <= pEntry->maxBits )) && + (pEntry->modulusFeatures & ~modulusFeatures) == 0 + ) + { + res = pEntry->type; + break; + } + pEntry++; + } + + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UNREFERENCED_PARAMETER( pmMod ); + UNREFERENCED_PARAMETER( peObj ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); +} + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + 
_Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UNREFERENCED_PARAMETER( pmMod ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + return &peObj->d.uint32[0]; +} + + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToModulus( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_MODULUS pmDst, + UINT32 averageOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + pmDst->flags = flags; + SymCryptIntToDivisor( piSrc, &pmDst->Divisor, averageOperations, flags & SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratch ); + + pmDst->type = SymCryptFdefDecideModulusType( piSrc, pmDst->nDigits, averageOperations, flags ); + + // Set inv64 - note the value is only valid if the modulus is odd, but the computation + // is constant time regardless of the parity, so we can safely compute it in all cases + pmDst->inv64 = 0 - SymCryptInverseMod2e64( SymCryptIntGetValueLsbits64(piSrc) ); + + SYMCRYPT_MOD_CALL( pmDst ) modulusInit( pmDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToModElement( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefRawDivMod( + SYMCRYPT_FDEF_INT_PUINT32( piSrc ), + piSrc->nDigits, + &pmMod->Divisor, + NULL, // throw away the quotient + &peDst->d.uint32[0], + pbScratch, + cbScratch ); + + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementToIntGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_bytes_( pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) + PCUINT32 pSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), pSrc, pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[pmMod->nDigits * 
SYMCRYPT_FDEF_DIGIT_NUINT32], (piDst->nDigits - pmMod->nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pmMod->nDigits ) ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModElementSetValueGeneric( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError; + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + SYMCRYPT_ASSERT( cbSrc <= nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + scError = SymCryptFdefRawSetValue( pbSrc, cbSrc, format, &peDst->d.uint32[0], nDigits ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptFdefRawDivMod( + &peDst->d.uint32[0], + nDigits, + &pmMod->Divisor, + NULL, + &peDst->d.uint32[0], + pbScratch, + cbScratch ); + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModElementGetValue( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError; + PCUINT32 pUint32; + UINT32 nDigits = pmMod->nDigits; + + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + SYMCRYPT_ASSERT( cbDst <= nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + pUint32 = SYMCRYPT_MOD_CALL( pmMod ) modPreGet( pmMod, peSrc, pbScratch, cbScratch ); + + scError = SymCryptFdefRawGetValue( pUint32, nDigits, pbDst, cbDst, format ); + + return scError; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefModElementIsEqual( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ 
PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2 ) +{ + UINT32 d; + UINT32 i; + + d = 0; + for( i=0; i < pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ; i++ ) + { + d |= peSrc1->d.uint32[i] ^ peSrc2->d.uint32[i]; + } + + return SYMCRYPT_MASK32_ZERO( d ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefModElementIsZero( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc ) +{ + UINT32 d; + UINT32 i; + + d = 0; + for( i=0; i < pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ; i++ ) + { + d |= peSrc->d.uint32[i]; // Check that all bits are zero + } + + return SYMCRYPT_MASK32_ZERO( d ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModAddGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 c; + UINT32 d; + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( cbScratch >= nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ); + + // + // Doing add/cmp/sub might be faster or not. + // Masked add is hard because the mask operations destroy the carry flag. + // + + // dcl - cleanup? + +// c = SymCryptFdefRawAdd( &pSrc1->uint32[0], &pSrc2->uint32[0], &pDst->uint32[0], nDigits); +// d = SymCryptFdefRawSub( &pDst->uint32[0], &pMod->Divisor.Int.uint32[0], &pDst->uint32[0], nDigits ); +// e = SymCryptFdefRawMaskedAdd( &pDst->uint32[0], &pMod->Divisor.Int.uint32[0], 0 - (c^d), nDigits ); + + c = SymCryptFdefRawAdd( &peSrc1->d.uint32[0], &peSrc2->d.uint32[0], &peDst->d.uint32[0], nDigits ); + d = SymCryptFdefRawSub( &peDst->d.uint32[0], SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), (PUINT32) pbScratch, nDigits ); + SymCryptFdefMaskedCopy( pbScratch, (PBYTE) &peDst->d.uint32[0], nDigits, (c^d) - 1 ); + + // We can't have a carry in the first addition, and no carry in the subtraction. 
+ SYMCRYPT_ASSERT( !( c == 1 && d == 0 ) ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSubGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 c; + UINT32 d; + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( cbScratch >= nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ); + + c = SymCryptFdefRawSub( &peSrc1->d.uint32[0], &peSrc2->d.uint32[0], &peDst->d.uint32[0], nDigits ); + d = SymCryptFdefRawAdd( &peDst->d.uint32[0], SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), (PUINT32) pbScratch, nDigits ); + SymCryptFdefMaskedCopy( pbScratch, (PBYTE) &peDst->d.uint32[0], nDigits, 0 - c ); + + SYMCRYPT_ASSERT( !(c == 1 && d == 0) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefModNegGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + UINT32 isZero; + UINT32 i; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + // + // We have to be careful to handle the value 0 properly as it does NOT map to Modulus - Value. 
+ // + isZero = SymCryptFdefRawIsEqualUint32( &peSrc->d.uint32[0], nDigits , 0 ); + SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), &peSrc->d.uint32[0], &peDst->d.uint32[0], nDigits ); + + // Now we set the result to zero if the input was zero + for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + peDst->d.uint32[i] &= ~isZero; + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementSetValueUint32Generic( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + if( pmMod->Divisor.nBits <= 32 && value >= SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int )[0] ) + { + // The value is >= the modulus; this is not supported + + // For now do a possibly non-sidechannel safe, but mathematically correct modulo operation + value %= SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int )[0]; + } + + peDst->d.uint32[0] = value; + + SymCryptWipe( &peDst->d.uint32[1], nDigits * SYMCRYPT_FDEF_DIGIT_SIZE - sizeof( UINT32 ) ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementSetValueNegUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + if( pmMod->Divisor.nBits <= 32 && value >= SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int )[0] ) + { + // The value is >= the modulus; this is not supported. 
+ + // For now do a possibly non-sidechannel safe, but mathematically correct modulo operation + value %= SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int )[0]; + } + + if( value == 0 ) + { + SymCryptWipe( &peDst->d.uint32[0], nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + } else { + SymCryptFdefRawSubUint32( SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), value, &peDst->d.uint32[0], nDigits ); + } + + // + // Possible future optimization: we can optimize the value==0 and value==1 cases on a per-type basis + // + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); +} + +// In the worst case there is a 1 in 8 chance of successfully generating a value +// This is when the modulus is 4 (nBits of modulus is 3), and 0, 1, and -1 are disallowed. +// In this case, having 1000 retries, there is a ~ 2^-193 chance of failure unless SymCryptCallbackRandom +// is completely broken. This passes the bar of being reasonable to Fatal. +#define FDEF_MOD_SET_RANDOM_GENERIC_LIMIT (1000) + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetRandomGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 offset; + UINT32 ulimit; + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 nUsedBytes; + UINT32 mask; + UINT32 c; + UINT32 cntr; + PUINT32 pDst = &peDst->d.uint32[0]; + PCUINT32 pMod = SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ); + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + if( (flags & SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO) != 0 ) + { + // SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO => SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE + offset = 0; + } else if( (flags & SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE) != 0 ) + { + offset = 1; + } else + { + offset = 2; + } + + if( (flags & SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE ) ) + { + ulimit = 0; + } else { + ulimit = 1; + } + + // + // 
Special case for small divisors: + // When the divisor is 1, 2, or 3 we always allow returning -1 + // We may also allow returning 1 or 0 depending on the flags specified + if ( pmMod->Divisor.nBits < 3 ) + { + // At a minimum, allow -1 + offset = SYMCRYPT_MIN(offset, pMod[0] - 1); + ulimit = 0; + } + + // Set pTmp to pMod-(offset+ulimit) + SYMCRYPT_ASSERT( nDigits * SYMCRYPT_FDEF_DIGIT_SIZE <= cbScratch ); + c = SymCryptFdefRawSubUint32( pMod, offset + ulimit, pTmp, nDigits ); + SYMCRYPT_ASSERT( c == 0 ); + + nUsedBytes = (pmMod->Divisor.nBits + 7)/8; + mask = 0x100 >> ( (8-pmMod->Divisor.nBits) & 7); + mask -= 1; + + // Wipe any bytes we won't fill with random + SymCryptWipe( (PBYTE)pDst + nUsedBytes, (nDigits * SYMCRYPT_FDEF_DIGIT_SIZE) - nUsedBytes ); + + for(cntr=0; cntr<FDEF_MOD_SET_RANDOM_GENERIC_LIMIT; cntr++) + { + // Try random values until we get one we like + SymCryptCallbackRandom( (PBYTE)pDst, nUsedBytes ); + ((PBYTE)pDst)[nUsedBytes-1] &= (BYTE) mask; + + // Compare value to pMod-(offset+ulimit) + if( SymCryptFdefRawIsLessThan( pDst, pTmp, nDigits ) ) + { + // The value is within required range [0, Divisor-offset-ulimit) + break; + } + } + + // Wipe all the digits in pTmp + SymCryptWipe( pTmp, nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + if (cntr >= FDEF_MOD_SET_RANDOM_GENERIC_LIMIT) + { + SymCryptFatal( 'rndc'); + } + + // Add the offset which allows us to avoid 0 and/or 1 if required. 
+ // Now result is in range [offset, Divisor-ulimit) + c = SymCryptFdefRawAddUint32( pDst, offset, pDst, nDigits ); + SYMCRYPT_ASSERT( c == 0 ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Generic( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst) +{ + UINT32 nDigits = pmMod->nDigits; + UINT32 mask; + UINT64 t; + UINT64 u; + UINT32 i; + PCUINT32 pMod = SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ); + + // mod must be odd + SYMCRYPT_ASSERT( (pMod[0] & 1) != 0 ); + SYMCRYPT_ASSERT( (exp >= 1) && (exp <= NATIVE_BITS) ); + + do + { + mask = (UINT32)0 - (peSrc->d.uint32[0] & 1); + + t = (UINT64) peSrc->d.uint32[0] + (pMod[0] & mask); + u = (UINT32) t; + t >>= 32; + + for( i = 1; i < nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t += pMod[i] & mask; + t += peSrc->d.uint32[i]; + + u |= t << 32; + + peDst->d.uint32[i-1] = (UINT32)(u >> 1); + t >>= 32; + u >>= 32; + } + u |= t << 32; + peDst->d.uint32[i-1] = (UINT32)( u >> 1 ); + + exp -= 1; + + // First iteration reads from peSrc and writes to peDst + // subsequent iterations must read from and write to peDst + peSrc = peDst; + } while (exp > 0); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) ) + { + SymCryptFdefModDivSmallPow2Mulx( pmMod, peSrc, exp, peDst ); + } + else + { + // Currently SymCryptAsm does not support AMD64 functions with shl/shr/shrd + // by a variable count, as this needs special handling of the rcx (cl) register + // For now we just fallback to the generic implementation on machines without MULX + SymCryptFdefModDivSmallPow2Generic( pmMod, peSrc, exp, peDst ); + } +#elif SYMCRYPT_CPU_ARM64 + SymCryptFdefModDivSmallPow2Asm( pmMod, peSrc, exp, 
peDst ); +#else + SymCryptFdefModDivSmallPow2Generic( pmMod, peSrc, exp, peDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 shiftAmount; + + UNREFERENCED_PARAMETER(pbScratch); + UNREFERENCED_PARAMETER(cbScratch); + + // mod must be odd + SYMCRYPT_ASSERT( (SYMCRYPT_FDEF_INT_PUINT32(&pmMod->Divisor.Int)[0] & 1) != 0 ); + + if( exp == 0 ) + { + // If exp is 0 we just need to copy peSrc to peDst + SymCryptFdefModElementCopy( pmMod, peSrc, peDst ); + return; + } + + do + { + shiftAmount = SYMCRYPT_MIN(NATIVE_BITS, exp); + SymCryptFdefModDivSmallPow2( pmMod, peSrc, shiftAmount, peDst ); + exp -= shiftAmount; + + // First iteration reads from peSrc and writes to peDst + // subsequent iterations must read from and write to peDst + peSrc = peDst; + } while( exp > 0 ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 scratchOffset = 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) >= scratchOffset + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2 * nDigits, nDigits ) ); + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbScratch ); + + // Tmp space is enough for the product plus the DivMod scratch + + SymCryptFdefRawMul( &peSrc1->d.uint32[0], nDigits, &peSrc2->d.uint32[0], nDigits, pTmp ); + + 
SymCryptFdefRawDivMod( pTmp, 2*nDigits, &pmMod->Divisor, NULL, &peDst->d.uint32[0], pbScratch + scratchOffset, cbScratch - scratchOffset ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 scratchOffset = 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) >= scratchOffset + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2 * nDigits, nDigits ) ); + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbScratch ); + + // Tmp space is enough for the product plus the DivMod scratch + + SymCryptFdefRawSquare( &peSrc->d.uint32[0], nDigits, pTmp ); + + SymCryptFdefRawDivMod( pTmp, 2*nDigits, &pmMod->Divisor, NULL, &peDst->d.uint32[0], pbScratch + scratchOffset, cbScratch - scratchOffset ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 nDigits = pmMod->nDigits; + UINT32 nBytes; + UINT32 c; + UINT32 leastSignificantUint32; + UINT32 trailingZeros; + + // + // This function is called on Montgomery moduli so we can't directly call specifically optimized modular operations from here. + // + // For now we use dispatch functions with pmMod to perform potentially optimized modular operations. 
+ // This approach makes sense when on average the cost of dispatch is less than the benefit using an optimized operation. + // The alternative is to make specialized ModInv routines for different types of moduli, but we do not yet do this to + // reduce code duplication / code size. + // + + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigits ) ); + + if( (pmMod->flags & (SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME )) != (SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME ) ) + { + // Inversion over non-public or non-prime moduli currently not supported. + // Our blinding below only works for prime moduli. + // As the modulus cannot be blinded, it requires a fully side-channel safe algorithm which is much more complicated and + // slower. + // When this is necessary, we will add a second ModInv implementation for those cases. + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // + // Algorithm: + // R = random nonzero value mod Mod + // X := Src * R (mod Mod) + // A = X + // B = Mod + // Va = 1 + // Vb = 0 + // invariant: A = Va*X (mod Mod), B = Vb*X (mod Mod), + // + // if( A == 0 ): error + // + // verify (A | B) is odd + // if B even: swap (A,B), swap( Va, Vb) + // + // repeat: + // while( A even ): + // A /= 2; Va /= 2 (mod Mod) + // if( A == 1 ): break1 + // (A, Va, B, Vb) = (B-A, Vb - Va, A, Va) + // if( A == 0 ): error (not co-prime) + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 4*nBytes ); + PSYMCRYPT_MODELEMENT peR = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + PSYMCRYPT_MODELEMENT peX = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + PSYMCRYPT_MODELEMENT peVa = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + PSYMCRYPT_MODELEMENT peVb = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= 4*nBytes; + + PSYMCRYPT_MODELEMENT 
peVtmpPtr; + + nBytes = SymCryptSizeofIntFromDigits( nDigits ); + SYMCRYPT_ASSERT( cbScratch >= 3 * nBytes ); + PSYMCRYPT_INT piA = SymCryptIntCreate( pbScratch, nBytes, nDigits ); + pbScratch += nBytes; + PSYMCRYPT_INT piB = SymCryptIntCreate( pbScratch, nBytes, nDigits ); + pbScratch += nBytes; + PSYMCRYPT_INT piT = SymCryptIntCreate( pbScratch, nBytes, nDigits ); + pbScratch += nBytes; + cbScratch -= 3*nBytes; + + PSYMCRYPT_INT piTmpPtr; + + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + // If the data is not public, multiply by a random blinding factor; otherwise copy the value + if( (flags & SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ) + { + SymCryptModSetRandom( pmMod, peR, SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE | SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE, pbScratch, cbScratch ); //R = random + SymCryptModMul( pmMod, peR, peSrc, peX, pbScratch, cbScratch ); // X = R * Src + } else + { + SymCryptModElementCopy( pmMod, peSrc, peX ); + } + + // Set up piA and piB + SymCryptFdefModElementToIntGeneric( pmMod, &peX->d.uint32[0], piA, pbScratch, cbScratch ); // A = X + SymCryptIntCopy( SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmMod ), piB ); // B = Mod + + // Reject if A = 0, B = 0, or A and B both even + if( SymCryptIntIsEqualUint32( piA, 0 ) | + SymCryptIntIsEqualUint32( piB, 0 ) | + (((SymCryptIntGetValueLsbits32( piA ) | SymCryptIntGetValueLsbits32( piB )) & 1) ^ 1) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( SymCryptIntIsEqualUint32( piB, 2 ) ) + { + // Mod = 2 is a valid input. Luckily, modular inversion is easy. + // The rest of the code assumes that Mod is odd. Other even values are not prime. 
+ SymCryptModElementCopy( pmMod, peSrc, peDst); + goto cleanup; + } + + SymCryptFdefModElementSetValueUint32Generic( 1, pmMod, peVa, pbScratch, cbScratch ); // Va = 1 + SymCryptFdefModElementSetValueUint32Generic( 0, pmMod, peVb, pbScratch, cbScratch ); // Vb = 0 + + for(;;) + { + // invariant: A = Va*X (mod Mod), B = Vb*X (mod Mod), A != 0, B > 1. + // Remove factors of 2 from A. This loop terminates because A != 0 + leastSignificantUint32 = SymCryptIntGetValueLsbits32(piA); + while( (leastSignificantUint32 & 1) == 0 ) + { + trailingZeros = SymCryptCountTrailingZeros32( leastSignificantUint32 ); + SymCryptIntDivPow2( piA, trailingZeros, piA ); + SymCryptFdefModDivSmallPow2( pmMod, peVa, trailingZeros, peVa ); + leastSignificantUint32 = SymCryptIntGetValueLsbits32(piA); + } + + if( SymCryptIntIsEqualUint32( piA, 1 ) ) + { + // A = 1 = Va * X (mod Mod), so Va is the inverse of X + break; + } + + c = SymCryptIntSubSameSize( piB, piA, piT ); + + // If A != 1 and A=B, then A is the GCD of the original inputs, and there is no inverse + if( SymCryptIntIsEqualUint32( piT, 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( c == 0 ) + { + // B > A, we set B to B-A and swap (B,A) + // that way we continue our halving on B-A + + SymCryptIntCopy( piT, piB ); + SymCryptModSub( pmMod, peVb, peVa, peVb, pbScratch, cbScratch ); + + piTmpPtr = piB; piB = piA; piA = piTmpPtr; + peVtmpPtr = peVb; peVb = peVa; peVa = peVtmpPtr; + } else { + // B < A, Set A to A-B and continue halving A + SymCryptIntNeg( piT, piA ); + SymCryptModSub( pmMod, peVa, peVb, peVa, pbScratch, cbScratch ); + } + } + + // 1 = A = Va * X (mod Mod), so Va is the inverse of X + // Check computation that we can test in the debugger + SymCryptModMul( pmMod, peVa, peX, peVb, pbScratch, cbScratch ); + + // Actual answer + + // If the data is not public, multiply by the random blinding factor; otherwise copy the value + if( (flags & SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ) + { + SymCryptModMul( 
pmMod, peVa, peR, peDst, pbScratch, cbScratch ); + } else + { + SymCryptModElementCopy( pmMod, peVa, peDst ); + } + +cleanup: + return scError; +} + + +//============================= +// Montgomery representation + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomeryInternal( + _Inout_ PSYMCRYPT_MODULUS pmMod, + UINT32 nUint32Used, // R = 2^{32 * this parameter} + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + // Scratch space is big enough for an nDigit+1 byte value + sufficient divmod scratch + PUINT32 pR2; + UINT32 cbR2; + UINT32 nDigits; + + PUINT32 modR2; + PUINT32 negDivisor; + + nDigits = pmMod->nDigits; + modR2 = (PUINT32)((PBYTE)&pmMod->Divisor + SymCryptFdefSizeofDivisorFromDigits( nDigits )); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbScratch ); + + pmMod->tm.montgomery.Rsqr = modR2; + negDivisor = (PUINT32)((PBYTE)modR2 + (nDigits * SYMCRYPT_FDEF_DIGIT_SIZE)); + + // We pre-compute R^2 mod M + + pR2 = (PUINT32) pbScratch; + cbR2 = (2*nDigits + 1) * SYMCRYPT_FDEF_DIGIT_SIZE; + SYMCRYPT_ASSERT( cbScratch >= cbR2 ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nUint32Used * sizeof(UINT32) ); + + // Set it to R^2 + SymCryptWipe( pR2, cbR2 ); + pR2[ 2 * nUint32Used ] = 1; + SymCryptFdefRawDivMod( pR2, 2*nDigits + 1, &pmMod->Divisor, NULL, modR2, pbScratch + cbR2, cbScratch - cbR2 ); + + SymCryptFdefRawNeg( SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), 0, negDivisor, nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomery( + _Inout_ PSYMCRYPT_MODULUS pmMod, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModulusInitMontgomeryInternal( pmMod, pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32, pbScratch, cbScratch ); +} + +VOID +SymCryptFdefMontgomeryReduceC( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_updates_( 2 * pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pSrc, + _Out_writes_( pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pDst ) +{ + UINT32 nDigits = pmMod->nDigits; + 
UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + PCUINT32 pMod = SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ); + + UINT32 hc = 0; + for( UINT32 i=0; i<nWords; i++ ) + { + UINT32 m = (UINT32)pmMod->inv64 * pSrc[0]; + UINT64 c = 0; + for( UINT32 j = 0; j < nWords; j++ ) + { + // Invariant: c < 2^32 + c += SYMCRYPT_MUL32x32TO64( pMod[j], m ); + c += pSrc[j]; + // There is no overflow on C because the max value is + // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1. + pSrc[j] = (UINT32) c; + c >>= 32; + } + c = c + pSrc[nWords] + hc; + pSrc[nWords] = (UINT32) c; + hc = c >> 32; + pSrc++; + } + SYMCRYPT_ASSERT( hc < 2 ); + + UINT32 d = SymCryptFdefRawSub( pSrc, pMod, pDst, nDigits ); + + SYMCRYPT_ASSERT( hc <= d ); // if hc = 1, then d = 1 is mandatory + + SymCryptFdefMaskedCopy( (PCBYTE) pSrc, (PBYTE) pDst, nDigits, hc - (hc | d) ); // copy only if hc=0, d=1 +} + +VOID +SymCryptFdefMontgomeryReduce( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_updates_( 2 * pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pSrc, + _Out_writes_( pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) ) + { + SymCryptFdefMontgomeryReduceMulx( pmMod, pSrc, pDst ); + } else { + SymCryptFdefMontgomeryReduceAsm( pmMod, pSrc, pDst ); + } +#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + SymCryptFdefMontgomeryReduceAsm( pmMod, pSrc, pDst ); +#else + SymCryptFdefMontgomeryReduceC( pmMod, pSrc, pDst ); +#endif +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + // Montgomery representation for X is R*X mod M where R = 2^<nDigits * bits-per-digit> + // Montgomery reduction performs an implicit division by R + // This function converts to the internal representation by multiplying by R^2 mod M and 
then performing a Montgomery reduction + UINT32 nDigits = pmMod->nDigits; + + // dcl - this should not incur significant cost, consider checking always + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawMul( &peObj->d.uint32[0], nDigits, pmMod->tm.montgomery.Rsqr, nDigits, (PUINT32) pbScratch ); + SymCryptFdefMontgomeryReduce( pmMod, (PUINT32) pbScratch, &peObj->d.uint32[0] ); +} + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 nDigits = pmMod->nDigits; + + // dcl - this should not incur significant cost, consider checking always + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + memcpy( pTmp, &peObj->d.uint32[0], nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + SymCryptWipe( pTmp + nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32, nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, pTmp ); + + return pTmp; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusCopyFixupMontgomery( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ) +{ + // We only have to fix up the Montgomery-specific stuff here + // dcl - not sure I understand why you pass pmSrc here + UNREFERENCED_PARAMETER( pmSrc ); + pmDst->tm.montgomery.Rsqr = (PUINT32)((PBYTE)&pmDst->Divisor + SymCryptFdefSizeofDivisorFromDigits( pmDst->nDigits )); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + // dcl - missing assert? 
+ UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawMul( &peSrc1->d.uint32[0], nDigits, &peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +#if 0 && SYMCRYPT_CPU_AMD64 +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawMulMulx( &peSrc1->d.uint32[0], nDigits, &peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduceMulx( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawMulMulx1024( &peSrc1->d.uint32[0], &peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduceMulx1024( pmMod, pTmp, &peDst->d.uint32[0] ); +} +#endif + + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * 
SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawSquare( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, &peDst->d.uint32[0] ); +} + + +#if 0 && SYMCRYPT_CPU_AMD64 +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawSquareMulx( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduceMulx( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawSquareMulx1024( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduceMulx1024( pmMod, pTmp, &peDst->d.uint32[0] ); +} +#endif + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 nDigits = pmMod->nDigits; + UINT32 nBytes = nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pTmp ); + + // + // We have R*X; we first apply the montgomery reduction twice to get X/R, and then invert that + // using the generic inversion to get R/X. 
+ // + SYMCRYPT_ASSERT( cbScratch >= 2 * nBytes ); + memcpy( pTmp, &peSrc->d.uint32[0], nBytes ); + + SymCryptWipe( (PBYTE)pTmp + nBytes, nBytes ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, pTmp ); + + SymCryptWipe( (PBYTE)pTmp + nBytes, nBytes ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, &peDst->d.uint32[0] ); + + scError = SymCryptFdefModInvGeneric( pmMod, peDst, peDst, flags, pbScratch, cbScratch ); + + return scError; +} + +#if 0 && SYMCRYPT_CPU_AMD64 + +//===================================== +// 256-bit Montgomery modulus code +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvMontgomery256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 nBytes = 32; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pTmp ); + + // + // We have R*X; we first apply the montgomery reduction twice to get X/R, and then invert that + // using the generic inversion to get R/X. 
+ // + SYMCRYPT_ASSERT( cbScratch >= 2 * nBytes ); + memcpy( pTmp, &peSrc->d.uint32[0], nBytes ); + + SymCryptWipe( (PBYTE)pTmp + nBytes, nBytes ); + SymCryptFdefMontgomeryReduce256Asm( pmMod, pTmp, pTmp ); + + SymCryptWipe( (PBYTE)pTmp + nBytes, nBytes ); + SymCryptFdefMontgomeryReduce256Asm( pmMod, pTmp, &peDst->d.uint32[0] ); + + scError = SymCryptFdefModInvGeneric( pmMod, peDst, peDst, flags, pbScratch, cbScratch ); + + return scError; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomeryMulx256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + // Montgomery representation for X is R*X mod M where R = 2^<nDigits * bits-per-digit> + // Montgomery reduction performs an implicit division by R + // This function converts to the internal representation by multiplying by R^2 mod M and then performing a Montgomery reduction + UINT32 nDigits = pmMod->nDigits; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + UNREFERENCED_PARAMETER( nDigits ); + + SymCryptFdefModMulMontgomeryMulx256Asm( pmMod, (PSYMCRYPT_MODELEMENT) pmMod->tm.montgomery.Rsqr, peObj, peObj ); +} + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetMontgomery256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 nDigits = 1; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + memcpy( pTmp, &peObj->d.uint32[0], nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + SymCryptFdefMontgomeryReduce256Asm( pmMod, pTmp, pTmp ); + + // This gives the right result, but relies on peObj having zeroed upper half + // on AMD64 when digits are 512 bits. This should be true - check in a CHKed build. 
+ for( UINT32 i=8; i<16; ++i ) + { + SYMCRYPT_ASSERT( pTmp[i] == 0 ); + } + + // Wipe the extra bytes + // SymCryptWipeKnownSize( pTmp + (SYMCRYPT_FDEF_DIGIT_NUINT32 / 2), 32 ); + + return pTmp; +} + +// Thin wrapper: initializes the modulus through SymCryptFdefModulusInitMontgomeryInternal with a +// fixed second argument of 8 (presumably the UINT32 count of a 256-bit value - TODO confirm). +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomery256( + _Inout_ PSYMCRYPT_MODULUS pmMod, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModulusInitMontgomeryInternal( pmMod, 8, pbScratch, cbScratch ); +} + +//===================================== +// 384-bit Montgomery modulus code +// + +// Converts peObj in place to Montgomery form using the P384 Mulx multiplication routine and the +// modulus' precomputed Rsqr value. The scratch buffer is only size-checked, never used. +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomeryMulxP384( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + // Montgomery representation for X is R*X mod M where R = 2^<nDigits * bits-per-digit> + // Montgomery reduction performs an implicit division by R + // This function converts to the internal representation by multiplying by R^2 mod M and then performing a Montgomery reduction + UINT32 nDigits = pmMod->nDigits; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + UNREFERENCED_PARAMETER( nDigits ); + + SymCryptFdefModMulMontgomeryMulxP384Asm( pmMod, (PSYMCRYPT_MODELEMENT) pmMod->tm.montgomery.Rsqr, peObj, peObj ); +} + +#if 0 +//===================================== +// 512-bit Montgomery modulus code +// + +// Montgomery modular multiplication, 512-bit variant: raw-multiplies the two sources into the +// scratch buffer, then Montgomery-reduces the double-width product into peDst. +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery512( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawMul512Asm( &peSrc1->d.uint32[0], 
&peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce512Asm( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +/* Montgomery modular squaring, 512-bit variant: raw-squares peSrc into the scratch buffer, + * then Montgomery-reduces the result into peDst. Scratch must hold 2 * nDigits digits. */ +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery512( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawSquare512Asm( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce512Asm( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +//===================================== +// 1024-bit Montgomery modulus code +// + +// Montgomery modular multiplication, 1024-bit variant: raw-multiplies the two sources into the +// scratch buffer, then Montgomery-reduces the double-width product into peDst. +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawMul1024Asm( &peSrc1->d.uint32[0], &peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce1024Asm( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +// Montgomery modular squaring, 1024-bit variant: raw-squares peSrc into the scratch buffer, +// then Montgomery-reduces the result into peDst. +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawSquare1024Asm( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce1024Asm( pmMod, pTmp, &peDst->d.uint32[0] ); +} +#endif + 
+#endif + +/* Wine hack: asm not supported yet. + * The four Asm/Mulx entry points below therefore just forward, with unchanged arguments, to the + * portable implementations (SymCryptFdefMontgomeryReduceC / SymCryptFdefModDivSmallPow2Generic). */ + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceAsm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ) +{ + SymCryptFdefMontgomeryReduceC( pmMod, pSrc, pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Mulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptFdefModDivSmallPow2Generic( pmMod, peSrc, exp, peDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceMulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ) +{ + SymCryptFdefMontgomeryReduceC( pmMod, pSrc, pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptFdefModDivSmallPow2Generic( pmMod, peSrc, exp, peDst ); +} diff --git a/libs/symcrypt/lib/gcm.c b/libs/symcrypt/lib/gcm.c new file mode 100644 index 00000000000..a3a66ddea2a --- /dev/null +++ b/libs/symcrypt/lib/gcm.c @@ -0,0 +1,902 @@ +// +// gcm.c Implementation of the GCM block cipher mode +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// Size limits in bytes: enforced by SymCryptGcmValidateParameters, asserted by the other entry points. +#define GCM_MIN_NONCE_SIZE (1) +#define GCM_MIN_TAG_SIZE (12) +#define GCM_MAX_TAG_SIZE (16) + + +// Validates a set of GCM parameters without touching any key or state. +// Checks, in order: the cipher's block size, the nonce length, the associated-data length, the +// data length and the tag length. Returns SYMCRYPT_NO_ERROR when all pass, otherwise the +// SYMCRYPT_WRONG_* code of the first check that fails. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmValidateParameters( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ SIZE_T cbNonce, + _In_ UINT64 cbAssociatedData, + _In_ UINT64 cbData, + _In_ SIZE_T cbTag ) +{ + if( pBlockCipher->blockSize != SYMCRYPT_GCM_BLOCK_SIZE ) + { + return SYMCRYPT_WRONG_BLOCK_SIZE; + } + + // + // SP800-38D specifies that the nonce must be at least one bit, but we operate on bytes, + // so the minimum is one byte. 
+ // + if( cbNonce < GCM_MIN_NONCE_SIZE ) + { + return SYMCRYPT_WRONG_NONCE_SIZE; + } + + // + // cbAssociatedData is limited to <2^61 bytes + // + if( (cbAssociatedData >> 61) > 0 ) + { + return SYMCRYPT_WRONG_DATA_SIZE; + } + + // + // per SP800-38D cbData is limited to 2^36 - 32 bytes + // + if( cbData > SYMCRYPT_GCM_MAX_DATA_SIZE ) + { + return SYMCRYPT_WRONG_DATA_SIZE; + } + + if( cbTag < GCM_MIN_TAG_SIZE || cbTag > GCM_MAX_TAG_SIZE ) + { + return SYMCRYPT_WRONG_TAG_SIZE; + } + + return SYMCRYPT_NO_ERROR; +} + + + +// Feeds cbData bytes into the GHASH computation held in pState. +// Works in three steps: top up and hash a previously buffered partial block, hash all whole +// blocks straight from pbData, then buffer any remaining tail in pState->macBlock and record its +// length in pState->bytesInMacBlock. +// NOTE(review): pbData is annotated _In_reads_opt_; when a partial block is buffered and +// cbData == 0 it is still handed to memcpy with length 0 - confirm callers never pass NULL there. +VOID +SYMCRYPT_CALL +SymCryptGcmAddMacData( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_opt_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SIZE_T bytesToProcess; + if( pState->bytesInMacBlock > 0 ) + { + bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_GCM_BLOCK_SIZE - pState->bytesInMacBlock ); + memcpy( &pState->macBlock[pState->bytesInMacBlock], pbData, bytesToProcess ); + pbData += bytesToProcess; + cbData -= bytesToProcess; + pState->bytesInMacBlock += bytesToProcess; + + if( pState->bytesInMacBlock == SYMCRYPT_GCM_BLOCK_SIZE ) + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, + &pState->ghashState, + &pState->macBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + pState->bytesInMacBlock = 0; + } + } + + if( cbData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + bytesToProcess = cbData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, pbData, bytesToProcess ); + + pbData += bytesToProcess; + cbData -= bytesToProcess; + } + + if( cbData > 0 ) + { + memcpy( &pState->macBlock[0], pbData, cbData ); + pState->bytesInMacBlock = cbData; + } +} + + + +// Flushes a buffered partial MAC block: zero-fills the rest of pState->macBlock, hashes the full +// block and resets pState->bytesInMacBlock to 0. Does nothing when no bytes are buffered. +VOID +SYMCRYPT_CALL +SymCryptGcmPadMacData( _Inout_ PSYMCRYPT_GCM_STATE pState ) +{ + SIZE_T nBytes; + // + // Pad the MAC data with zeroes until we hit the block size. 
+ // + nBytes = pState->bytesInMacBlock; + if( nBytes > 0 ) + { + SymCryptWipe( &pState->macBlock[nBytes], SYMCRYPT_GCM_BLOCK_SIZE - nBytes ); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + pState->bytesInMacBlock = 0; + } +} + + + +// Shared CTR-mode core used for both encryption and decryption: combines cbData bytes of pbSrc +// with the key stream and writes the result to pbDst. No MAC processing happens here. +// Order of work: use up key-stream bytes left over from the previous call, process all whole +// blocks through SymCryptCtrMsb32, then generate one fresh key-stream block for a final tail. +// pState->cbData is advanced by cbData up front; its low bits tell the next call how much of +// pState->keystreamBlock has already been consumed. +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SIZE_T bytesToProcess; + SIZE_T bytesUsedInKeyStreamBuffer; + + bytesUsedInKeyStreamBuffer = (SIZE_T) (pState->cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK); + + // + // We update pState->cbData once before we modify cbData. + // pState->cbData is not used in the rest of this function + // + SYMCRYPT_ASSERT( pState->cbData + cbData <= SYMCRYPT_GCM_MAX_DATA_SIZE ); + pState->cbData += cbData; + + if( bytesUsedInKeyStreamBuffer != 0 ) + { + bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_GCM_BLOCK_SIZE - bytesUsedInKeyStreamBuffer ); + SymCryptXorBytes( pbSrc, &pState->keystreamBlock[bytesUsedInKeyStreamBuffer], pbDst, bytesToProcess ); + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + + // + // If there are bytes left in the key stream buffer, then cbData == 0 and we're done. 
+ // If we used up all the bytes, then we are fine, no need to compute the next key stream block + // + } + + if( cbData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + bytesToProcess = cbData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + pbSrc, + pbDst, + bytesToProcess ); + + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + } + + if( cbData > 0 ) + { + // Encrypting an all-zero block in place leaves the raw key stream in keystreamBlock. + SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + &pState->keystreamBlock[0], + &pState->keystreamBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + + SymCryptXorBytes( &pState->keystreamBlock[0], pbSrc, pbDst, cbData ); + + // + // pState->cbData contains the data length after this call already, so it knows how many + // bytes are left in the keystream block + // + } + +} + +// Rewinds the big-endian 32-bit counter in the last 4 bytes of pState->counterBlock back to the +// pre-counter value, by subtracting the number of data blocks processed so far plus one. +// Bytes 0..11 of the counter block are left untouched. +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmResetCounterBlock( + _Inout_ PSYMCRYPT_GCM_STATE pState ) +{ + // Computing the tag for GCM requires invoking the GCTR function with the pre-counter + // block which was computed when the nonce was set. Historically, we only supported 12-byte + // nonces, so we could trivially reset the counter block by just setting the last 4 bytes to + // (DWORD) 1. With support for larger IVs, the pre-counter block is computed from a GHash of + // the nonce, and we don't store the value. 
Adding a field in the GCM struct to store the value + // would be ABI-breaking, so instead we can recompute the value by decrementing the last 32 bits + // of the counter block by the number of blocks that have been processed (since the counter is + // incremented once per block), plus one for the initial increment. + UINT32 preCounter32 = SYMCRYPT_LOAD_MSBFIRST32(&pState->counterBlock[12]) - + (UINT32) ((pState->cbData + SYMCRYPT_GCM_BLOCK_SIZE - 1) / SYMCRYPT_GCM_BLOCK_SIZE) - 1; + + SYMCRYPT_STORE_MSBFIRST32(&pState->counterBlock[12], preCounter32); +} + +// Finishes the MAC and writes a full-block (SYMCRYPT_GCM_BLOCK_SIZE byte) tag to pbTag. +// Steps: hash any buffered partial block (zero-padded) together with the length block holding the +// bit lengths of the auth data and the data, rewind the counter to the pre-counter block, then +// run the GHASH value through SymCryptCtrMsb32. The local buffer is wiped before returning; +// truncating the tag to the caller's length is left to the caller. +VOID +SYMCRYPT_CALL +SymCryptGcmComputeTag( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _Out_writes_( SYMCRYPT_GCM_BLOCK_SIZE ) PBYTE pbTag ) +{ + SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_GCM_BLOCK_SIZE]; + + // buf[16..31] is the length block; buf[0..15] is reserved for a padded partial MAC block. + SYMCRYPT_STORE_MSBFIRST64( &buf[16], pState->cbAuthData * 8 ); + SYMCRYPT_STORE_MSBFIRST64( &buf[24], pState->cbData * 8 ); + + if( pState->bytesInMacBlock > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size + // + SymCryptWipeKnownSize( &buf[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( buf, &pState->macBlock[0], pState->bytesInMacBlock ); + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[0], 2 * SYMCRYPT_GCM_BLOCK_SIZE ); + } + else + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[16], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + SymCryptGcmResetCounterBlock(pState); + + // + // Convert the GHash state to an array of bytes + // + SYMCRYPT_STORE_MSBFIRST64( &buf[0], pState->ghashState.ull[1] ); + SYMCRYPT_STORE_MSBFIRST64( &buf[8], pState->ghashState.ull[0] ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + buf, + pbTag, + SYMCRYPT_GCM_BLOCK_SIZE ); + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); +} + +// Prepares pExpandedKey for use with the given block cipher and raw key. +// Expands the block cipher key, stores a copy of the raw key (used by SymCryptGcmKeyCopy), and +// derives the GHASH key from H, the encryption of an all-zero block; H is wiped afterwards. +// Returns SYMCRYPT_WRONG_KEY_SIZE when cbKey exceeds SYMCRYPT_GCM_MAX_KEY_SIZE, or whatever error +// the cipher's expandKeyFunc reports; in both cases the magic value is not set. +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmExpandKey( + _Out_ 
PSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_ALIGN BYTE H[SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_ERROR status = SYMCRYPT_NO_ERROR; + + if( cbKey > SYMCRYPT_GCM_MAX_KEY_SIZE ) + { + status = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + // + // Perform the Block cipher key expansion first + // + pExpandedKey->pBlockCipher = pBlockCipher; + status = pBlockCipher->expandKeyFunc( &pExpandedKey->blockcipherKey, pbKey, cbKey ); + + if( status != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // + // We keep a copy of the key to make it easy to + // implement the SymCryptGcmKeyCopy function + // + pExpandedKey->cbKey = cbKey; + memcpy( &pExpandedKey->abKey[0], pbKey, cbKey ); + + // + // Compute H and the GHASH expanded key + // + SymCryptWipeKnownSize( H, sizeof( H ) ); + pBlockCipher->encryptFunc( &pExpandedKey->blockcipherKey, H, H ); + + + SymCryptGHashExpandKey( &pExpandedKey->ghashKey, H ); + + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + SymCryptWipeKnownSize( H, sizeof( H ) ); + +cleanup: + + return status; +} + +// Copies an expanded key by re-running SymCryptGcmExpandKey on the raw key stored in pSrc. +// The function returns nothing; a failing expansion is only caught by the SYMCRYPT_ASSERT +// (presumably unreachable, since pSrc was itself produced by a successful expansion). +VOID +SYMCRYPT_CALL +SymCryptGcmKeyCopy( _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_GCM_EXPANDED_KEY pDst ) +{ + SYMCRYPT_ERROR status; + + SYMCRYPT_CHECK_MAGIC( pSrc ); + + status = SymCryptGcmExpandKey( pDst, pSrc->pBlockCipher, &pSrc->abKey[0], pSrc->cbKey ); + SYMCRYPT_ASSERT( status == SYMCRYPT_NO_ERROR ); +} + +// Derives the initial counter block in pState->counterBlock from the nonce and then advances its +// 32-bit counter by one, ready for the first data block. +// A 12-byte nonce is used directly; any other length is hashed with GHASH, which uses (and then +// wipes) pState->ghashState. The caller must therefore have set pState->pKey and zeroed +// pState->ghashState beforehand, as SymCryptGcmInit does. +VOID +SYMCRYPT_CALL +SymCryptGcmSetNonce( + _Out_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce ) +{ + SYMCRYPT_ASSERT( cbNonce >= GCM_MIN_NONCE_SIZE ); + + // Handle the nonce depending on its size, as specified in NIST SP800-38D + if( cbNonce == 12 ) + { + // If len(nonce) = 96 bits (12 bytes), pre-counter block = nonce || (DWORD) 1 + memcpy( &pState->counterBlock[0], pbNonce, cbNonce ); + SymCryptWipeKnownSize( &pState->counterBlock[12], 4 ); + pState->counterBlock[15] = 1; + } + else 
+ { + // If len(nonce) != 96 bits (12 bytes), + // pre-counter block = GHASH(nonce padded to a multiple of 128 bits || (QWORD) len(nonce)) + BYTE buf[SYMCRYPT_GF128_BLOCK_SIZE]; + SIZE_T cbNonceRemainder = cbNonce & (SYMCRYPT_GF128_BLOCK_SIZE - 1); + + // Process all full blocks of the nonce, i.e. all nonce bytes up to a multiple of + // SYMCRYPT_GF128_BLOCK_SIZE. SymCryptGHashAppendData ignores additional data that are + // not a multiple of the block size. We will handle any such remaining data below. + // (This also works if the nonce is less than the block size.) + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, pbNonce, cbNonce ); + + // If the nonce length is not a multiple of SYMCRYPT_GF128_BLOCK_SIZE, we need to pad any + // remaining data to a multiple of the block size. + if(cbNonceRemainder > 0) + { + SymCryptWipeKnownSize( buf, sizeof(buf) ); + memcpy(buf, pbNonce + cbNonce - cbNonceRemainder, cbNonceRemainder); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, buf, sizeof(buf) ); + } + + // Now we append the length of the nonce in bits. We take the length as a 64-bit integer, + // but it too must be padded to 128 bits for use in GHASH. + SymCryptWipeKnownSize( buf, 8 ); + SYMCRYPT_STORE_MSBFIRST64( &buf[8], cbNonce * 8 ); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, buf, sizeof(buf) ); + + SymCryptGHashResult( &pState->ghashState, pState->counterBlock ); + SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) ); + } + + // Increment the last 32 bits of the counter. We'll recalculate the pre-counter block later + // when computing the tag. 
+ SYMCRYPT_STORE_MSBFIRST32( + &pState->counterBlock[12], + 1 + SYMCRYPT_LOAD_MSBFIRST32( &pState->counterBlock[12] ) ); +} + +/* Starts a new GCM computation: binds pState to pExpandedKey, zeroes the data/auth-data counters, + * the buffered-MAC-byte count and the GHASH state, then sets up the counter block from the nonce. + * The state's magic value is set last. */ +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmInit( + _Out_ PSYMCRYPT_GCM_STATE pState, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce ) +{ + UNREFERENCED_PARAMETER( cbNonce ); // It is used in an ASSERT, but only in CHKed builds. + // NOTE(review): cbNonce is also passed to SymCryptGcmSetNonce below, so the remark above looks stale. + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + pState->pKey = pExpandedKey; + pState->cbData = 0; + pState->cbAuthData = 0; + pState->bytesInMacBlock = 0; + SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) ); + + SymCryptGcmSetNonce(pState, pbNonce, cbNonce); + + SYMCRYPT_SET_MAGIC( pState ); +} + + +// Duplicates an in-progress GCM state by structure assignment. +// When pExpandedKeyCopy is non-NULL the copy is re-pointed at that key; otherwise it keeps +// referring to the same expanded key as pSrc. The magic value is re-set for pDst. +VOID +SYMCRYPT_CALL +SymCryptGcmStateCopy( + _In_ PCSYMCRYPT_GCM_STATE pSrc, + _In_opt_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKeyCopy, + _Out_ PSYMCRYPT_GCM_STATE pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + *pDst = *pSrc; + if( pExpandedKeyCopy != NULL ) + { + pDst->pKey = pExpandedKeyCopy; + } + + SYMCRYPT_SET_MAGIC( pDst ); +} + + +// Adds a chunk of associated (authenticated-only) data to the MAC and to pState->cbAuthData. +// Must be called before any data is encrypted or decrypted: pState->cbData == 0 is asserted. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmAuthPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_opt_( cbData ) PCBYTE pbAuthData, + SIZE_T cbData ) +{ + SYMCRYPT_CHECK_MAGIC( pState ); + SYMCRYPT_ASSERT( pState->cbData == 0 ); + + SymCryptGcmAddMacData( pState, pbAuthData, cbData ); + pState->cbAuthData += cbData; +} + +// Encrypts one chunk of data and folds the resulting ciphertext into the MAC. +// On the first data chunk the pending associated data is zero-padded to a block boundary. +// Dispatches to the cipher's gcmEncryptPartFunc when one is provided, otherwise to the generic +// two-pass implementation. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + if( pState->cbData == 0 ) + { + // + // This is the first actual encryption data, pad the Auth data with zeroes if needed. 
+ // + SymCryptGcmPadMacData( pState ); + } + + if ( pState->pKey->pBlockCipher->gcmEncryptPartFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pState->pKey->pBlockCipher->gcmEncryptPartFunc) ( pState, pbSrc, pbDst, cbData ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock <= 15 ); + } + else + { + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } +} + +// Generic encryption path: first pass writes the ciphertext to pbDst, second pass reads that +// ciphertext back from pbDst and adds it to the MAC. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptPartTwoPass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + // + // Do the actual encryption + // + SymCryptGcmEncryptDecryptPart( pState, pbSrc, pbDst, cbData ); + + // + // We break the read-once/write once rule here by reading the pbDst data back. + // In this particular situation this is safe, and avoiding it is expensive as it + // requires an extra copy and an extra memory buffer. + // The first write exposes the GCM key stream, independent of the underlying data that + // we are processing. From an attacking point of view we can think of this as literally + // handing over the key stream. So encryption consists of two steps: + // - hand over the key stream + // - MAC some ciphertext + // In this view (which has equivalent security properties to GCM) it obviously doesn't + // matter that we read pbDst back. + // + + SymCryptGcmAddMacData( pState, pbDst, cbData ); +} + + +// Decrypts one chunk of data and folds the incoming ciphertext into the MAC. +// On the first data chunk the pending associated data is zero-padded to a block boundary. +// Dispatches to the cipher's gcmDecryptPartFunc when one is provided, otherwise to the generic +// two-pass implementation. The tag is not checked here; that happens in the final step. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + if( pState->cbData == 0 ) + { + // + // This is the first actual decryption data, pad the Auth data with zeroes if needed. 
+ // + SymCryptGcmPadMacData( pState ); + } + + if ( pState->pKey->pBlockCipher->gcmDecryptPartFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pState->pKey->pBlockCipher->gcmDecryptPartFunc) ( pState, pbSrc, pbDst, cbData ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock <= 15 ); + } + else + { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } +} + +// Generic decryption path: the ciphertext in pbSrc is added to the MAC first and decrypted into +// pbDst second, so the MAC always covers the original ciphertext (pbSrc and pbDst may presumably +// be the same buffer, as in SymCryptGcmSelftest). +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmDecryptPartTwoPass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SymCryptGcmAddMacData( pState, pbSrc, cbData ); + + // + // Do the actual decryption + // This violates the read-once rule, but it is safe for the same reasons as above + // in the encryption case. + // + + SymCryptGcmEncryptDecryptPart( pState, pbSrc, pbDst, cbData ); +} + + +// Completes an encryption: computes the full tag, copies its first cbTag bytes to pbTag and wipes +// both the local tag buffer and the whole state. cbTag must lie in +// [GCM_MIN_TAG_SIZE, GCM_MAX_TAG_SIZE] (asserted only). +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptFinal( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_GCM_BLOCK_SIZE]; + + SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE ); + + SymCryptGcmComputeTag( pState, &buf[0] ); + memcpy( pbTag, buf, cbTag ); + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + // NOTE(review): this runs after the wipe above, so it can only confirm the wipe itself. + SYMCRYPT_ASSERT( pState->bytesInMacBlock == 0 ); +} + +// Completes a decryption: computes the full tag and compares its first cbTag bytes with pbTag +// using SymCryptEqual. Returns SYMCRYPT_NO_ERROR on a match and SYMCRYPT_AUTHENTICATION_FAILURE +// otherwise; the local tag buffer and the whole state are wiped in both cases. +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmDecryptFinal( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_ERROR status; + + SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE ); + + SymCryptGcmComputeTag( pState, &buf[0] ); + + if( !SymCryptEqual( pbTag, buf, cbTag ) ) + { + status = SYMCRYPT_AUTHENTICATION_FAILURE; + } + else + { + status = SYMCRYPT_NO_ERROR; + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + 
SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock == 0 ); + + return status; +} + + +/* One-shot GCM encryption of a complete message. + * The body is a hand-inlined sequence of Init, AuthPart, EncryptPart and EncryptFinal (each stage + * is marked by a comment naming the call it replaces) operating on a local state, which is wiped + * before returning. Writes cbData bytes of ciphertext to pbDst and cbTag bytes of tag to pbTag. + * Parameter limits are only asserted here. */ +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmEncrypt( + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_GCM_STATE state; + PSYMCRYPT_GCM_STATE pState = &state; + + // SymCryptGcmInit( &state, pExpandedKey, pbNonce, cbNonce ); + UNREFERENCED_PARAMETER( cbNonce ); // It is used in an ASSERT, but only in CHKed builds. + + SYMCRYPT_ASSERT( cbNonce >= GCM_MIN_NONCE_SIZE ); + SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + pState->pKey = pExpandedKey; + pState->cbData = 0; + pState->cbAuthData = 0; + pState->bytesInMacBlock = 0; + SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) ); + + SymCryptGcmSetNonce( pState, pbNonce, cbNonce ); + + // SymCryptGcmAuthPart( &state, pbAuthData, cbAuthData ); + pState->cbAuthData += cbAuthData; + if( cbAuthData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + SIZE_T bytesToDo = cbAuthData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, pbAuthData, bytesToDo ); + + pbAuthData += bytesToDo; + cbAuthData -= bytesToDo; + } + + if( cbAuthData > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size. 
+ // + SymCryptWipeKnownSize( &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( &pState->macBlock[0], pbAuthData, cbAuthData ); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + // SymCryptGcmEncryptPart( &state, pbSrc, pbDst, cbData ); + if ( pState->pKey->pBlockCipher->gcmEncryptPartFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pState->pKey->pBlockCipher->gcmEncryptPartFunc) ( pState, pbSrc, pbDst, cbData ); + } + else + { + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + + // SymCryptGcmEncryptFinal( &state, pbTag, cbTag ); + SYMCRYPT_STORE_MSBFIRST64( &buf[16], pState->cbAuthData * 8 ); + SYMCRYPT_STORE_MSBFIRST64( &buf[24], pState->cbData * 8 ); + + if( pState->bytesInMacBlock > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size + // + SymCryptWipeKnownSize( &buf[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( buf, &pState->macBlock[0], pState->bytesInMacBlock ); + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[0], 2 * SYMCRYPT_GCM_BLOCK_SIZE ); + } + else + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[16], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + // Reset the counter block prior to computing the tag + SymCryptGcmResetCounterBlock( pState ); + + // + // Convert the GHash state to an array of bytes + // + SYMCRYPT_STORE_MSBFIRST64( &buf[0], pState->ghashState.ull[1] ); + SYMCRYPT_STORE_MSBFIRST64( &buf[8], pState->ghashState.ull[0] ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + buf, + buf, + SYMCRYPT_GCM_BLOCK_SIZE ); + + // Only the first cbTag bytes of the full-block tag are handed to the caller. + memcpy( pbTag, buf, cbTag ); + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); +} + + +// One-shot GCM decryption and verification of a complete message. +// Mirrors SymCryptGcmEncrypt: a hand-inlined Init, AuthPart, DecryptPart and DecryptFinal on a +// local state that is wiped before returning. The plaintext is written to pbDst before the tag +// is checked; when the first cbTag bytes of the computed tag do not match pbTag, pbDst is wiped +// and SYMCRYPT_AUTHENTICATION_FAILURE is returned. Returns SYMCRYPT_NO_ERROR on success. +SYMCRYPT_NOINLINE 
+SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmDecrypt( + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ERROR status; + SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_GCM_STATE state; + PSYMCRYPT_GCM_STATE pState = &state; + + // SymCryptGcmInit( &state, pExpandedKey, pbNonce, cbNonce ); + UNREFERENCED_PARAMETER( cbNonce ); // It is used in an ASSERT, but only in CHKed builds. + + SYMCRYPT_ASSERT( cbNonce >= GCM_MIN_NONCE_SIZE ); + SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + pState->pKey = pExpandedKey; + pState->cbData = 0; + pState->cbAuthData = 0; + pState->bytesInMacBlock = 0; + SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) ); + + SymCryptGcmSetNonce( pState, pbNonce, cbNonce ); + + // SymCryptGcmAuthPart( &state, pbAuthData, cbAuthData ); + pState->cbAuthData += cbAuthData; + if( cbAuthData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + SIZE_T bytesToDo = cbAuthData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, pbAuthData, bytesToDo ); + + pbAuthData += bytesToDo; + cbAuthData -= bytesToDo; + } + + if( cbAuthData > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size. 
+ // + SymCryptWipeKnownSize( &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( &pState->macBlock[0], pbAuthData, cbAuthData ); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + // SymCryptGcmDecryptPart( &state, pbSrc, pbDst, cbData ); + if ( pState->pKey->pBlockCipher->gcmDecryptPartFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pState->pKey->pBlockCipher->gcmDecryptPartFunc) ( pState, pbSrc, pbDst, cbData ); + } + else + { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + + //status = SymCryptGcmDecryptFinal( &state, pbTag, cbTag ); + SYMCRYPT_STORE_MSBFIRST64( &buf[16], pState->cbAuthData * 8 ); + SYMCRYPT_STORE_MSBFIRST64( &buf[24], pState->cbData * 8 ); + + if( pState->bytesInMacBlock > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size + // + SymCryptWipeKnownSize( &buf[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( buf, &pState->macBlock[0], pState->bytesInMacBlock ); + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[0], 2 * SYMCRYPT_GCM_BLOCK_SIZE ); + } + else + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[16], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + SymCryptGcmResetCounterBlock( pState ); + + // + // Convert the GHash state to an array of bytes + // + SYMCRYPT_STORE_MSBFIRST64( &buf[0], pState->ghashState.ull[1] ); + SYMCRYPT_STORE_MSBFIRST64( &buf[8], pState->ghashState.ull[0] ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + buf, + buf, + SYMCRYPT_GCM_BLOCK_SIZE ); + + if( !SymCryptEqual( pbTag, buf, cbTag ) ) + { + status = SYMCRYPT_AUTHENTICATION_FAILURE; + } + else + { + status = SYMCRYPT_NO_ERROR; + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + SymCryptWipeKnownSize( pState, 
sizeof( *pState ) ); + + if( status != SYMCRYPT_NO_ERROR ) + { + SymCryptWipe( pbDst, cbData ); + } + + return status; +} + + +/* Expected output of SymCryptGcmSelftest: 3 ciphertext bytes followed by the + * SYMCRYPT_AES_BLOCK_SIZE-byte tag. */ +static const BYTE SymCryptGcmSelftestResult[3 + SYMCRYPT_AES_BLOCK_SIZE ] = +{ + 0xa5, 0x4c, 0x60, + 0x80, 0xb0, 0x48, 0x6d, 0x03, 0x9f, 0xea, 0xc3, 0x3c, 0x28, 0x96, 0x3f, 0x99, 0x8a, 0x77, 0x43, +}; + +/* Known-answer self test for AES-GCM. + * Uses the first 16 bytes of SymCryptTestKey32 as the key, the 12 bytes at offset 16 of the same + * array as the nonce, no associated data and the 3-byte SymCryptTestMsg3 as the message. + * Stage 1 encrypts and compares ciphertext plus tag with SymCryptGcmSelftestResult; stage 2 + * decrypts in place and checks both the status and the recovered plaintext. + * Any failure calls SymCryptFatal with a stage-specific code ('gcm0', 'gcm1', 'gcm2'). */ +VOID +SYMCRYPT_CALL +SymCryptGcmSelftest(void) +{ + BYTE buf[ 3 + SYMCRYPT_AES_BLOCK_SIZE ]; + SYMCRYPT_GCM_EXPANDED_KEY key; + SYMCRYPT_ERROR err; + + if( SymCryptGcmExpandKey( &key, SymCryptAesBlockCipher, SymCryptTestKey32, 16 ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'gcm0' ); + } + + SymCryptGcmEncrypt( &key, + &SymCryptTestKey32[16], 12, + NULL, 0, + &SymCryptTestMsg3[0], buf, 3, + &buf[3], SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptInjectError( buf, sizeof( buf ) ); + if( memcmp( buf, SymCryptGcmSelftestResult, sizeof( buf ) ) != 0 ) + { + SymCryptFatal( 'gcm1' ); + } + + // inject error into the ciphertext or tag + SymCryptInjectError( buf, sizeof( buf ) ); + + err = SymCryptGcmDecrypt( &key, + &SymCryptTestKey32[16], 12, + NULL, 0, + buf, buf, 3, + &buf[3], SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptInjectError( buf, 3 ); + + if( err != SYMCRYPT_NO_ERROR || memcmp( buf, SymCryptTestMsg3, 3 ) != 0 ) + { + SymCryptFatal( 'gcm2' ); + } + +} diff --git a/libs/symcrypt/lib/gen_int.c b/libs/symcrypt/lib/gen_int.c new file mode 100644 index 00000000000..5f5983358a0 --- /dev/null +++ b/libs/symcrypt/lib/gen_int.c @@ -0,0 +1,368 @@ +// +// gen_int.c Generic integer algorithms (not tied to low-level implementations) +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + + +// Returns the greatest common divisor of two 64-bit values. +// Runs a binary GCD for a fixed 127 iterations, selecting between the possible updates with +// all-ones/all-zero masks rather than with branches on a or b. +// Preconditions (checked only by SYMCRYPT_ASSERT): flags contains +// SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN and at least one of a, b is odd. flags is otherwise unused. +UINT64 +SYMCRYPT_CALL +SymCryptUint64Gcd( UINT64 a, UINT64 b, UINT32 flags ) +{ + UINT64 swap; + UINT64 tmp; + UINT64 a2; + UINT64 b2; + UINT32 i; + +/* + Algorithm outline: + + if( b even ) + swap (a,b) + + loop: + { invariant: b is odd } + if( a even ) + a = a/2 + else + if a < b + swap (a,b) + a = (a - b) / 2 + + We ignore the data_public flag as we currently always use a side-channel safe implementation + + Computing (a < b) on 64-bit values is the tricky part; see the comment inside the loop below. +*/ + SYMCRYPT_ASSERT( (flags & SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN) != 0 && ((a | b) & 1) != 0 ); + UNREFERENCED_PARAMETER( flags ); + + // First we make sure that b is odd + // If b even: swap (a,b) + // swap is all-ones when b is even and zero when b is odd. + swap = ~(0 - (b & 1)); + tmp = (a ^ b) & swap; + a ^= tmp; + b ^= tmp; + + // Each loop iteration reduces len(a) + len(b) by at least 1, so looping 127 times is enough. + // For inputs (2^63, 2^63 + 1) we get 63 iterations to reduce a to 1, and then another 63 to get + // the other value to 1, plus one more to make it 0. + for( i=0; i < 127; i++ ) + { + // Compute the result of the 'else' part of the if( a even ) into (a2, b2) + // First we evaluate (a < b), which is a bit tricky without access to the carry flag. + // a < b = (b>>63) if ((a^b) >> 63) == 1 + // (a - b) >> 63 otherwise + tmp = a ^ b; + tmp = (tmp & b) | (~tmp & (a-b)); + swap = 0 - (tmp >> 63); + + // Now swap if a < b into (a2, b2) + tmp = (a ^ b) & swap; + a2 = a ^ tmp; + b2 = b ^ tmp; + + // a2 >= b2 here, so the subtraction cannot wrap + a2 = (a2 - b2) / 2; + + // Compute the (a is odd) condition + tmp = 0 - (a & 1); + + // Assemble the final result + a = (tmp & a2) | (~tmp & a/2); + b = (tmp & b2) | (~tmp & b); + } + + SYMCRYPT_ASSERT( a == 0 ); + return b; +} + + +/* +Extended GCD notes. + +A side-channel safe implementation cannot effectively use Euclid's algorithm. +The quotient is typically very small, but it can be very large. 
An SCS implementation +would require the quotient to always be treated as a full-sized number, which would kill performance. +Instead we use the binary algorithm which is easier to adapt to side-channel safety. + +Basic algorithm for inputs S1 and S2: + Eliminate the joint factors of two. These are added later to the result + For now we assume that both S1 and S2 are non-zero and S2 is odd. + +Invariant: + A = A1 * S1 (mod S2) + B = B1 * S1 (mod S2) + B is odd + +Initial values: + A = S1; A1 = 1; + B = S2; B1 = 0; + +Main loop: + + t = len(A) + len(B) - 1 // Careful of overflows, use a SIZE_T + + repeat t times: + 1. if A odd and A < B: + Swap (A, A1) with (B, B1) + 2. if A odd: + A -= B; + A1 -= B1 (mod S2); + 3. A /= 2; + A1 /= 2 (mod S2); + +Proof of the invariant: + It is easy to see that initially the invariant holds (S2 is odd). + + Assume the invariant holds at the start of the loop's iteration. + Step 1 of the main loop preserves the invariant since the first 2 + equations of the invariant are the same for A's and B's and + the swapping happens only if A is odd. Therefore, B is odd + after step 1. + Step 2 essentially subtracts the second equation of the invariant + from the first (modulo S2). This preserves the invariant since step + 1 ensured that A >= B (when A odd), so the operation A = A-B holds + modulo S2. + Step 3 essentially multiplies the first equation of the invariant + with the inverse of 2 modulo S2. Since S2 is odd we know that the + inverse exists. Also the operation A = A/2 is correct modulo S2 + because steps 1 and 2 ensured that A is even at this point. + (To see this, consider 2*a = x (mod S2) => a = x*2^{-1} (mod S2) + where a is an integer and 2^{-1} is the inverse of 2 modulo S2) + +Termination/Results: + Each iteration reduces len(A) + len(B) by at least one until A=0. + When A=0 the loop does nothing except churn by dividing A and A1 + by 2 every time. + After len(A)+len(B)-1 iterations, A must be zero. 
At that point + we have + + B = GCD + B1 * S1 = GCD (mod S2) + + The LCM is calculated as S1*S2 / GCD. + + InvS1ModS2 is defined as the smallest value X such that + X*S1 = GCD (mod S2), but B1 might not be the smallest solution. + Let P2 = S2/GCD. + Any two solutions X1, X2 to X*S1 = GCD (mod S2) have (X1-X2)*S1 mod S2 = 0, + so X1-X2 is a multiple of P2. Therefore we need to reduce B1 modulo P2 + to get the smallest solution for InvS1ModS2. + + ** Notice that if B1 is a multiple of S2 (or 0), which means that GCD is equal to S2, + then the above result is 0. In that case InvS1ModS2 is undefined. + + Similarly, InvS2ModS1 is defined as the smallest value Y such that + Y*S2 = GCD (mod S1). We have that for some integer q: + + q*S2 = B1*S1 - GCD => (-q mod S1) * S2 = GCD (mod S1) + + As above, if B1 is 0, then InvS2ModS1 is undefined. Therefore we ignore this case. + For the defined case, B1>=1 and S1>=GCD which implies that q >= 0. This + allows us to divide (B1*S1 - GCD) by S2. + Therefore InvS2ModS1 can be computed as -((B1*S1 - GCD)/S2) mod S1. + +For simplicity, our generic implementation works with all values the same size. +This can be less efficient if one input is much larger than the other, for +example for RSA key generation when one input is 1000+ bits and the other 17 bits. +However, that is not a high-performance path. If it is, a dedicated GCD with one +input a UINT32 or UINT64 would be the solution to a much faster extended GCD.
+*/ +VOID +SYMCRYPT_CALL +SymCryptIntExtendedGcd( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + UINT32 flags, + _Out_opt_ PSYMCRYPT_INT piGcd, + _Out_opt_ PSYMCRYPT_INT piLcm, + _Out_opt_ PSYMCRYPT_INT piInvSrc1ModSrc2, + _Out_opt_ PSYMCRYPT_INT piInvSrc2ModSrc1, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = SYMCRYPT_MAX( SymCryptIntDigitsizeOfObject( piSrc1 ), SymCryptIntDigitsizeOfObject( piSrc2 )); + PSYMCRYPT_INT piA; // size nDigits + PSYMCRYPT_INT piB; // size nDigits, NOT ALLOCATED (part of the pdGcd divisor) + PSYMCRYPT_INT piTmp; // size nDigits + PSYMCRYPT_INT piA1; // size nDigits + PSYMCRYPT_INT piB1; // size nDigits + PSYMCRYPT_INT piTmpDbl; // size 2*nDigits + PSYMCRYPT_DIVISOR pdGcd; // size nDigits + PSYMCRYPT_DIVISOR pdTmp; // size nDigits + UINT32 cbInt; + UINT32 cbWideInt; + UINT32 cbDivisor; + SIZE_T cbFnScratch; + UINT32 t; + UINT32 c; + UINT32 d; + + UNREFERENCED_PARAMETER( flags ); // Currently not used to improve performance. 
+ + // Compute how much scratch space we need for the functions we call + cbFnScratch = SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( 2 * nDigits, nDigits ); + cbFnScratch = SYMCRYPT_MAX( cbFnScratch, SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( 2*nDigits ) ); + cbFnScratch = SYMCRYPT_MAX( cbFnScratch, SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( nDigits ) ); + + cbInt = SymCryptSizeofIntFromDigits( nDigits ); + cbWideInt = SymCryptSizeofIntFromDigits( 2*nDigits ); + cbDivisor = SymCryptSizeofDivisorFromDigits( nDigits ); + + SYMCRYPT_ASSERT( cbWideInt != 0 ); + SYMCRYPT_ASSERT( cbScratch >= 4 * cbInt + + 1 * cbWideInt + + 2 * cbDivisor + + cbFnScratch ); + + piA = SymCryptIntCreate( pbScratch, cbInt, nDigits ); + pbScratch += cbInt; cbScratch -= cbInt; + // piB is stored inside the pdGcd object created later + piTmp = SymCryptIntCreate( pbScratch, cbInt, nDigits ); + pbScratch += cbInt; cbScratch -= cbInt; + piA1 = SymCryptIntCreate( pbScratch, cbInt, nDigits ); + pbScratch += cbInt; cbScratch -= cbInt; + piB1 = SymCryptIntCreate( pbScratch, cbInt, nDigits ); + pbScratch += cbInt; cbScratch -= cbInt; + + piTmpDbl = SymCryptIntCreate( pbScratch, cbWideInt, 2 * nDigits ); + pbScratch += cbWideInt; cbScratch -= cbWideInt; + + pdGcd = SymCryptDivisorCreate( pbScratch, cbDivisor, nDigits ); + pbScratch += cbDivisor; cbScratch -= cbDivisor; + piB = SymCryptIntFromDivisor( pdGcd ); + + pdTmp = SymCryptDivisorCreate( pbScratch, cbDivisor, nDigits ); + pbScratch += cbDivisor; cbScratch -= cbDivisor; + + SymCryptIntCopyMixedSize( piSrc1, piA ); // Ignore the error return value here as we know + SymCryptIntCopyMixedSize( piSrc2, piB ); // that the destination integers are large enough. 
+ + SymCryptIntSetValueUint32( 1, piA1 ); + SymCryptIntSetValueUint32( 0, piB1 ); + + // Currently not supported: Src1 to be 0 or Src2 to be even + SYMCRYPT_ASSERT( !SymCryptIntIsEqualUint32( piA, 0 ) ); + SYMCRYPT_ASSERT( (SymCryptIntGetValueLsbits32( piB ) & 1) != 0 ); + if ( SymCryptIntIsEqualUint32( piA, 0 ) || + ((SymCryptIntGetValueLsbits32( piB ) & 1) == 0) ) + { + goto cleanup; + } + + // Currently not supported: piInvSrc2ModSrc1 != NULL and max( Src1.nDigits, Src2.nDigits ) * 2 > SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) + if( (piInvSrc2ModSrc1 != NULL) && (piTmpDbl == NULL) ) + { + goto cleanup; + } + + t = SymCryptIntBitsizeOfObject( piSrc1 ) + SymCryptIntBitsizeOfObject( piSrc2 ) - 1; + while( t > 0 ) + { + t--; + + //if A odd and A < B: + // Swap (A, A1) with (B, B1) + c = 1 & (SymCryptIntGetValueLsbits32( piA ) & SymCryptIntSubSameSize( piA, piB, piTmp ) ); + SymCryptIntConditionalSwap( piA, piB, c ); + SymCryptIntConditionalSwap( piA1, piB1, c ); + + //if A odd: + // A -= B; A1 -= B1 (mod S2); + c = 1 & SymCryptIntGetValueLsbits32( piA ); + SymCryptIntSubSameSize( piA, piB, piTmp ); // Never a carry due to the previous conditional swap + SymCryptIntConditionalCopy( piTmp, piA, c ); + + d = SymCryptIntSubSameSize( piA1, piB1, piTmp ); + SymCryptIntConditionalCopy( piTmp, piA1, c ); + SymCryptIntAddMixedSize( piA1, piSrc2, piTmp ); + SymCryptIntConditionalCopy( piTmp, piA1, c & d ); + + // A /= 2; A1 /= 2 (mod S2); + SYMCRYPT_ASSERT( (SymCryptIntGetValueLsbits32( piA ) & 1) == 0 ); + SymCryptIntShr1( 0, piA, piA ); + c = SymCryptIntGetValueLsbits32( piA1 ) & 1; + d = SymCryptIntAddMixedSize( piA1, piSrc2, piTmp ); + SymCryptIntConditionalCopy( piTmp, piA1, c ); + SymCryptIntShr1( c & d, piA1, piA1 ); + + } + + // B = GCD, B1 * S1 = GCD (mod S2) + // A = 0, A1 is scratch + // + // Algorithm from here: + // GCD as divisor + // LCM = S1 * S2 / GCD. 
+ // P2 = S2 / GCD, as divisor (only for InvS1ModS2) + // InvS1ModS2 = B1 mod P2 + // InvS2ModS1 = -((B1*S1 - GCD) div S2) mod S1 + + if( piGcd != NULL ) + { + SymCryptIntCopyMixedSize( piB, piGcd ); + } + + if( piLcm == NULL && piInvSrc1ModSrc2 == NULL && piInvSrc2ModSrc1 == NULL ) + { + // Only GCD needed; don't do the other work + goto cleanup; + } + + SymCryptIntCopyMixedSize( piB, SymCryptIntFromDivisor( pdGcd ) ); // copy into INT of the right size + + // IntToDivisor requirement: + // Gcd !=0 + SymCryptIntToDivisor( SymCryptIntFromDivisor( pdGcd ), pdGcd, 3, 0, pbScratch, cbScratch ); + + if( piLcm != NULL ) + { + // LCM = S1 * S2 / GCD + SymCryptIntMulMixedSize( piSrc1, piSrc2, piLcm, pbScratch, cbScratch ); + SymCryptIntDivMod( piLcm, pdGcd, piLcm, NULL, pbScratch, cbScratch ); + } + + if( piInvSrc1ModSrc2 != NULL ) + { + // Future optimization: if GCD == 1 then we can just copy B1. + SymCryptIntDivMod( piSrc2, pdGcd, SymCryptIntFromDivisor( pdTmp ), NULL, pbScratch, cbScratch ); + + // IntToDivisor requirement: + // Src2 / pdGcd > 0 + SymCryptIntToDivisor( SymCryptIntFromDivisor( pdTmp ), pdTmp, 1, 0, pbScratch, cbScratch ); + SymCryptIntDivMod( piB1, pdTmp, NULL, piInvSrc1ModSrc2, pbScratch, cbScratch ); + } + + if( piInvSrc2ModSrc1 != NULL ) + { + // InvS2ModS1 = - ( (B1*S1 - GCD)/S2 ) mod S1 + + // S2 as divisor + SymCryptIntCopyMixedSize( piSrc2, SymCryptIntFromDivisor( pdTmp ) ); + + // IntToDivisor requirement: + // Src2 is odd --> Src2 != 0 + SymCryptIntToDivisor( SymCryptIntFromDivisor( pdTmp ), pdTmp, 1, 0, pbScratch, cbScratch ); + + SymCryptIntMulMixedSize( piB1, piSrc1, piTmpDbl, pbScratch, cbScratch ); + SymCryptIntSubMixedSize( piTmpDbl, piB, piTmpDbl ); // Never a borrow if B1 >= 1 + SymCryptIntDivMod( piTmpDbl, pdTmp, piTmpDbl, NULL, pbScratch, cbScratch ); + + // and reduce modulo S1 + SymCryptIntCopyMixedSize( piSrc1, SymCryptIntFromDivisor( pdTmp ) ); + + // IntToDivisor requirement: + // Src1 > 0 + SymCryptIntToDivisor( 
SymCryptIntFromDivisor( pdTmp ), pdTmp, 1, 0, pbScratch, cbScratch ); + SymCryptIntDivMod( piTmpDbl, pdTmp, NULL, piInvSrc2ModSrc1, pbScratch, cbScratch ); + + // Negative modulo S1 + SymCryptIntSubMixedSize( SymCryptIntFromDivisor( pdTmp ), piInvSrc2ModSrc1, piInvSrc2ModSrc1 ); // Never a borrow as piInvSrc2ModSrc1 < S1 + } + +cleanup: + return; // Need a statement after a label... +} diff --git a/libs/symcrypt/lib/ghash.c b/libs/symcrypt/lib/ghash.c new file mode 100644 index 00000000000..32533a74dfb --- /dev/null +++ b/libs/symcrypt/lib/ghash.c @@ -0,0 +1,951 @@ +// +// GHASH.c +// +// Implementation of the NIST SP800-38D GHASH function which is the +// core authentication function for the GCM and GMAC modes. +// +// This implementation was done by Niels Ferguson for the RSA32.lib library in 2008, +// and adapted to the SymCrypt library in 2009. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" +#include "ghash_definitions.h" + +////////////////////////////////////////////////////////////////////////////// +// Platform-independent code +// + +// +// GHashExpandKeyC +// Generic GHash key expansion routine, works on all platforms. +// This function computes a table of H, Hx, Hx^2, Hx^3, ..., Hx^127 +// +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyC( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ + UINT64 H0, H1, t; + UINT32 i; + + // + // (H1, H0) form a 128-bit integer, H1 is the upper part, H0 the lower part. + // Convert pH[] to (H1, H0) using MSByte first convention. 
+ // + H1 = SYMCRYPT_LOAD_MSBFIRST64( &pH[0] ); + H0 = SYMCRYPT_LOAD_MSBFIRST64( &pH[8] ); + + for( i=0; i<SYMCRYPT_GF128_FIELD_SIZE; i++ ) + { + expandedKey[i].ull[0] = H0; + expandedKey[i].ull[1] = H1; + // + // Multiply (H1,H0) by x in the GF(2^128) field using the field encoding from SP800-38D + // + t = UINT64_NEG(H0 & 1) & ((UINT64)GF128_FIELD_R_BYTE << (8 * ( sizeof( UINT64 ) - 1 )) ) ; + H0 = (H0 >> 1) | (H1 << 63); + H1 = (H1 >> 1) ^ t; + } +} + + +// +// GHashAppendDataC +// Generic GHash routine, works on all platforms. +// +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataC( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + UINT64 R0, R1; + UINT64 mask; + SYMCRYPT_ALIGN UINT32 state32[4]; + UINT32 t; + int i,j; + while( cbData >= SYMCRYPT_GF128_BLOCK_SIZE ) + { + R0 = R1 = 0; + + // + // We have two nested loops so that we can do most of our operations + // on 32-bit words. 64-bit rotates/shifts can be really slow on a 32-bit CPU. + // On AMD64 we use the XMM version which is much faster. 
+ // + state32[0] = (UINT32)pState->ull[0]; + state32[1] = (UINT32)(pState->ull[0] >> 32); + state32[2] = (UINT32)pState->ull[1]; + state32[3] = (UINT32)(pState->ull[1] >> 32); + for( i=0; i<4; i++ ) + { + t = SYMCRYPT_LOAD_MSBFIRST32( &pbData[4*i] ) ^ state32[3-i]; + for( j=31; j>=0; j-- ) + { + mask = (UINT64)( -(INT64)(t & 1 )); + R0 ^= expandedKeyTable[32*i+j].ull[0] & mask; + R1 ^= expandedKeyTable[32*i+j].ull[1] & mask; + t >>= 1; + } + } + pState->ull[0] = R0; + pState->ull[1] = R1; + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + cbData -= SYMCRYPT_GF128_BLOCK_SIZE; + } + + SymCryptWipeKnownSize( state32, sizeof( state32 ) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptGHashResult( + _In_ PCSYMCRYPT_GF128_ELEMENT pState, + _Out_writes_( SYMCRYPT_GF128_BLOCK_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_STORE_MSBFIRST64( pbResult , pState->ull[1] ); + SYMCRYPT_STORE_MSBFIRST64( pbResult + 8, pState->ull[0] ); +} + +//////////////////////////////////////////////////////////////////////////////////////////// +// XMM code +// + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyXmm( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ + // + // We use the same layout for XMM code as we did for C code, so we can use the same key + // expansion code. + // Improvement: we can add an expansion routine that uses the XMM registers for speed. 
+ // + + SymCryptGHashExpandKeyC( expandedKey, pH ); +} + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("sse2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("sse2") +#endif + +// +// The XMM-based GHash append data function, only on AMD64 & X86 +// +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataXmm( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + __m128i R; + __m128i cmpValue; + __m128i mask; + __m128i T; + __m128i tmp; + + PCSYMCRYPT_GF128_ELEMENT p; + PCSYMCRYPT_GF128_ELEMENT pLimit; + UINT32 t; + int i; + + cmpValue = _mm_setzero_si128(); // cmpValue = 0 + + while( cbData >= SYMCRYPT_GF128_BLOCK_SIZE ) + { + R = _mm_setzero_si128(); + + // + // The amd64 compiler can't optimize array indices in a loop where + // you use _mm intrinsics, + // so we do all the pointer arithmetic for the compiler. + // + p = &expandedKeyTable[0]; + pLimit = &expandedKeyTable[32]; + + for( i=0; i<4; i++ ) + { + // + // Set up our XMM register with 4 identical 32-bit integers so that + // we can generate the mask from the individual bits of the 32-bit value. + // Note the use of tmp; if we assign directly to the fields of T the + // compiler no longer caches T in an XMM register, which is bad. + // + // There are XMM instructions where we can do the duplication in the XMM + // registers, but they require SSE3 support, and this code only requires + // SSE2. As the inner loop consumes most of the time, it isn't worth + // using the SSE3 instructions. + // + // Note that accessing the state as an array of UINT32s depends on the + // endianness of the CPU, but this is XMM code that only runs on + // little endian machines. 
+ // + t = SYMCRYPT_LOAD_MSBFIRST32( &pbData[4*i] ) ^ pState->ul[3-i]; + tmp = _mm_set_epi32(t, t, t, t); + + T = tmp; + while( p < pLimit ) + { + // + // p and plimit are always at indexes that are multiples of 4 from + // the start of the array. + // We need to explain to prefast that this means that p <= pLimit - 4 + // + SYMCRYPT_ASSERT( p <= pLimit - 4 ); + + mask = _mm_cmpgt_epi32( cmpValue, T ); + T = _mm_add_epi32( T, T ); + mask = _mm_and_si128( mask, p[0].m128i ); + R = _mm_xor_si128( R, mask ); + + mask = _mm_cmpgt_epi32( cmpValue, T ); + T = _mm_add_epi32( T, T ); + mask = _mm_and_si128( mask, p[1].m128i ); + R = _mm_xor_si128( R, mask ); + + mask = _mm_cmpgt_epi32( cmpValue, T ); + T = _mm_add_epi32( T, T ); + mask = _mm_and_si128( mask, p[2].m128i ); + R = _mm_xor_si128( R, mask ); + + mask = _mm_cmpgt_epi32( cmpValue, T ); + T = _mm_add_epi32( T, T ); + mask = _mm_and_si128( mask, p[3].m128i ); + R = _mm_xor_si128( R, mask ); + + p += 4; + } + pLimit += 32; + } + + pState->m128i = R; + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + cbData -= SYMCRYPT_GF128_BLOCK_SIZE; + } +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif + +#if SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 +// +// The NEON-based GHash append data function, only on ARM & ARM64 +// +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataNeon( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + // Room for improvement: replace non-crypto NEON code below, based on a bit by bit lookup with + // pmull on 8b elements - 8x(8bx8b) -> 8x(16b) pmull is NEON instruction since Armv7 + // + // When properly unrolled: + // 1 (64bx64b -> 128b) pmull instruction and 1 eor instruction can be replaced by + // 8 (8x(8bx8b) -> 8x(16b)) pmull instructions and 8 eor instructions + // so each 128b of data could be processed by less than 64 
instructions (using karatsuba) + // rather than ~512 instructions (bit by bit) + // + // Not a priority, expect that AES-GCM performance will be dominated by AES on these platforms + + __n128 R; + __n128 cmpValue; + __n128 mask; + __n128 T; + + PCSYMCRYPT_GF128_ELEMENT p; + PCSYMCRYPT_GF128_ELEMENT pLimit; + UINT32 t; + int i; + + cmpValue = vdupq_n_u32(0); // cmpValue = 0 + + while( cbData >= SYMCRYPT_GF128_BLOCK_SIZE ) + { + R = cmpValue; + + // + // Do all the pointer arithmetic for the compiler. + // + p = &expandedKeyTable[0]; + pLimit = &expandedKeyTable[32]; + + for( i=0; i<4; i++ ) + { + // + // Set up our XMM register with 4 identical 32-bit integers so that + // we can generate the mask from the individual bits of the 32-bit value. + // Note the use of tmp; if we assign directly to the fields of T the + // compiler no longer caches T in an XMM register, which is bad. + // + // Note that accessing the state as an array of UINT32s depends on the + // endianness of the CPU, but Arm code is always expected to execute in + // little endian mode. + // + t = SYMCRYPT_LOAD_MSBFIRST32( &pbData[4*i] ) ^ pState->ul[3-i]; + T = vdupq_n_u32( t ); + + while( p < pLimit ) + { + // + // p and plimit are always at indexes that are multiples of 4 from + // the start of the array. 
+ // We need to explain to prefast that this means that p <= pLimit - 4 + // + SYMCRYPT_ASSERT( p <= pLimit - 4 ); + + mask = vcgtq_s32( cmpValue, T ); + T = vaddq_u32( T, T ); + mask = vandq_u32( mask, p[0].n128 ); + R = veorq_u32( R, mask ); + + mask = vcgtq_s32( cmpValue, T ); + T = vaddq_u32( T, T ); + mask = vandq_u32( mask, p[1].n128 ); + R = veorq_u32( R, mask ); + + mask = vcgtq_s32( cmpValue, T ); + T = vaddq_u32( T, T ); + mask = vandq_u32( mask, p[2].n128 ); + R = veorq_u32( R, mask ); + + mask = vcgtq_s32( cmpValue, T ); + T = vaddq_u32( T, T ); + mask = vandq_u32( mask, p[3].n128 ); + R = veorq_u32( R, mask ); + + p += 4; + } + pLimit += 32; + } + + pState->n128 = R; + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + cbData -= SYMCRYPT_GF128_BLOCK_SIZE; + } +} +#endif + + +////////////////////////////////////////////////////////////////////////////////////// +// Pclmulqdq implementation +// + +/* +GHASH GF(2^128) multiplication using PCLMULQDQ + +The GF(2^128) field used in GHASH is GF(2)[x]/p(x) where p(x) is the primitive polynomial + x^128 + x^7 + x^2 + x + 1 + +Notation: We use the standard mathematical notation '+' for the addition in the field, +which corresponds to a xor of the bits. + +Multiplication: +Given two field elements A and B (represented as 128-bit values), +we first compute the polynomial product + (C,D) := A * B +where C and D are also 128-bit values. + +The PCLMULQDQ instruction performs a 64 x 64 -> 128 bit carryless multiplication. +To multiply 128-bit values we write A = (A1, A0) and B = (B1, B0) in two 64-bit halves. + +The schoolbook multiplication is computed by + (C, D) = (A1 * B1)x^128 + (A1 * B0 + A0 * B1)x^64 + (A0 * B0) +This require four PCLMULQDQ instructions. The middle 128-bit result has to be shifted +left and right, and each half added to the upper and lower 128-bit result to get (C,D). 
+ +Alternatively, the middle 128-bit intermediate result can be computed using Karatsuba: + (A1*B0 + A0*B1) = (A1 + A0) * (B1 + B0) + (A1*B1) + (A0*B0) +This requires only one PCLMULQDQ instruction to multiply (A1 + A0) by (B1 + B0) +as the other two products are already computed. +Whether this is faster depends on the relative speed of shift/xor versus PCLMULQDQ. + +Both multiplication algorithms produce three 128-bit intermediate results (R1, Rmid, R0), +with the full result defined by R1 x^128 + Rmid x^64 + R0. +If we do Multiply-Accumulate then we can accumulate the three 128-bit intermediate results +directly. As there are no carries, there is no overflow, and the combining of the three +intermediate results into a 256-bit result can be shared amongst all multiplications. + + +Modulo reduction: +We use << and >> to denote shifts on 128-bit values. +The modulo reduction can now be done as follows: +given a 256-bit value (C,D) representing C x^128 + D we compute + (T1,T0) := C + C*x + C * x^2 + C * x^7 + R := D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + +(T1,T0) is just the value C x^128 reduced one step modulo p(x). The value T1 is at most 7 bits, +so in the next step the reduction, which computes the result R, is easy. The +expression T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) is just T1 * x^128 reduced modulo p(x). + +Let's first get rid of the polynomial arithmetic and write this completely using shifts on +128-bit values.
+ +T0 := C + (C << 1) + (C << 2) + (C << 7) +T1 := (C >> 127) + (C >> 126) + (C >> 121) +R := D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + +We can optimize this by rewriting the equations + +T2 := T1 + C + = C + (C>>127) + (C>>126) + (C>>121) +R = D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + = D + C + (C << 1) + (C << 2) + (C << 7) + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + = D + T2 + (T2 << 1) + (T2 << 2) + (T2 << 7) + +Thus +T2 = C + (C>>127) + (C>>126) + (C>>121) +R = D + T2 + (T2 << 1) + (T2 << 2) + (T2 << 7) + +Gets the right result and uses only 6 shifts. + +The SSE instruction set does not implement bit-shifts of 128-bit values. Instead, we will +use bit-shifts of the 32-bit subvalues, and byte shifts (shifts by a multiple of 8 bits) +on the full 128-bit values. +We use the <<<< and >>>> operators to denote shifts on 32-bit subwords. + +We can now do the modulo reduction by + +t1 := (C >> 127) = (C >>>> 31) >> 96 +t2 := (C >> 126) = (C >>>> 30) >> 96 +t3 := (C >> 121) = (C >>>> 25) >> 96 +T2 = C + t1 + t2 + t3 + +left-shifts in the computation of R are a bit more involved as we have to move bits from +one subword to the next + +u1 := (T2 << 1) = (T2 <<<< 1) + ((T2 >>>> 31) << 32) +u2 := (T2 << 2) = (T2 <<<< 2) + ((T2 >>>> 30) << 32) +u3 := (T2 << 7) = (T2 <<<< 7) + ((T2 >>>> 25) << 32) +R = D + T2 + u1 + u2 + u3 + +We can eliminate some common subexpressions. For any k we have +(T2 >>>> k) = ((C + r) >>>> k) +where r is a 7-bit value. If k>7 then this is equal to (C >>>> k). This means that +the value (T2 >>>> 31) is equal to (C >>>> 31) so we don't have to compute it again. 
+ +So we can rewrite our formulas as +t4 := (C >>>> 31) +t5 := (C >>>> 30) +t6 := (C >>>> 25) +ts = t4 + t5 + t6 +T2 = C + (ts >> 96) + +Note that ts = (C >>>> 31) + (C >>>> 30) + (C >>>> 25) +which is equal to (T2 >>>> 31) + (T2 >>>> 30) + (T2 >>>> 25) + +R = D + T2 + u1 + u2 + u3 + = D + T2 + (T2 <<<< 1) + (T2 <<<< 2) + (T2 <<<< 7) + (ts << 32) + +All together, we can do the modulo reduction using the following formulas + +ts := (C >>>> 31) + (C >>>> 30) + (C >>>> 25) +T2 := C + (ts >> 96) +R = D + T2 + (T2 <<<< 1) + (T2 <<<< 2) + (T2 <<<< 7) + (ts << 32) + +Using a total of 16 operations. (6 subword shifts, 2 byte shifts, and 8 additions) + +Reversed bit order: +There is one more complication. GHASH uses the bits in the reverse order from normal representation. +The bits b_0, b_1, ..., b_127 represent the polynomial b_0 + b_1 * x + ... + b_127 * x^127. +This means that the most significant bit in each byte is actually the least significant bit in the +polynomial. + +SSE CPUs use the LSBFirst convention. This means that the bits b_0, b_1, ..., b_127 of the polynomial +end up at positions 7, 6, 5, ..., 1, 0, 15, 14, ..., 9, 8, 23, 22, ... of our XMM register. +This is obviously not a useful representation to do arithmetic in. +The first step is to BSWAP the value so that the bits appear in pure reverse order. +That is at least algebraically useful. + +To compute the multiplication we use the fact that GF(2)[x] multiplication has no carries and +thus no preference for bit order. After the BSWAP we don't have the values A and B, but rather +rev(A) and rev(B) where rev() is a function that reverses the bit order. We can now compute + + rev(A) * rev(B) = rev( A*B ) >> 1 + +where the shift operator is on the 256-bit product. + +The modulo reduction remains the same, except that we change all the shifts to be the other direction. + +This gives us finally the outline of our multiplication: + +- Apply BSWAP to all values loaded from memory. 
+ A := BSWAP( Abytes ) + B := BSWAP( Bbytes ) +- Compute the 256-bit product, possibly using Karatsuba. + (P1, P0) := A * B // 128x128 carryless multiplication +- Shift the result left one bit. + (Q1, Q0) := (P1, P0) << 1 + which is computed as + Q0 = (P0 <<<< 1) + (P0 >>>> 31) << 32 + Q1 = (P1 <<<< 1) + (P1 >>>> 31) << 32 + (P0 >>>> 31) >> 96 +- Perform the modulo reduction, with reversed bit order + ts := (Q0 <<<< 31) + (Q0 <<<< 30) + (Q0 <<<< 25) + T2 := Q0 + (ts << 96) + R = Q1 + T2 + (T2 >>>> 1) + (T2 >>>> 2) + (T2 >>>> 7) + (ts >> 32) + +Future work: +It might be possible to construct a faster solution by merging the leftshift of (P1,P0) +with the modulo reduction. + +*/ + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3,pclmul"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3,pclmul") +#endif + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyPclmulqdq( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ + int i; + __m128i H, Hx, H2, H2x; + __m128i t0, t1, t2, t3, t4, t5; + __m128i Hi_even, Hix_even, Hi_odd, Hix_odd; + __m128i BYTE_REVERSE_ORDER = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + // + // Our expanded key consists of a list of N=SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS + // powers of H. The first entry is H^N, the next H^(N-1), then H^(N-2), ... + // + // For each power we store two 128-bit values. The first is H^i (Hi) and the second + // contains the two halves of H^i xorred with each other in the lower 64 bits (Hix). + // + // We keep all of the Hi entries together in the first half of the expanded key + // table, and all of the Hix entries together in the second half of the table. 
+ // + // This ordering allow for efficient vectorization with arbitrary vector width, as + // many multiplication constants can be loaded into wider vectors with the correct + // alignment. Not maintaining different layouts for different vector lengths does + // leave a small amount of performance on the table, but experimentally it seems to + // <1% difference, and using a single layout reduces complexity significantly. + // + C_ASSERT( 2*SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS <= SYMCRYPT_GF128_FIELD_SIZE ); + + H = _mm_loadu_si128((__m128i *) pH ); + H = _mm_shuffle_epi8( H, BYTE_REVERSE_ORDER ); + Hx = _mm_xor_si128( H, _mm_srli_si128( H, 8 ) ); + + _mm_store_si128( &GHASH_H_POWER(expandedKey, 1), H ); + _mm_store_si128( &GHASH_Hx_POWER(expandedKey, 1), Hx ); + + CLMUL_X_3( H, Hx, H, Hx, t0, t1, t2 ); + CLMUL_3_POST( t0, t1, t2 ); + MODREDUCE( vMultiplicationConstant, t0, t1, t2, H2 ); + H2x = _mm_xor_si128( H2, _mm_srli_si128( H2, 8 ) ); + _mm_store_si128( &GHASH_H_POWER(expandedKey, 2), H2 ); + _mm_store_si128( &GHASH_Hx_POWER(expandedKey, 2), H2x ); + + Hi_even = H2; + Hix_even = H2x; + + for( i=2; i<SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS; i+=2 ) + { + CLMUL_X_3( H, Hx, Hi_even, Hix_even, t0, t1, t2 ); + CLMUL_3_POST( t0, t1, t2 ); + CLMUL_X_3( H2, H2x, Hi_even, Hix_even, t3, t4, t5 ); + CLMUL_3_POST( t3, t4, t5 ); + MODREDUCE( vMultiplicationConstant, t0, t1, t2, Hi_odd ); + MODREDUCE( vMultiplicationConstant, t3, t4, t5, Hi_even ); + Hix_odd = _mm_xor_si128( Hi_odd, _mm_srli_si128( Hi_odd, 8 ) ); + Hix_even = _mm_xor_si128( Hi_even, _mm_srli_si128( Hi_even, 8 ) ); + + _mm_store_si128( &GHASH_H_POWER(expandedKey, i + 1), Hi_odd ); + _mm_store_si128( &GHASH_H_POWER(expandedKey, i + 2), Hi_even ); + _mm_store_si128( &GHASH_Hx_POWER(expandedKey, i + 1), Hix_odd ); + _mm_store_si128( &GHASH_Hx_POWER(expandedKey, i + 2), Hix_even ); + } +} + + + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataPclmulqdq( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT 
expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + __m128i state; + __m128i data; + __m128i a0, a1, a2; + __m128i Hi, Hix; + SIZE_T i; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + + // + // To do a BSWAP we need an __m128i value with the bytes + // + + __m128i BYTE_REVERSE_ORDER = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + state = _mm_loadu_si128( (__m128i *) pState ); + + while( nBlocks > 0 ) + { + // + // We process the data in blocks of up to SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS blocks + // + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + + // + // The first block is xorred with the state before multiplying it with a power of H + // + data = _mm_loadu_si128( (__m128i *) pbData ); + data = _mm_shuffle_epi8( data, BYTE_REVERSE_ORDER ); + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + + state = _mm_xor_si128( state, data ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + // + // Then we just do an improduct + // + for( i=1; i<todo; i++ ) + { + data = _mm_loadu_si128( (__m128i *) pbData ); + data = _mm_shuffle_epi8( data, BYTE_REVERSE_ORDER ); + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + + Hi = _mm_load_si128( &GHASH_H_POWER(expandedKeyTable, todo - i) ); + Hix = _mm_load_si128( &GHASH_Hx_POWER(expandedKeyTable, todo - i) ); + CLMUL_ACC_3( data, Hi, Hix, a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + nBlocks -= todo; + } + + _mm_storeu_si128((__m128i *)pState, state ); +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_X86 || CPU_AMD64 + +#if SYMCRYPT_CPU_ARM64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("aes"))), apply_to=function) +#else 
+#pragma GCC push_options +#pragma GCC target("aes") +#endif + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyPmull( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ + int i; + __n128 H, Hx, H2, H2x; + __n128 t0, t1, t2, t3, t4, t5; + __n128 Hi_even, Hix_even, Hi_odd, Hix_odd; + const __n64 vMultiplicationConstant = SYMCRYPT_SET_N64_U64(0xc200000000000000); + // + // Our expanded key consists of a list of N=SYMCRYPT_GHASH_PMULL_HPOWERS + // powers of H. The first entry is H^N, the next H^(N-1), then H^(N-2), ... + // + // For each power we store two 128-bit values. The first is H^i (Hi) and the second + // contains the two halves of H^i xorred with each other in the lower 64 bits (Hix). + // + // We keep all of the Hi entries together in the first half of the expanded key + // table, and all of the Hix entries together in the second half of the table. + // + // This ordering allow for efficient vectorization with arbitrary vector width, as + // many multiplication constants can be loaded into wider vectors with the correct + // alignment. Not maintaining different layouts for different vector lengths does + // leave a small amount of performance on the table, but experimentally it seems to + // <1% difference, and using a single layout reduces complexity significantly. 
+ // + C_ASSERT( 2*SYMCRYPT_GHASH_PMULL_HPOWERS <= SYMCRYPT_GF128_FIELD_SIZE ); + + H = *(__n128 *) pH; + Hx = vrev64q_u8( H ); + H = vextq_u8( Hx, Hx, 8 ); + Hx = veorq_u8( H, Hx ); + + GHASH_H_POWER(expandedKey, 1) = H; + GHASH_Hx_POWER(expandedKey, 1) = Hx; + + CLMUL_X_3( H, Hx, H, Hx, t0, t1, t2 ); + CLMUL_3_POST( t0, t1, t2 ); + MODREDUCE( vMultiplicationConstant, t0, t1, t2, H2 ); + H2x = veorq_u8( H2, vextq_u8( H2, H2, 8 ) ); + GHASH_H_POWER(expandedKey, 2) = H2; + GHASH_Hx_POWER(expandedKey, 2) = H2x; + + Hi_even = H2; + Hix_even = H2x; + + for( i=2; i<SYMCRYPT_GHASH_PMULL_HPOWERS; i+=2 ) + { + CLMUL_X_3( H, Hx, Hi_even, Hix_even, t0, t1, t2 ); + CLMUL_3_POST( t0, t1, t2 ); + CLMUL_X_3( H2, H2x, Hi_even, Hix_even, t3, t4, t5 ); + CLMUL_3_POST( t3, t4, t5 ); + MODREDUCE( vMultiplicationConstant, t0, t1, t2, Hi_odd ); + MODREDUCE( vMultiplicationConstant, t3, t4, t5, Hi_even ); + Hix_odd = veorq_u8( Hi_odd, vextq_u8( Hi_odd, Hi_odd, 8 ) ); + Hix_even = veorq_u8( Hi_even, vextq_u8( Hi_even, Hi_even, 8 ) ); + + GHASH_H_POWER(expandedKey, i + 1) = Hi_odd; + GHASH_H_POWER(expandedKey, i + 2) = Hi_even; + GHASH_Hx_POWER(expandedKey, i + 1) = Hix_odd; + GHASH_Hx_POWER(expandedKey, i + 2) = Hix_even; + } +} + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataPmull( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + __n128 state; + __n128 data, datax; + __n128 a0, a1, a2; + __n128 Hi, Hix; + const __n64 vMultiplicationConstant = SYMCRYPT_SET_N64_U64(0xc200000000000000); + SIZE_T i; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + + state = *(__n128 *) pState; + + while( nBlocks > 0 ) + { + // + // We process the data in blocks of up to SYMCRYPT_GHASH_PMULL_HPOWERS blocks + // + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + + // + // The first block is xorred with the state before multiplying it 
with a power of H + // + data = *(__n128 *)pbData; + REVERSE_BYTES( data, data ); + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + + state = veorq_u8( state, data ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + // + // Then we just do an improduct + // + for( i=1; i<todo; i++ ) + { + // we can avoid an EXT here by precomputing datax for CLMUL_ACCX_3 + datax = vrev64q_u8( *(__n128 *)pbData ); + data = vextq_u8( datax, datax, 8 ); + datax = veorq_u8( data, datax ); + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + + Hi = GHASH_H_POWER(expandedKeyTable, todo - i); + Hix = GHASH_Hx_POWER(expandedKeyTable, todo - i); + CLMUL_ACCX_3( data, datax, Hi, Hix, a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + nBlocks -= todo; + } + + *(__n128 *) pState = state; +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_ARM64 + + + +////////////////////////////////////////////////////////////// +// Stuff around the core algorithm implementation functions +// + + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKey( + _Out_ PSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ +#if SYMCRYPT_CPU_X86 + PSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + // + // Initialize offset into table space for 16-alignment. + // + expandedKey->tableOffset = (0 -((UINT_PTR) &expandedKey->tableSpace[0])) % sizeof(SYMCRYPT_GF128_ELEMENT); + + pExpandedKeyTable = (PSYMCRYPT_GF128_ELEMENT)&expandedKey->tableSpace[expandedKey->tableOffset]; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE ) ) + { + // + // We can only use the PCLMULQDQ data representation if the SaveXmm never fails. + // This is one of the CPU features required. + // We check anyway... 
+ // + if( SymCryptSaveXmm( &SaveData ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'pclm' ); + } + SymCryptGHashExpandKeyPclmulqdq( pExpandedKeyTable, pH ); + SymCryptRestoreXmm( &SaveData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptGHashExpandKeyXmm( pExpandedKeyTable, pH ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptGHashExpandKeyC( pExpandedKeyTable, pH ); + } + +#elif SYMCRYPT_CPU_AMD64 + PSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + pExpandedKeyTable = &expandedKey->table[0]; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE ) ) + { + SymCryptGHashExpandKeyPclmulqdq( pExpandedKeyTable, pH ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) ) + { + SymCryptGHashExpandKeyXmm( pExpandedKeyTable, pH ); + } else { + SymCryptGHashExpandKeyC( pExpandedKeyTable, pH ); + } + +#elif SYMCRYPT_CPU_ARM64 + PSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + pExpandedKeyTable = &expandedKey->table[0]; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_PMULL ) ) + { + SymCryptGHashExpandKeyPmull( pExpandedKeyTable, pH ); + } else { + SymCryptGHashExpandKeyC( pExpandedKeyTable, pH ); + } + +#else + SymCryptGHashExpandKeyC( &expandedKey->table[0], pH ); // Default expansion (does not need alignment) +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendData( + _In_ PCSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_X86 + PCSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + pExpandedKeyTable = (PSYMCRYPT_GF128_ELEMENT)&expandedKey->tableSpace[expandedKey->tableOffset]; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE ) ) + { + if( SymCryptSaveXmm( &SaveData ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'pclm' ); + } + 
SymCryptGHashAppendDataPclmulqdq( pExpandedKeyTable, pState, pbData, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptGHashAppendDataXmm( pExpandedKeyTable, pState, pbData, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptGHashAppendDataC( pExpandedKeyTable, pState, pbData, cbData ); + } + +#elif SYMCRYPT_CPU_AMD64 + PCSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + + pExpandedKeyTable = &expandedKey->table[0]; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE ) ) + { + SymCryptGHashAppendDataPclmulqdq( pExpandedKeyTable, pState, pbData, cbData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) ) + { + SymCryptGHashAppendDataXmm( pExpandedKeyTable, pState, pbData, cbData ); + } else { + SymCryptGHashAppendDataC( pExpandedKeyTable, pState, pbData, cbData ); + } +#elif SYMCRYPT_CPU_ARM + PCSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + + pExpandedKeyTable = &expandedKey->table[0]; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + SymCryptGHashAppendDataNeon( pExpandedKeyTable, pState, pbData, cbData ); + } else { + SymCryptGHashAppendDataC( pExpandedKeyTable, pState, pbData, cbData ); + } +#elif SYMCRYPT_CPU_ARM64 + PCSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + + pExpandedKeyTable = &expandedKey->table[0]; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_PMULL ) ) + { + SymCryptGHashAppendDataPmull( pExpandedKeyTable, pState, pbData, cbData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + SymCryptGHashAppendDataNeon( pExpandedKeyTable, pState, pbData, cbData ); + } else { + SymCryptGHashAppendDataC( pExpandedKeyTable, pState, pbData, cbData ); + } +#else + SymCryptGHashAppendDataC( &expandedKey->table[0], pState, pbData, cbData ); +#endif +} diff --git a/libs/symcrypt/lib/ghash_definitions.h 
b/libs/symcrypt/lib/ghash_definitions.h new file mode 100644 index 00000000000..351ad82f9d2 --- /dev/null +++ b/libs/symcrypt/lib/ghash_definitions.h @@ -0,0 +1,472 @@ +// +// ghash_definitions.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +////////////////////////////////////////////////////////////////////////////// +// Constants & globals +// + +#define GF128_FIELD_R_BYTE (0xe1) +#define UINT64_NEG(x) ((UINT64)-(INT64)(x)) + + + +////////////////////////////////////////////////////////////////////////////////////// +// Pclmulqdq implementation +// + +/* +GHASH GF(2^128) multiplication using PCLMULQDQ + +The GF(2^128) field used in GHASH is GF(2)[x]/p(x) where p(x) is the primitive polynomial + x^128 + x^7 + x^2 + x + 1 + +Notation: We use the standard mathematical notation '+' for the addition in the field, +which corresponds to a xor of the bits. + +Multiplication: +Given two field elements A and B (represented as 128-bit values), +we first compute the polynomial product + (C,D) := A * B +where C and D are also 128-bit values. + +The PCLMULQDQ instruction performs a 64 x 64 -> 128 bit carryless multiplication. +To multiply 128-bit values we write A = (A1, A0) and B = (B1, B0) in two 64-bit halves. + +The schoolbook multiplication is computed by + (C, D) = (A1 * B1)x^128 + (A1 * B0 + A0 * B1)x^64 + (A0 * B0) +This requires four PCLMULQDQ instructions. The middle 128-bit result has to be shifted +left and right, and each half added to the upper and lower 128-bit result to get (C,D). + +Alternatively, the middle 128-bit intermediate result can be computed using Karatsuba: + (A1*B0 + A0*B1) = (A1 + A0) * (B1 + B0) + (A1*B1) + (A0*B0) +This requires only one PCLMULQDQ instruction to multiply (A1 + A0) by (B1 + B0) +as the other two products are already computed. +Whether this is faster depends on the relative speed of shift/xor versus PCLMULQDQ.
+ +Both multiplication algorithms produce three 128-bit intermediate results (R1, Rmid, R0), +with the full result defined by R1 x^128 + Rmid x^64 + R0. +If we do Multiply-Accumulate then we can accumulate the three 128-bit intermediate results +directly. As there are no carries, there is no overflow, and the combining of the three +intermediate results into a 256-bit result can be shared amongst all multiplications. + + +Modulo reduction: +We use << and >> to denote shifts on 128-bit values. +The modulo reduction can now be done as follows: +given a 256-bit value (C,D) representing C x^128 + D we compute + (T1,T0) := C + C*x + C * x^2 + C * x^7 + R := D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + +(T1,T0) is just the value C x^128 reduced one step modulo p(x). The value T1 is at most 7 bits, +so the next step of the reduction, which computes the result R, is easy. The +expression T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) is just T1 * x^128 reduced modulo p(x). + +Let's first get rid of the polynomial arithmetic and write this completely using shifts on +128-bit values. + +T0 := C + (C << 1) + (C << 2) + (C << 7) +T1 := (C >> 127) + (C >> 126) + (C >> 121) +R := D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + +We can optimize this by rewriting the equations + +T2 := T1 + C + = C + (C>>127) + (C>>126) + (C>>121) +R = D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + = D + C + (C << 1) + (C << 2) + (C << 7) + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + = D + T2 + (T2 << 1) + (T2 << 2) + (T2 << 7) + +Thus +T2 = C + (C>>127) + (C>>126) + (C>>121) +R = D + T2 + (T2 << 1) + (T2 << 2) + (T2 << 7) + +Gets the right result and uses only 6 shifts. + +The SSE instruction set does not implement bit-shifts of 128-bit values. Instead, we will +use bit-shifts of the 32-bit subvalues, and byte shifts (shifts by a multiple of 8 bits) +on the full 128-bit values. +We use the <<<< and >>>> operators to denote shifts on 32-bit subwords.
+ +We can now do the modulo reduction by + +t1 := (C >> 127) = (C >>>> 31) >> 96 +t2 := (C >> 126) = (C >>>> 30) >> 96 +t3 := (C >> 121) = (C >>>> 25) >> 96 +T2 = C + t1 + t2 + t3 + +left-shifts in the computation of R are a bit more involved as we have to move bits from +one subword to the next + +u1 := (T2 << 1) = (T2 <<<< 1) + ((T2 >>>> 31) << 32) +u2 := (T2 << 2) = (T2 <<<< 2) + ((T2 >>>> 30) << 32) +u3 := (T2 << 7) = (T2 <<<< 7) + ((T2 >>>> 25) << 32) +R = D + T2 + u1 + u2 + u3 + +We can eliminate some common subexpressions. For any k we have +(T2 >>>> k) = ((C + r) >>>> k) +where r is a 7-bit value. If k>7 then this is equal to (C >>>> k). This means that +the value (T2 >>>> 31) is equal to (C >>>> 31) so we don't have to compute it again. + +So we can rewrite our formulas as +t4 := (C >>>> 31) +t5 := (C >>>> 30) +t6 := (C >>>> 25) +ts = t4 + t5 + t6 +T2 = C + (ts >> 96) + +Note that ts = (C >>>> 31) + (C >>>> 30) + (C >>>> 25) +which is equal to (T2 >>>> 31) + (T2 >>>> 30) + (T2 >>>> 25) + +R = D + T2 + u1 + u2 + u3 + = D + T2 + (T2 <<<< 1) + (T2 <<<< 2) + (T2 <<<< 7) + (ts << 32) + +All together, we can do the modulo reduction using the following formulas + +ts := (C >>>> 31) + (C >>>> 30) + (C >>>> 25) +T2 := C + (ts >> 96) +R = D + T2 + (T2 <<<< 1) + (T2 <<<< 2) + (T2 <<<< 7) + (ts << 32) + +Using a total of 16 operations. (6 subword shifts, 2 byte shifts, and 8 additions) + +Reversed bit order: +There is one more complication. GHASH uses the bits in the reverse order from normal representation. +The bits b_0, b_1, ..., b_127 represent the polynomial b_0 + b_1 * x + ... + b_127 * x^127. +This means that the most significant bit in each byte is actually the least significant bit in the +polynomial. + +SSE CPUs use the LSBFirst convention. This means that the bits b_0, b_1, ..., b_127 of the polynomial +end up at positions 7, 6, 5, ..., 1, 0, 15, 14, ..., 9, 8, 23, 22, ... of our XMM register. 
+This is obviously not a useful representation to do arithmetic in. +The first step is to BSWAP the value so that the bits appear in pure reverse order. +That is at least algebraically useful. + +To compute the multiplication we use the fact that GF(2)[x] multiplication has no carries and +thus no preference for bit order. After the BSWAP we don't have the values A and B, but rather +rev(A) and rev(B) where rev() is a function that reverses the bit order. We can now compute + + rev(A) * rev(B) = rev( A*B ) >> 1 + +where the shift operator is on the 256-bit product. + +The modulo reduction remains the same, except that we change all the shifts to be the other direction. + +This gives us finally the outline of our multiplication: + +- Apply BSWAP to all values loaded from memory. + A := BSWAP( Abytes ) + B := BSWAP( Bbytes ) +- Compute the 256-bit product, possibly using Karatsuba. + (P1, P0) := A * B // 128x128 carryless multiplication +- Shift the result left one bit. + (Q1, Q0) := (P1, P0) << 1 + which is computed as + Q0 = (P0 <<<< 1) + (P0 >>>> 31) << 32 + Q1 = (P1 <<<< 1) + (P1 >>>> 31) << 32 + (P0 >>>> 31) >> 96 +- Perform the modulo reduction, with reversed bit order + ts := (Q0 <<<< 31) + (Q0 <<<< 30) + (Q0 <<<< 25) + T2 := Q0 + (ts << 96) + R = Q1 + T2 + (T2 >>>> 1) + (T2 >>>> 2) + (T2 >>>> 7) + (ts >> 32) + +Future work: +It might be possible to construct a faster solution by merging the leftshift of (P1,P0) +with the modulo reduction. 
+ +*/ + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#define SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS 32 + +#define GHASH_H_POWER( ghashTable, ind ) ( (ghashTable)[ SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS - (ind)].m128i ) +#define GHASH_Hx_POWER( ghashTable, ind ) ( (ghashTable)[2*SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS - (ind)].m128i ) + +// +// We define a few macros +// + +// +// CLMUL_4 multiplies two operands into three intermediate results using 4 pclmulqdq instructions +// +#define CLMUL_4( opA, opB, resl, resm, resh ) \ +{ \ + resl = _mm_clmulepi64_si128( opA, opB, 0x00 ); \ + resm = _mm_xor_si128( _mm_clmulepi64_si128( opA, opB, 0x01 ), _mm_clmulepi64_si128( opA, opB, 0x10 ) ); \ + resh = _mm_clmulepi64_si128( opA, opB, 0x11 ); \ +}; + +// +// CLMUL_3 multiplies two operands into three intermediate results using 3 pclmulqdq instructions. +// The second operand has a pre-computed difference of the two halves. +// This uses Karatsuba, but we delay xorring the high and low piece into the middle piece. +// +#define CLMUL_3( opA, opB, opBx, resl, resm, resh ) \ +{ \ + __m128i _tmpA; \ + resl = _mm_clmulepi64_si128( opA, opB, 0x00 ); \ + resh = _mm_clmulepi64_si128( opA, opB, 0x11 ); \ + _tmpA = _mm_xor_si128( opA, _mm_srli_si128( opA, 8 ) ); \ + resm = _mm_clmulepi64_si128( _tmpA, opBx, 0x00 ); \ +}; +// +// CLMUL_X_3 is as CLMUL_3 only it takes precomputed differences of both multiplicands. 
+// +#define CLMUL_X_3( opA, opAx, opB, opBx, resl, resm, resh ) \ +{ \ + resl = _mm_clmulepi64_si128( opA, opB, 0x00 ); \ + resh = _mm_clmulepi64_si128( opA, opB, 0x11 ); \ + resm = _mm_clmulepi64_si128( opAx, opBx, 0x00 ); \ +}; + +// +// Post-process the CLMUL_3 result to be compatible with the CLMUL_4 +// +#define CLMUL_3_POST( resl, resm, resh ) \ + resm = _mm_xor_si128( resm, _mm_xor_si128( resl, resh ) ); + +// +// Multiply-accumulate using CLMUL_4 +// +#define CLMUL_ACC_4( opA, opB, resl, resm, resh ) \ +{\ + __m128i _tmpl, _tmpm, _tmph;\ + CLMUL_4( opA, opB, _tmpl, _tmpm, _tmph );\ + resl = _mm_xor_si128( resl, _tmpl ); \ + resm = _mm_xor_si128( resm, _tmpm ); \ + resh = _mm_xor_si128( resh, _tmph ); \ +}; + +// +// Multiply-accumulate using CLMUL_3 +// +#define CLMUL_ACC_3( opA, opB, opBx, resl, resm, resh ) \ +{\ + __m128i _tmpl, _tmpm, _tmph;\ + CLMUL_3( opA, opB, opBx, _tmpl, _tmpm, _tmph );\ + resl = _mm_xor_si128( resl, _tmpl ); \ + resm = _mm_xor_si128( resm, _tmpm ); \ + resh = _mm_xor_si128( resh, _tmph ); \ +}; +#define CLMUL_ACC_3_Ymm( opA, opB, opBx, resl, resm, resh ) \ +{\ + __m256i _tmpl, _tmpm, _tmph;\ + __m256i _tmpA; \ + _tmpl = _mm256_clmulepi64_epi128( opA, opB, 0x00 ); \ + _tmph = _mm256_clmulepi64_epi128( opA, opB, 0x11 ); \ + _tmpA = _mm256_xor_si256( opA, _mm256_srli_si256( opA, 8 ) ); \ + _tmpm = _mm256_clmulepi64_epi128( _tmpA, opBx, 0x00 ); \ + resl = _mm256_xor_si256( resl, _tmpl ); \ + resm = _mm256_xor_si256( resm, _tmpm ); \ + resh = _mm256_xor_si256( resh, _tmph ); \ +}; + + +// +// Convert the 3 intermediate results to a 256-bit result, +// and do the modulo reduction. 
+#define MODREDUCE( vMultiplicationConstant, rl, rm, rh, res ) \ +{\ + __m128i _T0, _T1; \ +\ + /* multiply rl by constant which is (rev(0x87) << 1) - we'll eor the lost high bit in manually */ \ + _T0 = _mm_clmulepi64_si128( rl, vMultiplicationConstant, 0x00 ); \ +\ + /* we want the high 64b of rl to align with the low 64b of rm, because we haven't merged rm into rl and rh */ \ + /* we want the low 64b of rl to align with the high 64b of rm, because we lost the high bit in the previous pmull */ \ + rl = _mm_shuffle_epi32( rl, _MM_SHUFFLE( 1, 0, 3, 2 ) ); \ +\ + rm = _mm_xor_si128( rm, _T0 ); \ + rm = _mm_xor_si128( rm, rl ); \ +\ + /* almost same again to fold rm into rh, but bit 63 needs no more multiplication and the result ultimately needs shifting left by 1 */ \ + /* pre-shift bottom of rm left by 1 and accumulate the result when the other parts are aligned */ \ + _T0 = _mm_clmulepi64_si128( _mm_slli_epi64( rm, 1 ), vMultiplicationConstant, 0x00 ); \ +\ + rm = _mm_shuffle_epi32( rm, _MM_SHUFFLE( 1, 0, 3, 2 ) ); \ + res = _mm_xor_si128( rh, rm ); \ +\ + /* rotate res left by 1 and accumulate the aligned parts */ \ + _T1 = _mm_slli_epi32( res, 1 ); \ + res = _mm_srli_epi32( res, 31 ); \ +\ + _T0 = _mm_xor_si128( _T0, _T1 ); \ + res = _mm_shuffle_epi32( res, _MM_SHUFFLE( 2, 1, 0, 3 ) ); \ +\ + res = _mm_xor_si128( res, _T0 ); \ +}; + +// +// See the large comment above on how this is done. +// When we want to do MODREDUCE in parallel with other work, making use of pclmuldq to reduce +// total instruction count (and register pressure) is beneficial. When testing on Haswell, +// using the newer approach is beneficial. Keeping the old approach around in case we have significant +// regression on older platforms. 
+// +#define MODREDUCE_OLD( rl, rm, rh, res ) \ +{\ + __m128i _T0, _T1, _T2, _Q0, _Q1; \ + rl = _mm_xor_si128( rl, _mm_slli_si128( rm, 8 ) ); \ + rh = _mm_xor_si128( rh, _mm_srli_si128( rm, 8 ) ); \ +\ + _Q0 = _mm_slli_epi32( rl, 1 ); \ + _Q1 = _mm_slli_epi32( rh, 1 ); \ +\ + _T0 = _mm_srli_epi32( rl, 31 ); \ + _T1 = _mm_srli_epi32( rh, 31 ); \ +\ + _T1 = _mm_alignr_epi8( _T1, _T0, 12 ); \ + _T0 = _mm_slli_si128( _T0, 4 ); \ +\ + _Q0 = _mm_xor_si128( _Q0, _T0 ); \ + _Q1 = _mm_xor_si128( _Q1, _T1 ); \ +\ + _T0 = _mm_slli_epi32( _Q0, 31 ); \ + _T1 = _mm_slli_epi32( _Q0, 30 ); \ + _T2 = _mm_slli_epi32( _Q0, 25 ); \ + _T0 = _mm_xor_si128( _T0, _T1 ); \ + _T0 = _mm_xor_si128( _T0, _T2 ); \ +\ + _T1 = _mm_slli_si128( _T0, 12 ); \ +\ + _T2 = _mm_xor_si128( _Q0, _T1 ); \ +\ + res = _mm_xor_si128( _Q1, _T2 ); \ + _T1 = _mm_srli_si128( _T0, 4 ); \ + res = _mm_xor_si128( res, _T1 ); \ +\ + _T0 = _mm_srli_epi32( _T2, 1 ); \ + _T1 = _mm_srli_epi32( _T2, 2 ); \ + _T2 = _mm_srli_epi32( _T2, 7 ); \ +\ + _T1 = _mm_xor_si128( _T0, _T1 ); \ + res = _mm_xor_si128( res, _T2 ); \ + res = _mm_xor_si128( res, _T1 ); \ +}; + +#endif // CPU_X86 || CPU_AMD64 + +#if SYMCRYPT_CPU_ARM64 + +#define SYMCRYPT_GHASH_PMULL_HPOWERS 32 + +#define GHASH_H_POWER( ghashTable, ind ) ( (ghashTable)[ SYMCRYPT_GHASH_PMULL_HPOWERS - (ind)].n128 ) +#define GHASH_Hx_POWER( ghashTable, ind ) ( (ghashTable)[2*SYMCRYPT_GHASH_PMULL_HPOWERS - (ind)].n128 ) + +#if SYMCRYPT_MS_VC +#ifndef vshl_n_u64 +#define vshl_n_u64(src1, src2) neon_shlis64(src1, src2) +#endif +#endif +// +// CLMUL_4 multiplies two operands into three intermediate results using 4 pmull instructions +// +#define CLMUL_4( opA, opB, resl, resm, resh ) \ +{ \ + __n128 _tmp; \ + resl = vmullq_p64( opA, opB ); \ + _tmp = vextq_u8( opA, opA, 8 ); \ + resm = veorq_u8( vmullq_p64( opB, _tmp ), vmull_high_p64( opB, _tmp ) );\ + resh = vmull_high_p64( opA, opB ); \ +}; + +// +// CLMUL_3 multiplies two operands into three intermediate results using 3 pmull 
instructions. +// The second operand has a pre-computed difference of the two halves. +// This uses Karatsuba, but we delay xorring the high and low piece into the middle piece. +// +#define CLMUL_3( opA, opB, opBx, resl, resm, resh ) \ +{ \ + __n128 _tmpA; \ + resl = vmullq_p64( opA, opB ); \ + resh = vmull_high_p64( opA, opB ); \ + _tmpA = veorq_u8( opA, vextq_u8( opA, opA, 8 ) ); \ + resm = vmullq_p64( _tmpA, opBx ); \ +}; +// +// CLMUL_X_3 is as CLMUL_3 only it takes precomputed differences of both multiplicands +// +#define CLMUL_X_3( opA, opAx, opB, opBx, resl, resm, resh ) \ +{ \ + resl = vmullq_p64( opA, opB ); \ + resh = vmull_high_p64( opA, opB ); \ + resm = vmullq_p64( opAx, opBx ); \ +}; + +// +// Post-process the CLMUL_3 result to be compatible with the CLMUL_4 +// +#define CLMUL_3_POST( resl, resm, resh ) \ + resm = veorq_u8( resm, veorq_u8( resl, resh ) ); + +// +// Multiply-accumulate using CLMUL_4 +// +#define CLMUL_ACC_4( opA, opB, resl, resm, resh ) \ +{\ + __n128 _tmpl, _tmpm, _tmph;\ + CLMUL_4( opA, opB, _tmpl, _tmpm, _tmph );\ + resl = veorq_u8( resl, _tmpl ); \ + resm = veorq_u8( resm, _tmpm ); \ + resh = veorq_u8( resh, _tmph ); \ +}; + +// +// Multiply-accumulate two operands into 3 accumulators. +// Takes the multiplicands and the pre-computed differences of the two halves of both multiplicands. +// +#define CLMUL_ACCX_3( opA, opAx, opB, opBx, resl, resm, resh ) \ +{\ + __n128 _tmpl, _tmpm, _tmph;\ + CLMUL_X_3( opA, opAx, opB, opBx, _tmpl, _tmpm, _tmph ); \ + resl = veorq_u8( resl, _tmpl ); \ + resm = veorq_u8( resm, _tmpm ); \ + resh = veorq_u8( resh, _tmph ); \ +}; + + +// +// Convert the 3 intermediate results to a 256-bit result, +// and do the modulo reduction. +// See the large comment above on how this is done. 
+// +#define MODREDUCE( vMultiplicationConstant, rl, rm, rh, res ) \ +{\ + __n128 _T0, _T1; \ +\ + /* multiply rl by constant which is (rev(0x87) << 1) - we'll eor the lost high bit in manually */ \ + _T0 = vmull_p64( vget_low_p64(rl), vMultiplicationConstant ); \ +\ + /* we want the high 64b of rl to align with the low 64b of rm, because we haven't merged rm into rl and rh */ \ + /* we want the low 64b of rl to align with the high 64b of rm, because we lost the high bit in the previous pmull */ \ + rl = vextq_u8( rl, rl, 8 ); \ +\ + rm = veorq_u8( rm, _T0 ); \ + rm = veorq_u8( rm, rl ); \ +\ + /* almost same again to fold rm into rh, but bit 63 needs no more multiplication and the result ultimately needs shifting left by 1 */ \ + /* pre-shift bottom of rm left by 1 and accumulate the result when the other parts are aligned */ \ + _T0 = vmull_p64( vshl_n_u64(vget_low_p64(rm), 1), vMultiplicationConstant ); \ +\ + rm = vextq_u8( rm, rm, 8 ); \ + res = veorq_u8( rh, rm ); \ +\ + /* rotate res left by 1 and accumulate the aligned parts */ \ + _T1 = vshlq_n_u32( res, 1 ); \ + res = vshrq_n_u32( res, 31 ); \ +\ + _T0 = veorq_u8( _T0, _T1 ); \ + res = vextq_u8( res, res, 12 ); \ +\ + res = veorq_u8( res, _T0 ); \ +}; + +#define REVERSE_BYTES( _in, _out )\ +{\ + __n128 _t;\ + _t = vrev64q_u8( _in ); \ + _out = vextq_u8( _t, _t, 8 ); \ +} + +#endif // CPU_ARM64 diff --git a/libs/symcrypt/lib/hash.c b/libs/symcrypt/lib/hash.c new file mode 100644 index 00000000000..a0420c35964 --- /dev/null +++ b/libs/symcrypt/lib/hash.c @@ -0,0 +1,216 @@ +// +// hash.c generic code used in many hash implementations. +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptHashAppendInternal( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_reads_bytes_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + UINT32 bytesInBuffer; + UINT32 freeInBuffer; + SIZE_T tmp; + + SYMCRYPT_CHECK_MAGIC( pState ); + + pState->dataLengthL += cbData; + if( pState->dataLengthL < cbData ) { + pState->dataLengthH ++; // This is almost-unreachable code as it requires 2^64 bytes to be hashed. + } + + bytesInBuffer = pState->bytesInBuffer; + + // + // If previous data in buffer, buffer new input and transform if possible. + // + if( bytesInBuffer > 0 ) + { + SYMCRYPT_ASSERT( pHash->inputBlockSize > bytesInBuffer ); + + freeInBuffer = pHash->inputBlockSize - bytesInBuffer; + if( cbData < freeInBuffer ) + { + // + // All the data will fit in the buffer. + // We don't do anything here. + // As cbData < inputBlockSize the bulk data processing is skipped, + // and the data will be copied to the buffer at the end + // of this code. + } else { + // + // Enough data to fill the whole buffer & process it + // + memcpy(&pState->buffer[bytesInBuffer], pbData, freeInBuffer); + pbData += freeInBuffer; + cbData -= freeInBuffer; + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, &pState->buffer[0], pHash->inputBlockSize, &tmp ); + + bytesInBuffer = 0; + } + } + + // + // Internal buffer is empty; process all remaining whole blocks in the input + // + if( cbData >= pHash->inputBlockSize ) + { + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, pbData, cbData, &tmp ); + SYMCRYPT_ASSERT( tmp < pHash->inputBlockSize ); + pbData += cbData - tmp; + cbData = tmp; + } + + SYMCRYPT_ASSERT( cbData < pHash->inputBlockSize ); + + // + // buffer remaining input if necessary. 
+ // + if( cbData > 0 ) + { + memcpy( &pState->buffer[bytesInBuffer], pbData, cbData ); + bytesInBuffer += (UINT32) cbData; + } + + pState->bytesInBuffer = bytesInBuffer; +} + +VOID +SYMCRYPT_CALL +SymCryptHashCommonPaddingMd4Style( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState ) +{ + SIZE_T tmp; + SIZE_T bytesInBuffer = pState->bytesInBuffer; + + SYMCRYPT_CHECK_MAGIC( pState ); + SYMCRYPT_ASSERT( pHash->inputBlockSize == 64 ); + SYMCRYPT_ASSERT( bytesInBuffer == (pState->dataLengthL & 0x3f) ); + + // + // The buffer is never completely full, so we can always put the first + // padding byte in. + // + pState->buffer[bytesInBuffer++] = 0x80; + + if( bytesInBuffer > 64-8 ) { + // + // No room for the rest of the padding. Pad with zeroes & process block + // bytesInBuffer is at most 64, so we do not have an integer underflow + // + SymCryptWipe( &pState->buffer[bytesInBuffer], 64-bytesInBuffer ); + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, pState->buffer, 64, &tmp ); + SYMCRYPT_ASSERT( tmp == 0 ); + bytesInBuffer = 0; + } + + // + // Set rest of padding + // At this point bytesInBuffer <= 64-8, so we don't have an underflow + // We wipe to the end of the buffer as it is 16-aligned, + // and it is faster to wipe to an aligned point + // + SymCryptWipe( &pState->buffer[bytesInBuffer], 64-bytesInBuffer ); + SYMCRYPT_STORE_LSBFIRST64( &pState->buffer[64-8], pState->dataLengthL * 8 ); + + // + // Process the final block + // + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, pState->buffer, 64, &tmp ); +} + + + +SIZE_T +SYMCRYPT_CALL +SymCryptHashResultSize( _In_ PCSYMCRYPT_HASH pHash ) +{ + return pHash->resultSize; +} + + +SIZE_T +SYMCRYPT_CALL +SymCryptHashInputBlockSize( _In_ PCSYMCRYPT_HASH pHash ) +{ + return pHash->inputBlockSize; +} + +SIZE_T +SYMCRYPT_CALL +SymCryptHashStateSize( _In_ PCSYMCRYPT_HASH pHash ) +{ + return pHash->stateSize; +} + + +VOID +SYMCRYPT_CALL +SymCryptHash( + _In_ 
PCSYMCRYPT_HASH pHash, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MIN( cbResult, pHash->resultSize ) ) PBYTE pbResult, + SIZE_T cbResult ) +{ + SYMCRYPT_HASH_STATE hash; + + _Analysis_assume_( pHash->stateSize <= sizeof( hash ) ); + SymCryptHashInit( pHash, &hash ); + SymCryptHashAppend( pHash, &hash, pbData, cbData ); + SymCryptHashResult( pHash, &hash, pbResult, cbResult ); + SymCryptWipe( &hash, pHash->stateSize ); +} + +VOID +SYMCRYPT_CALL +SymCryptHashInit( + _In_ PCSYMCRYPT_HASH pHash, + _Out_writes_bytes_( pHash->stateSize ) PVOID pState ) +{ + (*pHash->initFunc)( pState ); +} + +VOID +SYMCRYPT_CALL +SymCryptHashAppend( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_updates_bytes_( pHash->stateSize ) PVOID pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + (*pHash->appendFunc)( pState, pbData, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptHashResult( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_updates_bytes_( pHash->stateSize ) PVOID pState, + _Out_writes_( SYMCRYPT_MIN( cbResult, pHash->resultSize ) ) PBYTE pbResult, + SIZE_T cbResult ) +{ + BYTE buf[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + + _Analysis_assume_( pHash->resultSize <= SYMCRYPT_HASH_MAX_RESULT_SIZE ); + + (*pHash->resultFunc)( pState, buf ); + memcpy( pbResult, buf, SYMCRYPT_MIN( cbResult, pHash->resultSize )); + SymCryptWipe( buf, pHash->resultSize ); +} + +VOID +SYMCRYPT_CALL +SymCryptHashStateCopy( + _In_ PCSYMCRYPT_HASH pHash, + _In_reads_( pHash->stateSize ) PCVOID pSrc, + _Out_writes_( pHash->stateSize ) PVOID pDst) +{ + (*pHash->stateCopyFunc)( pSrc, pDst ); +} diff --git a/libs/symcrypt/lib/hash_buffer_pattern.c b/libs/symcrypt/lib/hash_buffer_pattern.c new file mode 100644 index 00000000000..c0d5dd19c92 --- /dev/null +++ b/libs/symcrypt/lib/hash_buffer_pattern.c @@ -0,0 +1,75 @@ +// +// hash_buffer_pattern.c +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#if 0 +#pragma makedep header +#endif + +/* +SymCryptXxxAppend( _Inout_ SYMCRYPT_Xxx_STATE * state, + _In_reads_bytes_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + <Set up a SIZE_T variable 'bytesInBuffer' that contains the # bytes in the buffer> +*/ + + // + // Truncate bytesInBuffer so that we never have an integer overflow. + // + bytesInBuffer &= SYMCRYPT_XXX_INPUT_BLOCK_SIZE - 1; + + // + // If previous data in buffer, buffer new input and transform if possible. + // + if (bytesInBuffer > 0) + { + SIZE_T freeInBuffer = SYMCRYPT_XXX_INPUT_BLOCK_SIZE - bytesInBuffer; + if( cbData < freeInBuffer ) + { + // + // All the data will fit in the buffer. + // We don't do anything here. + // As cbData < INPUT_BLOCK_SIZE the bulk data processing is skipped, + // and the data will be copied to the buffer at the end + // of this code. + } else { + // + // Enough data to fill the whole buffer & process it + // + memcpy(&state->buffer[bytesInBuffer], pbData, freeInBuffer); + pbData += freeInBuffer; + cbData -= freeInBuffer; + SYMCRYPT_XxxAppendBlocks( &state->chain, state->buffer, SYMCRYPT_XXX_INPUT_BLOCK_SIZE ); + + // + // Set bytesInBuffer to zero to ensure that the trailing data in the + // buffer will be copied to the right location of the buffer below. + // + bytesInBuffer = 0; + } + } + + // + // Internal buffer is empty; process all remaining whole blocks in the input + // + if( cbData >= SYMCRYPT_XXX_INPUT_BLOCK_SIZE ) + { + SIZE_T cbDataRoundedDown = cbData & ~(SIZE_T)(SYMCRYPT_XXX_INPUT_BLOCK_SIZE - 1); + SYMCRYPT_XxxAppendBlocks( &state->chain, pbData, cbDataRoundedDown ); + pbData += cbDataRoundedDown; + cbData -= cbDataRoundedDown; + } + + // + // buffer remaining input if necessary. 
+ // + if( cbData > 0 ) + { + memcpy( &state->buffer[bytesInBuffer], pbData, cbData ); + } + +/* +} +*/ diff --git a/libs/symcrypt/lib/hash_pattern.c b/libs/symcrypt/lib/hash_pattern.c new file mode 100644 index 00000000000..57920c7cb71 --- /dev/null +++ b/libs/symcrypt/lib/hash_pattern.c @@ -0,0 +1,39 @@ +// +// hash_pattern.c +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#if 0 +#pragma makedep header +#endif + +// +// This is a file that is #included to define the +// all-in-one hash function. +// + + + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SYMCRYPT_Xxx( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( CONCAT3( SYMCRYPT_, ALG, _RESULT_SIZE ) ) PBYTE pbResult ) +{ + SYMCRYPT_XXX_STATE state; + + SYMCRYPT_XxxInit( &state ); + SYMCRYPT_XxxAppend( &state, pbData, cbData ); + SYMCRYPT_XxxResult( & state, pbResult ); +} + +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxStateCopy( _In_ const SYMCRYPT_XXX_STATE * pSrc, _Out_ SYMCRYPT_XXX_STATE * pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + *pDst = *pSrc; + SYMCRYPT_SET_MAGIC( pDst ); +} diff --git a/libs/symcrypt/lib/hkdf.c b/libs/symcrypt/lib/hkdf.c new file mode 100644 index 00000000000..7a188f9b674 --- /dev/null +++ b/libs/symcrypt/lib/hkdf.c @@ -0,0 +1,229 @@ +// +// hkdf.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement the HKDF +// function for the TLS protocol 1.3. It is used in +// the protocol's key derivation function. 
+// +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfExpandKey( + _Out_ PSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbIkm) PCBYTE pbIkm, + SIZE_T cbIkm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_ALIGN BYTE rbPrk[SYMCRYPT_MAC_MAX_RESULT_SIZE] = { 0 }; + + SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) ); + + scError = SymCryptHkdfExtractPrk( macAlgorithm, pbIkm, cbIkm, pbSalt, cbSalt, rbPrk, macAlgorithm->resultSize ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptHkdfPrkExpandKey( pExpandedKey, macAlgorithm, rbPrk, macAlgorithm->resultSize ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + SymCryptWipeKnownSize(&rbPrk[0], sizeof(rbPrk)); + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfExtractPrk( + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbIkm) PCBYTE pbIkm, + SIZE_T cbIkm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + _Out_writes_(cbPrk) PBYTE pbPrk, + SIZE_T cbPrk ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_MAC_STATE state; + SYMCRYPT_MAC_EXPANDED_KEY key; + + // Ensure that pbPrk is the correct size + if (cbPrk != macAlgorithm->resultSize) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Calculation of PRK = HMAC-Hash(salt, IKM) + scError = macAlgorithm->expandKeyFunc( &key, pbSalt, cbSalt ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + macAlgorithm->initFunc( &state, &key ); + macAlgorithm->appendFunc( &state, pbIkm, cbIkm ); + macAlgorithm->resultFunc( &state, pbPrk ); + +cleanup: + SymCryptWipeKnownSize(&key, sizeof(key)); + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfPrkExpandKey( + _Out_ PSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbPrk) PCBYTE 
pbPrk, + SIZE_T cbPrk ) +{ + SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) ); + + pExpandedKey->macAlg = macAlgorithm; + return macAlgorithm->expandKeyFunc( &pExpandedKey->macKey, pbPrk, cbPrk ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfDerive( + _In_ PCSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_MAC_STATE state; + + PCSYMCRYPT_MAC pMacAlgorithm = pExpandedKey->macAlg; + + SYMCRYPT_ALIGN BYTE rbPartialResult[SYMCRYPT_MAC_MAX_RESULT_SIZE]; + BYTE * pbCurr = pbResult; + + SIZE_T cbMacResultSize = pMacAlgorithm->resultSize; + + BYTE cntr = 0x01; + + // Check that cbResult <= 255*HashLen + if (cbResult > 0xff * cbMacResultSize) + { + scError = SYMCRYPT_WRONG_DATA_SIZE; + goto cleanup; + } + + // In the first iteration T(0) is the empty string + // Calculate T(1) = HMAC-Hash(PRK, T(0) | info | 0x01) + pMacAlgorithm->initFunc( &state, pExpandedKey ); + pMacAlgorithm->appendFunc( &state, pbInfo, cbInfo ); + pMacAlgorithm->appendFunc( &state, &cntr, sizeof(cntr) ); + pMacAlgorithm->resultFunc( &state, rbPartialResult ); + + // Store the result in the output buffer + memcpy(pbCurr, rbPartialResult, SYMCRYPT_MIN(cbResult, cbMacResultSize)); + if (cbResult <= cbMacResultSize) + { + goto cleanup; + } + + // Update counters + cntr++; + pbCurr += cbMacResultSize; + cbResult -= cbMacResultSize; + + while( cbResult > 0 ) + { + // Calculate T(i) = HMAC-Hash(PRK, T(i-1) | info | 0xi) + pMacAlgorithm->initFunc( &state, pExpandedKey ); + pMacAlgorithm->appendFunc( &state, rbPartialResult, cbMacResultSize ); + pMacAlgorithm->appendFunc( &state, pbInfo, cbInfo ); + pMacAlgorithm->appendFunc( &state, &cntr, sizeof(cntr) ); + pMacAlgorithm->resultFunc( &state, rbPartialResult ); + + // Store the result in the output buffer + memcpy(pbCurr, rbPartialResult, 
SYMCRYPT_MIN(cbResult, cbMacResultSize)); + if (cbResult <= cbMacResultSize) + { + goto cleanup; + } + + // Update counters + cntr++; + pbCurr += cbMacResultSize; + cbResult -= cbMacResultSize; + } + +cleanup: + SymCryptWipeKnownSize(&rbPartialResult[0], sizeof(rbPartialResult)); + + return scError; +} + +// +// The full HKDF +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdf( + PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbIkm) PCBYTE pbIkm, + SIZE_T cbIkm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_HKDF_EXPANDED_KEY key; + + // Create the expanded key + scError = SymCryptHkdfExpandKey( + &key, + macAlgorithm, + pbIkm, + cbIkm, + pbSalt, + cbSalt ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Derive the key + scError = SymCryptHkdfDerive( + &key, + pbInfo, + cbInfo, + pbResult, + cbResult ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + SymCryptWipeKnownSize(&key, sizeof(key)); + + return scError; +} diff --git a/libs/symcrypt/lib/hmac.c b/libs/symcrypt/lib/hmac.c new file mode 100644 index 00000000000..766fd9eb95e --- /dev/null +++ b/libs/symcrypt/lib/hmac.c @@ -0,0 +1,195 @@ +// +// hmac.c +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptHmacStateCopy( + _In_ PCSYMCRYPT_HMAC_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_STATE pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + PCSYMCRYPT_HASH pHash = pSrc->pKey->pHash; + + SymCryptHashStateCopy( pHash, &pSrc->hash, &pDst->hash ); + + if( pExpandedKey != NULL ) + { + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + pDst->pKey = pExpandedKey; + } + else + { + SYMCRYPT_CHECK_MAGIC( pSrc->pKey ); + pDst->pKey = pSrc->pKey; + } + SYMCRYPT_SET_MAGIC( pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacKeyCopy( + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_EXPANDED_KEY pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + // Copy innerState and outerState + SymCryptHashStateCopy(pSrc->pHash, &pSrc->innerState, &pDst->innerState ); + SymCryptHashStateCopy(pSrc->pHash, &pSrc->outerState, &pDst->outerState ); + + pDst->pHash = pSrc->pHash; + + SYMCRYPT_SET_MAGIC( pDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacExpandKey( + _In_ PCSYMCRYPT_HASH pHash, + _Out_ PSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + // This buffer has to be large enough to hold one input block + // and the result of the hash function. + // Using SHA3-224 input block size to satisfy those requirements. 
+ SYMCRYPT_ALIGN BYTE iblock[ SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE ]; + + SYMCRYPT_ASSERT( sizeof(iblock) >= pHash->inputBlockSize ); + SYMCRYPT_ASSERT( sizeof(iblock) >= pHash->resultSize ); + + // XorByteIntoBuffer function updates the buffer in multiples of 8-bytes + SYMCRYPT_ASSERT( pHash->inputBlockSize % 8 == 0); + + memset( iblock, 0, sizeof( iblock ) ); + + if( cbKey <= pHash->inputBlockSize ) + { + if( cbKey > 0 ) + { + memcpy( iblock, pbKey, cbKey ); + } + } + else + { + SymCryptHash( pHash, pbKey, cbKey, iblock, pHash->resultSize ); + } + + XorByteIntoBuffer( iblock, pHash->inputBlockSize / 8, HMAC_IPAD_BYTE ); + + // + // Initialize the inner and outer states in the expanded key + // + SymCryptHashInit( pHash, &pExpandedKey->innerState ); + SymCryptHashInit( pHash, &pExpandedKey->outerState ); + + // Update the inner state in the expanded key + SymCryptHashAppend( pHash, &pExpandedKey->innerState, iblock, pHash->inputBlockSize ); + + XorByteIntoBuffer( iblock, pHash->inputBlockSize / 8, HMAC_IPAD_BYTE ^ HMAC_OPAD_BYTE ); + + // Update the outer state in the expanded key + SymCryptHashAppend( pHash, &pExpandedKey->outerState, iblock, pHash->inputBlockSize ); + + SymCryptWipeKnownSize( iblock, sizeof( iblock ) ); + + // Save the hash function in the expanded key, it will be used in other + // generic HMAC function calls. 
+ pExpandedKey->pHash = pHash; + + SYMCRYPT_SET_MAGIC(pExpandedKey); + + return SYMCRYPT_NO_ERROR; +} + + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmacInit( + _Out_ PSYMCRYPT_HMAC_STATE pState, + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey ) +{ + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + SymCryptHashStateCopy( pExpandedKey->pHash, &pExpandedKey->innerState, &pState->hash ); + + pState->pKey = pExpandedKey; + + SYMCRYPT_SET_MAGIC(pState); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacAppend( + _Inout_ PSYMCRYPT_HMAC_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHashAppend( pState->pKey->pHash, &pState->hash, pbData, cbData ); +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmacResult( + _Inout_ PSYMCRYPT_HMAC_STATE pState, + _Out_writes_( pState->pKey->pHash->resultSize ) PBYTE pbResult ) +{ + BYTE innerRes[64]; + + PCSYMCRYPT_HASH pHash = pState->pKey->pHash; + + SYMCRYPT_ASSERT(sizeof(innerRes) >= pHash->resultSize); + + SYMCRYPT_CHECK_MAGIC( pState ); + + // + // We have to buffer the inner hash result. We can't put it directly in the + // hash state data buffer as the Result() function wipes that buffer before returning. + // + SymCryptHashResult( pHash, &pState->hash, innerRes, pHash->resultSize ); + + SYMCRYPT_CHECK_MAGIC( pState->pKey ) + + SymCryptHashStateCopy( pHash, &pState->pKey->outerState, &pState->hash ); + + SymCryptHashAppend( pHash, &pState->hash, innerRes, pHash->resultSize ); + + SymCryptHashResult( pHash, &pState->hash, pbResult, pHash->resultSize ); + + // + // The SymCryptHashResult already wipes the hash state. + // We only need to wipe our own buffer. + // + // We also set the key pointer to NULL. This is not for security; + // it creates a clear error when callers forget to call the Init routine + // when re-using a state. Rather than the wrong result, they will get + // a NULL pointer exception, and they will fix their code. 
+ // + + SymCryptWipeKnownSize( innerRes, sizeof( innerRes ) ); + pState->pKey = NULL; +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmac( + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( pExpandedKey->pHash->resultSize ) PBYTE pbResult ) +{ + SYMCRYPT_HMAC_STATE state; + + SymCryptHmacInit( &state, pExpandedKey ); + SymCryptHmacAppend( &state, pbData, cbData ); + SymCryptHmacResult( &state, pbResult ); +} diff --git a/libs/symcrypt/lib/hmac_pattern.c b/libs/symcrypt/lib/hmac_pattern.c new file mode 100644 index 00000000000..1d4ccf7c7b5 --- /dev/null +++ b/libs/symcrypt/lib/hmac_pattern.c @@ -0,0 +1,197 @@ +// +// hmac_pattern.c +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#if 0 +#pragma makedep header +#endif + +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxStateCopy( + _In_ PCSYMCRYPT_HMAC_XXX_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_XXX_STATE pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + SYMCRYPT_XxxStateCopy( &pSrc->hash, &pDst->hash ); + + if( pExpandedKey != NULL ) + { + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + pDst->pKey = pExpandedKey; + } + else + { + SYMCRYPT_CHECK_MAGIC( pSrc->pKey ); + pDst->pKey = pSrc->pKey; + } + SYMCRYPT_SET_MAGIC( pDst ); +} + +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxKeyCopy( _In_ PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_HMAC_XXX_EXPANDED_KEY pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + *pDst = *pSrc; + SYMCRYPT_SET_MAGIC( pDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SYMCRYPT_HmacXxxExpandKey( + _Out_ PSYMCRYPT_HMAC_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_XXX_STATE hashState; + SYMCRYPT_ALIGN BYTE iblock[ SYMCRYPT_XXX_INPUT_BLOCK_SIZE ]; // One input block for the hash function + SIZE_T tmp; + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + // + // Initialize our hash state and our input block + // We wipe the 
whole block & then copy the key into it. This is often faster + // as the compiler can optimize the wipe because it knows the size at compile time. + // + SYMCRYPT_XxxInit( &hashState ); + memset( iblock, 0, sizeof( iblock ) ); + + if( cbKey <= sizeof( iblock ) ) + { + if( cbKey > 0 ) + { + memcpy( iblock, pbKey, cbKey ); + } + } else { + // + // We can use the existing MD5 state to hash the long key. + // The state is re-initialized by the SymCryptMd5Result() function. + // + SYMCRYPT_XxxAppend( &hashState, pbKey, cbKey ); + SYMCRYPT_XxxResult( &hashState, iblock ); + } + + XorByteIntoBuffer( iblock, sizeof( iblock )/8, HMAC_IPAD_BYTE ); + + // + // Copy the initial chaining state to both states in the expanded key + // + pExpandedKey->innerState = hashState.chain; + pExpandedKey->outerState = hashState.chain; + + // + // Update the state in the expanded key directly + // + SYMCRYPT_XxxAppendBlocks( &pExpandedKey->innerState, iblock, sizeof( iblock ), &tmp ); + + XorByteIntoBuffer( iblock, sizeof( iblock )/8, HMAC_IPAD_BYTE ^ HMAC_OPAD_BYTE ); + + SYMCRYPT_XxxAppendBlocks( &pExpandedKey->outerState, iblock, sizeof( iblock ), &tmp ); + + SymCryptWipeKnownSize( iblock, sizeof( iblock ) ); + SymCryptWipeKnownSize( &hashState, sizeof( hashState ) ); + + return SYMCRYPT_NO_ERROR; +} + + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxInit( + _Out_ PSYMCRYPT_HMAC_XXX_STATE pState, + _In_ PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY pExpandedKey) +{ + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + SYMCRYPT_SET_MAGIC( pState ); + + // + // We don't call SymCryptXxxInit on the hash sub-state; + // instead we directly initialize its fields. 
+ // + SYMCRYPT_SET_MAGIC( &pState->hash ); + pState->hash.chain = pExpandedKey->innerState; + SET_DATALENGTH( pState->hash, SYMCRYPT_XXX_INPUT_BLOCK_SIZE ); + pState->hash.bytesInBuffer = 0; + pState->pKey = pExpandedKey; +} + +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxAppend( + _Inout_ PSYMCRYPT_HMAC_XXX_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SYMCRYPT_XxxAppend( &pState->hash, pbData, cbData ); +} + +C_ASSERT( SYMCRYPT_XXX_RESULT_SIZE == SYMCRYPT_HMAC_XXX_RESULT_SIZE ); + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxResult( + _Inout_ PSYMCRYPT_HMAC_XXX_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_XXX_RESULT_SIZE ) PBYTE pbResult ) +{ + BYTE innerRes[SYMCRYPT_XXX_RESULT_SIZE]; + + SYMCRYPT_CHECK_MAGIC( pState ); + + // + // We have to buffer the inner hash result. We can't put it directly in the + // hash state data buffer as the Result() function wipes that buffer before returning. + // + + SYMCRYPT_XxxResult( &pState->hash, innerRes ); + + SYMCRYPT_CHECK_MAGIC( pState->pKey ) + + pState->hash.chain = pState->pKey->outerState; + + // + // We put the data directly in the buffer, rather than call the Append function. + // + memcpy( &pState->hash.buffer, innerRes, sizeof( innerRes ) ); + SET_DATALENGTH( pState->hash, SYMCRYPT_XXX_INPUT_BLOCK_SIZE + SYMCRYPT_XXX_RESULT_SIZE ); + pState->hash.bytesInBuffer = SYMCRYPT_XXX_RESULT_SIZE; + + SYMCRYPT_XxxResult( &pState->hash, pbResult ); + + // + // The SymCryptXxxResult already wipes the hash state. + // We only need to wipe our own buffer. + // + // We also set the key pointer to NULL. This is not for security; + // it creates a clear error when callers forget to call the Init routine + // when re-using a state. Rather than the wrong result, they will get + // a NULL pointer exception, and they will fix their code. 
+ // + + SymCryptWipe( innerRes, sizeof( innerRes ) ); + pState->pKey = NULL; +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxx( + _In_ PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_XXX_RESULT_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_HMAC_XXX_STATE state; + + SYMCRYPT_HmacXxxInit( &state, pExpandedKey ); + SYMCRYPT_HmacXxxAppend( &state, pbData, cbData ); + SYMCRYPT_HmacXxxResult( &state, pbResult ); + +} diff --git a/libs/symcrypt/lib/hmacmd5.c b/libs/symcrypt/lib/hmacmd5.c new file mode 100644 index 00000000000..03f5fe01aea --- /dev/null +++ b/libs/symcrypt/lib/hmacmd5.c @@ -0,0 +1,56 @@ +// +// HmacMd5.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define ALG MD5 +#define Alg Md5 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacMd5Algorithm_default = { + SymCryptHmacMd5ExpandKey, + SymCryptHmacMd5Init, + SymCryptHmacMd5Append, + SymCryptHmacMd5Result, + sizeof(SYMCRYPT_HMAC_MD5_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_MD5_STATE), + SYMCRYPT_HMAC_MD5_RESULT_SIZE, + &SymCryptMd5Algorithm, + 0, +}; + +const PCSYMCRYPT_MAC SymCryptHmacMd5Algorithm = &SymCryptHmacMd5Algorithm_default; + +static const BYTE hmacMd5Kat[16] = { + 0x77, 0x33, 0x69, 0x79, 0x9e, 0x54, 0xeb, 0x49, 0xff, 0x21, 0xe6, 0xf9, 0x63, 0xe5, 0xbb, 0x49, +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Selftest(void) +{ + SYMCRYPT_HMAC_MD5_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_MD5_RESULT_SIZE]; + + SymCryptHmacMd5ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacMd5( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacMd5Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hmd5'); + } + + // + // Normally we would wipe the expanded key 
structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha1.c b/libs/symcrypt/lib/hmacsha1.c new file mode 100644 index 00000000000..e8a8de0c4af --- /dev/null +++ b/libs/symcrypt/lib/hmacsha1.c @@ -0,0 +1,65 @@ +// +// HmacSha1.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// This implementation of HMAC uses extensive knowledge of the internal workings of the +// SHA1 implementation and uses internal routines. +// This reduces the overhead per HMAC computation by up to 20%, which is significant +// enough to take on the added complexity. +// + +#define ALG SHA1 +#define Alg Sha1 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha1Algorithm_default = { + SymCryptHmacSha1ExpandKey, + SymCryptHmacSha1Init, + SymCryptHmacSha1Append, + SymCryptHmacSha1Result, + sizeof(SYMCRYPT_HMAC_SHA1_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA1_STATE), + SYMCRYPT_HMAC_SHA1_RESULT_SIZE, + &SymCryptSha1Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA1_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha1Algorithm = &SymCryptHmacSha1Algorithm_default; + +static const BYTE hmacSha1Kat[20] = { + 0x2a, 0x29, 0x85, 0x40, 0x23, 0xba, 0x2e, 0xf1, + 0x49, 0x0f, 0x8c, 0xd8, 0x97, 0xa8, 0xcc, 0x6b, + 0x55, 0x7b, 0x2a, 0x12, +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Selftest(void) +{ + SYMCRYPT_HMAC_SHA1_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA1_RESULT_SIZE]; + + SymCryptHmacSha1ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha1( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha1Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hSh1' ); + } + + // + // Normally we would wipe the expanded key structure here, + // 
but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha224.c b/libs/symcrypt/lib/hmacsha224.c new file mode 100644 index 00000000000..fcfc8d85cdf --- /dev/null +++ b/libs/symcrypt/lib/hmacsha224.c @@ -0,0 +1,62 @@ +// +// HmacSha224.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define SymCryptSha224AppendBlocks SymCryptSha256AppendBlocks + +#define ALG SHA224 +#define Alg Sha224 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha224Algorithm_default = { + SymCryptHmacSha224ExpandKey, + SymCryptHmacSha224Init, + SymCryptHmacSha224Append, + SymCryptHmacSha224Result, + sizeof(SYMCRYPT_HMAC_SHA224_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA224_STATE), + SYMCRYPT_HMAC_SHA224_RESULT_SIZE, + &SymCryptSha224Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA224_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha224Algorithm = &SymCryptHmacSha224Algorithm_default; + +static const BYTE hmacSha224Kat[28] = { + 0x3e, 0x1c, 0x48, 0x2f, 0x66, 0x49, 0x67, 0xa9, + 0xad, 0x4f, 0x76, 0x52, 0x36, 0xf8, 0x5a, 0x1f, + 0x63, 0x5b, 0x34, 0xe9, 0x35, 0x71, 0x62, 0x35, + 0xa2, 0x9e, 0x61, 0xb1 +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Selftest(void) +{ + SYMCRYPT_HMAC_SHA224_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA224_RESULT_SIZE]; + + SymCryptHmacSha224ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha224( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha224Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh4' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. 
+ // +} diff --git a/libs/symcrypt/lib/hmacsha256.c b/libs/symcrypt/lib/hmacsha256.c new file mode 100644 index 00000000000..c2641ef39aa --- /dev/null +++ b/libs/symcrypt/lib/hmacsha256.c @@ -0,0 +1,60 @@ +// +// HmacSha256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define ALG SHA256 +#define Alg Sha256 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha256Algorithm_default = { + SymCryptHmacSha256ExpandKey, + SymCryptHmacSha256Init, + SymCryptHmacSha256Append, + SymCryptHmacSha256Result, + sizeof(SYMCRYPT_HMAC_SHA256_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA256_STATE), + SYMCRYPT_HMAC_SHA256_RESULT_SIZE, + &SymCryptSha256Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA256_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha256Algorithm = &SymCryptHmacSha256Algorithm_default; + +static const BYTE hmacSha256Kat[32] = { + 0xd6, 0x01, 0xcc, 0x17, 0x75, 0x59, 0xb0, 0x24, + 0x84, 0x59, 0x78, 0x7f, 0x7e, 0x80, 0x4e, 0xd7, + 0xf2, 0x76, 0x89, 0xb5, 0x99, 0x5c, 0x59, 0xb6, + 0x61, 0x80, 0x2d, 0x96, 0x82, 0xfd, 0xf8, 0xd2, +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Selftest(void) +{ + SYMCRYPT_HMAC_SHA256_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA256_RESULT_SIZE]; + + SymCryptHmacSha256ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha256( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha256Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh2' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. 
+ // +} diff --git a/libs/symcrypt/lib/hmacsha384.c b/libs/symcrypt/lib/hmacsha384.c new file mode 100644 index 00000000000..36042092edf --- /dev/null +++ b/libs/symcrypt/lib/hmacsha384.c @@ -0,0 +1,59 @@ +// +// HmacSha512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define SymCryptSha384AppendBlocks SymCryptSha512AppendBlocks + +#define ALG SHA384 +#define Alg Sha384 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len; state.dataLengthH = 0;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha384Algorithm_default = { + SymCryptHmacSha384ExpandKey, + SymCryptHmacSha384Init, + SymCryptHmacSha384Append, + SymCryptHmacSha384Result, + sizeof(SYMCRYPT_HMAC_SHA384_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA384_STATE), + SYMCRYPT_HMAC_SHA384_RESULT_SIZE, + &SymCryptSha384Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA384_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha384Algorithm = &SymCryptHmacSha384Algorithm_default; + +static const BYTE hmacSha384Kat[48] = { + 0x67, 0xdb, 0x9d, 0x4d, 0x66, 0xed, 0xf2, 0xe7, 0x2b, 0x88, 0xb8, 0x50, 0x55, 0x68, 0xa0, 0x00, + 0xa9, 0x83, 0x2b, 0xa3, 0x5e, 0x4f, 0xde, 0xcf, 0xe5, 0x38, 0x9a, 0x5d, 0x92, 0x79, 0x81, 0x53, + 0x6d, 0xdb, 0x94, 0xc0, 0xf6, 0xc0, 0xbd, 0x94, 0xc4, 0x18, 0x96, 0x4b, 0xbe, 0x4b, 0x6c, 0xf2, +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Selftest(void) +{ + SYMCRYPT_HMAC_SHA384_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA384_RESULT_SIZE]; + + SymCryptHmacSha384ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha384( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + if( memcmp( res, hmacSha384Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not 
needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha3_224.c b/libs/symcrypt/lib/hmacsha3_224.c new file mode 100644 index 00000000000..8f3b7626e43 --- /dev/null +++ b/libs/symcrypt/lib/hmacsha3_224.c @@ -0,0 +1,122 @@ +// +// HmacSha3_224.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_224ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SymCryptHmacExpandKey(SymCryptSha3_224Algorithm, &pExpandedKey->generic, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pDst) +{ + SymCryptHmacKeyCopy(&pSrc->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224( + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmac(&pExpandedKey->generic, pbData, cbData, pbResult); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_224_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_224_STATE pDst ) +{ + SymCryptHmacStateCopy(&pSrc->generic, pExpandedKey == NULL ? 
NULL : &pExpandedKey->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Init( + _Out_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey) +{ + SymCryptHmacInit(&pState->generic, &pExpandedKey->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHmacAppend(&pState->generic, pbData, cbData); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmacResult(&pState->generic, pbResult); +} + + +const SYMCRYPT_MAC SymCryptHmacSha3_224Algorithm_default = { + SymCryptHmacSha3_224ExpandKey, + SymCryptHmacSha3_224Init, + SymCryptHmacSha3_224Append, + SymCryptHmacSha3_224Result, + sizeof(SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA3_224_STATE), + SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE, + &SymCryptSha3_224Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY, generic.outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha3_224Algorithm = &SymCryptHmacSha3_224Algorithm_default; + + +static const BYTE hmacSha3_224Kat[28] = +{ + 0x10, 0x90, 0xac, 0xa1, 0xd5, 0xad, 0xc4, 0x12, + 0xf5, 0xe7, 0xb4, 0xdf, 0xd2, 0x87, 0x09, 0xdd, + 0x24, 0x82, 0xc0, 0x4a, 0x5e, 0x9a, 0x3b, 0xf0, + 0xc3, 0x35, 0x7e, 0x12 +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Selftest(void) +{ + SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE]; + + SymCryptHmacSha3_224ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha3_224( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha3_224Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as 
this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha3_256.c b/libs/symcrypt/lib/hmacsha3_256.c new file mode 100644 index 00000000000..8f145d398bf --- /dev/null +++ b/libs/symcrypt/lib/hmacsha3_256.c @@ -0,0 +1,122 @@ +// +// HmacSha3_256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_256ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SymCryptHmacExpandKey(SymCryptSha3_256Algorithm, &pExpandedKey->generic, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pDst) +{ + SymCryptHmacKeyCopy(&pSrc->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256( + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmac(&pExpandedKey->generic, pbData, cbData, pbResult); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_256_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_256_STATE pDst ) +{ + SymCryptHmacStateCopy(&pSrc->generic, pExpandedKey == NULL ? 
NULL : &pExpandedKey->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Init( + _Out_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey) +{ + SymCryptHmacInit(&pState->generic, &pExpandedKey->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHmacAppend(&pState->generic, pbData, cbData); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmacResult(&pState->generic, pbResult); +} + + +const SYMCRYPT_MAC SymCryptHmacSha3_256Algorithm_default = { + SymCryptHmacSha3_256ExpandKey, + SymCryptHmacSha3_256Init, + SymCryptHmacSha3_256Append, + SymCryptHmacSha3_256Result, + sizeof(SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA3_256_STATE), + SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE, + &SymCryptSha3_256Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY, generic.outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha3_256Algorithm = &SymCryptHmacSha3_256Algorithm_default; + + +static const BYTE hmacSha3_256Kat[32] = +{ + 0x18, 0xe8, 0x2e, 0xa4, 0x5a, 0x94, 0x07, 0xcc, + 0xb7, 0x87, 0x29, 0x16, 0x80, 0x99, 0xd6, 0xc6, + 0x73, 0x1b, 0x56, 0x2e, 0x0d, 0x16, 0x67, 0x5a, + 0x1f, 0xe2, 0xe3, 0xd6, 0x81, 0x56, 0x52, 0x77 +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Selftest(void) +{ + SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE]; + + SymCryptHmacSha3_256ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha3_256( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha3_256Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key 
structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha3_384.c b/libs/symcrypt/lib/hmacsha3_384.c new file mode 100644 index 00000000000..14eef9d09d7 --- /dev/null +++ b/libs/symcrypt/lib/hmacsha3_384.c @@ -0,0 +1,124 @@ +// +// HmacSha3_384.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_384ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SymCryptHmacExpandKey(SymCryptSha3_384Algorithm, &pExpandedKey->generic, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pDst) +{ + SymCryptHmacKeyCopy(&pSrc->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384( + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmac(&pExpandedKey->generic, pbData, cbData, pbResult); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_384_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_384_STATE pDst ) +{ + SymCryptHmacStateCopy(&pSrc->generic, pExpandedKey == NULL ? 
NULL : &pExpandedKey->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Init( + _Out_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey) +{ + SymCryptHmacInit(&pState->generic, &pExpandedKey->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHmacAppend(&pState->generic, pbData, cbData); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmacResult(&pState->generic, pbResult); +} + + +const SYMCRYPT_MAC SymCryptHmacSha3_384Algorithm_default = { + SymCryptHmacSha3_384ExpandKey, + SymCryptHmacSha3_384Init, + SymCryptHmacSha3_384Append, + SymCryptHmacSha3_384Result, + sizeof(SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA3_384_STATE), + SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE, + &SymCryptSha3_384Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY, generic.outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha3_384Algorithm = &SymCryptHmacSha3_384Algorithm_default; + + +static const BYTE hmacSha3_384Kat[48] = +{ + 0x8f, 0xc4, 0x8a, 0x84, 0xb5, 0xa7, 0xa1, 0x36, + 0x3c, 0x3b, 0x4b, 0x21, 0x3c, 0xfb, 0x65, 0x36, + 0xa6, 0x2b, 0xa3, 0x4c, 0x12, 0x33, 0xa1, 0x27, + 0xbc, 0xfc, 0xb2, 0xd7, 0xae, 0xaf, 0x30, 0x6b, + 0xc9, 0xe6, 0x90, 0xfd, 0xf1, 0xfa, 0x12, 0x61, + 0xa4, 0x7e, 0xb2, 0x27, 0x1a, 0xeb, 0xf1, 0x34 +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Selftest(void) +{ + SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE]; + + SymCryptHmacSha3_384ExpandKey( &xKey, SymCryptTestKey32, 24 ); + SymCryptHmacSha3_384( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha3_384Kat, sizeof( res ) 
) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha3_512.c b/libs/symcrypt/lib/hmacsha3_512.c new file mode 100644 index 00000000000..5a576e6ae24 --- /dev/null +++ b/libs/symcrypt/lib/hmacsha3_512.c @@ -0,0 +1,126 @@ +// +// HmacSha3_512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_512ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SymCryptHmacExpandKey(SymCryptSha3_512Algorithm, &pExpandedKey->generic, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pDst) +{ + SymCryptHmacKeyCopy(&pSrc->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512( + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmac(&pExpandedKey->generic, pbData, cbData, pbResult); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_512_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_512_STATE pDst ) +{ + SymCryptHmacStateCopy(&pSrc->generic, pExpandedKey == NULL ? 
NULL : &pExpandedKey->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Init( + _Out_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey) +{ + SymCryptHmacInit(&pState->generic, &pExpandedKey->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHmacAppend(&pState->generic, pbData, cbData); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmacResult(&pState->generic, pbResult); +} + + +const SYMCRYPT_MAC SymCryptHmacSha3_512Algorithm_default = { + SymCryptHmacSha3_512ExpandKey, + SymCryptHmacSha3_512Init, + SymCryptHmacSha3_512Append, + SymCryptHmacSha3_512Result, + sizeof(SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA3_512_STATE), + SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE, + &SymCryptSha3_512Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY, generic.outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha3_512Algorithm = &SymCryptHmacSha3_512Algorithm_default; + + +static const BYTE hmacSha3_512Kat[64] = +{ + 0x83, 0x3b, 0x31, 0xe7, 0x77, 0xd6, 0xb3, 0x3d, + 0x75, 0x23, 0xa5, 0x79, 0xcc, 0x3b, 0xeb, 0x27, + 0x6f, 0xd6, 0x52, 0x57, 0x54, 0xc4, 0xc5, 0x4b, + 0x2d, 0x5a, 0x34, 0x7d, 0x36, 0x24, 0x07, 0x91, + 0x7a, 0x3c, 0x62, 0x6e, 0x7e, 0xdb, 0x8e, 0x49, + 0x3b, 0x42, 0xc8, 0xe5, 0xa6, 0x96, 0xd5, 0xe6, + 0x6b, 0xa7, 0xad, 0x20, 0x00, 0xeb, 0x6c, 0xff, + 0x76, 0xcb, 0x1e, 0xc0, 0x30, 0x13, 0x0e, 0x81 +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Selftest(void) +{ + SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE]; + + SymCryptHmacSha3_512ExpandKey( &xKey, SymCryptTestKey32, 32 ); + SymCryptHmacSha3_512( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); 
+ + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha3_512Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha512.c b/libs/symcrypt/lib/hmacsha512.c new file mode 100644 index 00000000000..c91dfab7e1a --- /dev/null +++ b/libs/symcrypt/lib/hmacsha512.c @@ -0,0 +1,59 @@ +// +// HmacSha512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define ALG SHA512 +#define Alg Sha512 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len; state.dataLengthH = 0;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha512Algorithm_default = { + SymCryptHmacSha512ExpandKey, + SymCryptHmacSha512Init, + SymCryptHmacSha512Append, + SymCryptHmacSha512Result, + sizeof(SYMCRYPT_HMAC_SHA512_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA512_STATE), + SYMCRYPT_HMAC_SHA512_RESULT_SIZE, + &SymCryptSha512Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA512_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha512Algorithm = &SymCryptHmacSha512Algorithm_default; + +static const BYTE hmacSha512Kat[64] = { + 0x07, 0x64, 0xa6, 0x58, 0xeb, 0x3e, 0x2f, 0xb0, 0x2c, 0x06, 0x72, 0x93, 0xcd, 0xaa, 0x3c, 0x05, + 0x28, 0x73, 0x15, 0xf2, 0xd3, 0xb4, 0x5a, 0x28, 0x10, 0x20, 0x1e, 0x26, 0xc3, 0x89, 0x35, 0x48, + 0xe9, 0xea, 0xca, 0x72, 0xf0, 0x2e, 0x04, 0x19, 0x20, 0x31, 0x71, 0x68, 0xb5, 0x7a, 0x86, 0x40, + 0x29, 0x1b, 0x3b, 0xb7, 0xaa, 0x4a, 0x5f, 0xaf, 0x80, 0x26, 0xb4, 0xad, 0x23, 0x5a, 0xc4, 0x25, +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Selftest(void) +{ + SYMCRYPT_HMAC_SHA512_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA512_RESULT_SIZE]; + + SymCryptHmacSha512ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha512( &xKey, 
SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + if( memcmp( res, hmacSha512Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh5' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha512_224.c b/libs/symcrypt/lib/hmacsha512_224.c new file mode 100644 index 00000000000..a36556485dc --- /dev/null +++ b/libs/symcrypt/lib/hmacsha512_224.c @@ -0,0 +1,62 @@ +// +// HmacSha512_224.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define SymCryptSha512_224AppendBlocks SymCryptSha512AppendBlocks + +#define ALG SHA512_224 +#define Alg Sha512_224 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len; state.dataLengthH = 0;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha512_224Algorithm_default = { + SymCryptHmacSha512_224ExpandKey, + SymCryptHmacSha512_224Init, + SymCryptHmacSha512_224Append, + SymCryptHmacSha512_224Result, + sizeof(SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA512_224_STATE), + SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE, + &SymCryptSha512_224Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha512_224Algorithm = &SymCryptHmacSha512_224Algorithm_default; + +static const BYTE hmacSha512_224Kat[28] = { + 0x62, 0xc9, 0x59, 0xc7, 0x5b, 0x3c, 0xb2, 0xaf, + 0x95, 0xf5, 0x59, 0x73, 0x2c, 0x46, 0x1d, 0x72, + 0x06, 0x9e, 0xf9, 0x52, 0x9a, 0x8d, 0x84, 0x1a, + 0x73, 0x97, 0xa6, 0x9c +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Selftest(void) +{ + SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE]; + + SymCryptHmacSha512_224ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha512_224( &xKey, 
SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha512_224Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh4' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha512_256.c b/libs/symcrypt/lib/hmacsha512_256.c new file mode 100644 index 00000000000..a7a49413cda --- /dev/null +++ b/libs/symcrypt/lib/hmacsha512_256.c @@ -0,0 +1,62 @@ +// +// HmacSha512_256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define SymCryptSha512_256AppendBlocks SymCryptSha512AppendBlocks + +#define ALG SHA512_256 +#define Alg Sha512_256 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len; state.dataLengthH = 0;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha512_256Algorithm_default = { + SymCryptHmacSha512_256ExpandKey, + SymCryptHmacSha512_256Init, + SymCryptHmacSha512_256Append, + SymCryptHmacSha512_256Result, + sizeof(SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA512_256_STATE), + SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE, + &SymCryptSha512_256Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha512_256Algorithm = &SymCryptHmacSha512_256Algorithm_default; + +static const BYTE hmacSha512_256Kat[32] = { + 0x79, 0x44, 0xb9, 0x97, 0xc0, 0xaa, 0xf7, 0x11, + 0xdd, 0xb3, 0x78, 0x60, 0x68, 0xdb, 0x2b, 0xa1, + 0x40, 0x80, 0x4f, 0xdc, 0xb7, 0x02, 0x7b, 0x6a, + 0xe9, 0x9f, 0x5a, 0x38, 0xc8, 0x28, 0x67, 0x4c +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Selftest(void) +{ + SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE]; + + SymCryptHmacSha512_256ExpandKey( &xKey, SymCryptTestKey32, 16 ); + 
SymCryptHmacSha512_256( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha512_256Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh4' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/kmac.c b/libs/symcrypt/lib/kmac.c new file mode 100644 index 00000000000..ae82af143c4 --- /dev/null +++ b/libs/symcrypt/lib/kmac.c @@ -0,0 +1,123 @@ +// +// kmac.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + + +// +// KMAC128 +// +#define Alg Kmac128 +#define ALG KMAC128 +#define SYMCRYPT_CSHAKEXXX_INIT SymCryptCShake128Init +#define SYMCRYPT_CSHAKEXXX_STATE SYMCRYPT_CSHAKE128_STATE +#define SYMCRYPT_KMACXXX_RESULT_SIZE SYMCRYPT_KMAC128_RESULT_SIZE +#include "kmac_pattern.c" +#undef SYMCRYPT_KMACXXX_RESULT_SIZE +#undef SYMCRYPT_CSHAKEXXX_STATE +#undef SYMCRYPT_CSHAKEXXX_INIT +#undef ALG +#undef Alg + +// MAC interface +const SYMCRYPT_MAC SymCryptKmac128Algorithm_Default = { + SymCryptKmac128ExpandKey, + SymCryptKmac128Init, + SymCryptKmac128Append, + SymCryptKmac128Result, + sizeof(SYMCRYPT_KMAC128_EXPANDED_KEY), + sizeof(SYMCRYPT_KMAC128_STATE), + SYMCRYPT_KMAC128_RESULT_SIZE, + NULL, // ppHashAlgorithm + 0, // outerChainingStateOffset +}; + +const PCSYMCRYPT_MAC SymCryptKmac128Algorithm = &SymCryptKmac128Algorithm_Default; + +static const BYTE kmac128KATAnswer[SYMCRYPT_KMAC128_RESULT_SIZE] = { + 0xea, 0xe9, 0xde, 0xd3, 0xee, 0x2f, 0x34, 0x8a, + 0xd6, 0xd2, 0xcb, 0x70, 0x4b, 0xba, 0xd4, 0x47, + 0x15, 0x32, 0x46, 0x82, 0x8e, 0x41, 0x3a, 0xf5, + 0xf5, 0x62, 0x96, 0x1a, 0xf7, 0x67, 0x48, 0xc1 +}; + +VOID +SYMCRYPT_CALL +SymCryptKmac128Selftest(void) +{ + BYTE result[SYMCRYPT_KMAC128_RESULT_SIZE]; + static const unsigned char Sstr[] = { 'S' }; + SYMCRYPT_KMAC128_EXPANDED_KEY expandedKey; + + 
SymCryptKmac128ExpandKeyEx(&expandedKey, SymCryptTestKey32, 16, Sstr, sizeof(Sstr)); + + SymCryptKmac128(&expandedKey, SymCryptTestMsg16, sizeof(SymCryptTestMsg16), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, kmac128KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('kmac'); + } +} + + +// +// KMAC256 +// +#define Alg Kmac256 +#define ALG KMAC256 +#define SYMCRYPT_CSHAKEXXX_INIT SymCryptCShake256Init +#define SYMCRYPT_CSHAKEXXX_STATE SYMCRYPT_CSHAKE256_STATE +#define SYMCRYPT_KMACXXX_RESULT_SIZE SYMCRYPT_KMAC256_RESULT_SIZE +#include "kmac_pattern.c" +#undef SYMCRYPT_KMACXXX_RESULT_SIZE +#undef SYMCRYPT_CSHAKEXXX_STATE +#undef SYMCRYPT_CSHAKEXXX_INIT +#undef ALG +#undef Alg + +// MAC interface +const SYMCRYPT_MAC SymCryptKmac256Algorithm_Default = { + SymCryptKmac256ExpandKey, + SymCryptKmac256Init, + SymCryptKmac256Append, + SymCryptKmac256Result, + sizeof(SYMCRYPT_KMAC256_EXPANDED_KEY), + sizeof(SYMCRYPT_KMAC256_STATE), + SYMCRYPT_KMAC256_RESULT_SIZE, + NULL, // ppHashAlgorithm + 0, // outerChainingStateOffset +}; + +const PCSYMCRYPT_MAC SymCryptKmac256Algorithm = &SymCryptKmac256Algorithm_Default; + +static const BYTE kmac256KATAnswer[SYMCRYPT_KMAC256_RESULT_SIZE] = { + 0xa9, 0x1d, 0x09, 0x00, 0x71, 0x0c, 0x63, 0xc5, 0x0f, 0xb6, 0x4d, 0xfa, 0xd8, 0x75, 0x4d, 0x78, + 0x2d, 0xc0, 0x82, 0x4b, 0x87, 0x97, 0xda, 0xf2, 0x36, 0xde, 0xe9, 0x35, 0x69, 0x2e, 0x50, 0x81, + 0x0a, 0xea, 0x3b, 0x05, 0xaf, 0x1b, 0x82, 0x3b, 0xc8, 0xa1, 0x9e, 0xe9, 0x9c, 0x5f, 0xd5, 0x5a, + 0x20, 0x92, 0x89, 0x46, 0xa4, 0xe4, 0x1a, 0xdd, 0x3d, 0xb6, 0x47, 0x4d, 0xf2, 0xa5, 0xfc, 0x73 +}; + +VOID +SYMCRYPT_CALL +SymCryptKmac256Selftest(void) +{ + BYTE result[SYMCRYPT_KMAC256_RESULT_SIZE]; + static const unsigned char Sstr[] = { 'S' }; + SYMCRYPT_KMAC256_EXPANDED_KEY expandedKey; + + SymCryptKmac256ExpandKeyEx(&expandedKey, SymCryptTestKey32, 32, Sstr, sizeof(Sstr)); + + SymCryptKmac256(&expandedKey, SymCryptTestMsg16, sizeof(SymCryptTestMsg16), result); + + 
SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, kmac256KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('kmac'); + } +} diff --git a/libs/symcrypt/lib/kmac_pattern.c b/libs/symcrypt/lib/kmac_pattern.c new file mode 100644 index 00000000000..1b4aa0604bc --- /dev/null +++ b/libs/symcrypt/lib/kmac_pattern.c @@ -0,0 +1,218 @@ +// +// kmac_pattern.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#if 0 +#pragma makedep header +#endif + +// +// This source file implements KMAC128 and KMAC256 +// +// See the symcrypt.h file for documentation on what the various functions do. +// + + +// +// SymCryptKmac +// +VOID +SYMCRYPT_CALL +SYMCRYPT_Xxx( + _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( SYMCRYPT_KMACXXX_RESULT_SIZE ) PBYTE pbResult) +{ + SYMCRYPT_XXX_STATE state; + + SYMCRYPT_XxxInit(&state, pExpandedKey); + SYMCRYPT_XxxAppend(&state, pbInput, cbInput); + SYMCRYPT_XxxResult(&state, pbResult); +} + +// +// SymCryptKmacEx +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxEx( + _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_XXX_STATE state; + + SYMCRYPT_XxxInit(&state, pExpandedKey); + SYMCRYPT_XxxAppend(&state, pbInput, cbInput); + SYMCRYPT_XxxResultEx(&state, pbResult, cbResult); +} + + +// +// SymCryptKmacExpandKey +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SYMCRYPT_XxxExpandKey( + _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SYMCRYPT_XxxExpandKeyEx(pExpandedKey, pbKey, cbKey, NULL, 0); +} + +// +// SymCryptKmacExpandKeyEx +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SYMCRYPT_XxxExpandKeyEx( + _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_bytes_( cbCustomizationString ) 
PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString) +{ + static const BYTE nameString[] = { 0x4b, 0x4d, 0x41, 0x43 }; // "KMAC" + + C_ASSERT( sizeof(SYMCRYPT_XXX_EXPANDED_KEY) == sizeof(SYMCRYPT_CSHAKEXXX_STATE) ); + + SYMCRYPT_CSHAKEXXX_INIT( (SYMCRYPT_CSHAKEXXX_STATE*)pExpandedKey, nameString, sizeof(nameString), pbCustomizationString, cbCustomizationString); + + SYMCRYPT_KECCAK_STATE* pks = &pExpandedKey->ks; + + // byte_pad( encode_string( K ) ) + SymCryptKeccakAppendEncodeTimes8(pks, pks->inputBlockSize / 8, TRUE); + SymCryptKeccakAppendEncodedString(pks, pbKey, cbKey); + + if (pks->stateIndex != 0) + { + SymCryptKeccakZeroAppendBlock(pks); + } + + return SYMCRYPT_NO_ERROR; +} + +// +// SymCryptKmacInit +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxInit( + _Out_ PSYMCRYPT_XXX_STATE pState, + _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey) +{ + C_ASSERT(sizeof(*pState) == sizeof(*pExpandedKey)); + + SYMCRYPT_CHECK_MAGIC(pExpandedKey); + memcpy(pState, pExpandedKey, sizeof(*pState)); + SYMCRYPT_SET_MAGIC(pState); +} + +// +// SymCryptKmacAppend +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxAppend( + _Inout_ PSYMCRYPT_XXX_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SYMCRYPT_ASSERT(!pState->ks.squeezeMode); + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + +// +// SymCryptKmacExtract +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxExtract( + _Inout_ PSYMCRYPT_XXX_STATE pState, + _Out_writes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput, + BOOLEAN bWipe) +{ + // This function uses KMAC in XOF mode. + // + // If this is the first time Extract is being called, append right_encode(0) + // to indicate that we're in XOF mode. This padding will be applied only once + // as SymCryptKeccakExtract will transition the state to squeeze mode. 
+ if (!pState->ks.squeezeMode) + { + SymCryptKeccakAppendEncodeTimes8(&pState->ks, 0, FALSE); + } + + SymCryptKeccakExtract(&pState->ks, pbOutput, cbOutput, bWipe); +} + +// +// SymCryptKmacResult +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxResult( + _Inout_ PSYMCRYPT_XXX_STATE pState, + _Out_writes_( SYMCRYPT_KMACXXX_RESULT_SIZE ) PBYTE pbOutput) +{ + SYMCRYPT_XxxResultEx(pState, pbOutput, SYMCRYPT_KMACXXX_RESULT_SIZE); +} + + +// +// SymCryptKmacResultEx +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxResultEx( + _Inout_ PSYMCRYPT_XXX_STATE pState, + _Out_writes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput) +{ + // Result and ResultEx functions are used to extract data only once. + // KMAC requires the output length to be encoded and appended to the + // end of the input before the state switches to squeeze mode. + // + // If Result or ResultEx is called after an Extract call with bWipe=FALSE, + // this means KMAC was used in XOF mode and length padding has already been + // applied. In this case, Result and ResultEx functions extract data one last + // time in XOF mode and wipe the state afterwards. 
+ + if (!pState->ks.squeezeMode) + { + // Append right_encode(L) + SymCryptKeccakAppendEncodeTimes8(&pState->ks, cbOutput, FALSE); + } + + SymCryptKeccakExtract(&pState->ks, pbOutput, cbOutput, TRUE); +} + +// +// SymCryptKmacKeyCopy +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxKeyCopy(_In_ PCSYMCRYPT_XXX_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pDst) +{ + SYMCRYPT_CHECK_MAGIC(pSrc); + *pDst = *pSrc; + SYMCRYPT_SET_MAGIC(pDst); +} + +// +// SymCryptKmacStateCopy +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxStateCopy(_In_ const SYMCRYPT_XXX_STATE* pSrc, _Out_ SYMCRYPT_XXX_STATE* pDst) +{ + SYMCRYPT_CHECK_MAGIC(pSrc); + *pDst = *pSrc; + SYMCRYPT_SET_MAGIC(pDst); +} diff --git a/libs/symcrypt/lib/libmain.c b/libs/symcrypt/lib/libmain.c new file mode 100644 index 00000000000..c25c56e6b3e --- /dev/null +++ b/libs/symcrypt/lib/libmain.c @@ -0,0 +1,539 @@ +// +// libmain.c +// General routines for the SymCrypt library +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#include "C_asm_shared.inc" + +#include "buildInfo.h" + +// The following global g_SymCryptFlags has to be at least 32 +// bits because the iOS environment has interlocked function +// support for variables of size at least 32 bits. +// The relevant function is OSAtomicOr32Barrier. +UINT32 g_SymCryptFlags = 0; + +SYMCRYPT_CPU_FEATURES g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~0; +SYMCRYPT_CPU_FEATURES g_SymCryptCpuFeaturesPresentCheck = 0; + +#if SYMCRYPT_DEBUG + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptLibraryWasNotInitialized(void) +{ + SymCryptFatal( 'init' ); // Function name helps figure out what the problem is. 
+} + +#endif + +const CHAR * const SymCryptBuildString = + "v" SYMCRYPT_BUILD_INFO_VERSION + "_" SYMCRYPT_BUILD_INFO_BRANCH + "_" SYMCRYPT_BUILD_INFO_COMMIT + "_" SYMCRYPT_BUILD_INFO_TIMESTAMP; + +VOID +SYMCRYPT_CALL +SymCryptInitEnvCommon( UINT32 version ) +// Common library initialization. Returns nothing; fatals if the caller's API version does not match. +{ + UINT32 tmp; + + const CHAR * p; + + // Assertion that verifies that the calling application was compiled with + // the same version header files as the library. + if( version != SYMCRYPT_API_VERSION ) + { + SymCryptFatal( 'apiv' ); + } + + // + // Use an interlocked to set the flag in case we add other flags + // that are modified by different threads. + // + SYMCRYPT_ATOMIC_OR32_PRE_RELAXED( &g_SymCryptFlags, SYMCRYPT_FLAG_LIB_INITIALIZED ); + + // + // Do a forced write of our code version. This ensures that the code + // version is part of the binary, so we can look at a binary and figure + // out which version of SymCrypt it was linked with. + // + SYMCRYPT_FORCE_WRITE32( &tmp, SYMCRYPT_API_VERSION ); + + // + // Force the build string to be in memory, because otherwise the + // compiler might get smart and remove it. + // This ensures we can always track back to the SymCrypt source code from + // any binary that links this library + // + for( p = SymCryptBuildString; *p!=0; p++ ) + { + SYMCRYPT_FORCE_WRITE8( (PBYTE) &tmp, *p ); + } + + // + // Make an inverted copy of the CPU detection results. + // This helps us diagnose corruption of our flags + // Force-write otherwise the compiler optimizes it away + // + SYMCRYPT_FORCE_WRITE32( &g_SymCryptCpuFeaturesPresentCheck, ~g_SymCryptCpuFeaturesNotPresent ); + + // + // Test that the C and assembler code agree on the various structure member offsets. + // This gets optimized away in FRE builds as all the values are compile-time computable. 
+ // +#define SYMCRYPT_CHECK_ASM_OFFSET( a, b ) if( (a) != (b) ) {SymCryptFatal( b );} + SYMCRYPT_CHECK_ASM_OFFSETS; +#undef SYMCRYPT_CHECK_ASM_OFFSET +} + +_Analysis_noreturn_ +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptFatalHang( UINT32 fatalCode ) +// +// This function is used by the environment-specific fatal code +// as a last resort when none of the other fatal methods work. +// +{ + UINT32 fcode; + + // + // Put the fatal code in a location we can find + // + SYMCRYPT_FORCE_WRITE32( &fcode, fatalCode ); + +fatalInfiniteLoop: + goto fatalInfiniteLoop; +} + +#if 0 /* SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 */ + +VOID +SYMCRYPT_CALL +SymCryptWipeAsm( _Out_writes_bytes_( cbData ) PVOID pbData, SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptWipe( _Out_writes_bytes_( cbData ) PVOID pbData, SIZE_T cbData ) +{ + SymCryptWipeAsm( pbData, cbData ); +} + +#else +// +// Generic but slow wipe routine. +// +VOID +SYMCRYPT_CALL +SymCryptWipe( _Out_writes_bytes_( cbData ) PVOID pbData, SIZE_T cbData ) +{ + volatile BYTE * p = (volatile BYTE *) pbData; + SIZE_T i; + + for( i=0; i<cbData; i++ ){ + p[i] = 0; + } + +} +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM +VOID +SYMCRYPT_CALL +SymCryptXorBytes( + _In_reads_( cbBytes ) PCBYTE pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + _Out_writes_( cbBytes ) PBYTE pbResult, + SIZE_T cbBytes ) +{ + SIZE_T i; + + if( cbBytes == 16 ) + { + PCUINT32 s1 = (PCUINT32) pbSrc1; + PCUINT32 s2 = (PCUINT32) pbSrc2; + PUINT32 d = (PUINT32) pbResult; + + d[0] = s1[0] ^ s2[0]; + d[1] = s1[1] ^ s2[1]; + d[2] = s1[2] ^ s2[2]; + d[3] = s1[3] ^ s2[3]; + } + else + { + i = 0; + while( i + 3 < cbBytes ) + { + *(UINT32 *)&pbResult[i] = *(UINT32 *)&pbSrc1[i] ^ *(UINT32 *)&pbSrc2[i]; + i += 4; + } + + while( i < cbBytes ) + { + pbResult[i] = pbSrc1[i] ^ pbSrc2[i]; + i++; + } + } +} + +#elif SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 + +VOID +SYMCRYPT_CALL +SymCryptXorBytes( + _In_reads_( cbBytes ) PCBYTE 
pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + _Out_writes_( cbBytes ) PBYTE pbResult, + SIZE_T cbBytes ) +{ + if( cbBytes == 16 ) + { + PCUINT64 s1 = (PCUINT64) pbSrc1; + PCUINT64 s2 = (PCUINT64) pbSrc2; + PUINT64 d = (PUINT64) pbResult; + + d[0] = s1[0] ^ s2[0]; + d[1] = s1[1] ^ s2[1]; + } + else + { + while( cbBytes >= 8 ) + { + *(UINT64 *)pbResult = *(UINT64 *)pbSrc1 ^ *(UINT64 *)pbSrc2; + pbSrc1 += 8; + pbSrc2 += 8; + pbResult += 8; + cbBytes -= 8; + } + + while( cbBytes > 0 ) + { + *pbResult = *pbSrc1 ^ *pbSrc2; + pbResult++; + pbSrc1++; + pbSrc2++; + cbBytes--; + } + } +} + + +#else +// +// Generic code +// +VOID +SYMCRYPT_CALL +SymCryptXorBytes( + _In_reads_( cbBytes ) PCBYTE pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + _Out_writes_( cbBytes ) PBYTE pbResult, + SIZE_T cbBytes ) +{ + SIZE_T i; + + for( i=0; i<cbBytes; i++ ) + { + pbResult[i] = pbSrc1[i] ^ pbSrc2[i]; + } +} +#endif + + +// +// Generic LSB/MSBfirst load/store code for variable-sized buffers. +// These implementations are inefficient and not side-channel safe. +// This is sufficient for the current usage (typically to allow +// callers to read/write RSA public exponents from/to variable-sized +// buffers). +// Consider upgrading them in future. +// + +UINT32 +SymCryptUint32Bitsize( UINT32 value ) +// +// Some CPUs/compilers have intrinsics for this, +// but this is portable and works everywhere. 
+// +{ + UINT32 res; + + res = 0; + while( value != 0 ) + { + res += 1; + value >>= 1; + } + + return res; +} + +UINT32 +SymCryptUint64Bitsize( UINT64 value ) +{ + UINT32 res; + UINT32 upper; + + upper = (UINT32)(value >> 32); + + if( upper == 0 ) + { + res = SymCryptUint32Bitsize( (UINT32) value ); + } else { + res = 32 + SymCryptUint32Bitsize( upper ); + } + + return res; +} + +UINT32 +SymCryptUint32Bytesize( UINT32 value ) +{ + if( value == 0 ) + { + return 0; + } + if( value < 0x100 ) + { + return 1; + } + if( value < 0x10000 ) + { + return 2; + } + if( value < 0x1000000 ) + { + return 3; + } + return 4; +} + +UINT32 +SymCryptUint64Bytesize( UINT64 value ) +{ + UINT32 res; + UINT32 upper; + + upper = (UINT32)(value >> 32); + + if( upper == 0 ) + { + res = SymCryptUint32Bytesize( (UINT32) value ); + } else { + res = 4 + SymCryptUint32Bytesize( upper ); + } + + return res; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadLsbFirstUint32( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT32 pDst ) +{ + UINT64 v64; + UINT32 v32; + SYMCRYPT_ERROR scError; + + scError = SymCryptLoadLsbFirstUint64( pbSrc, cbSrc, &v64 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + v32 = (UINT32) v64; + if( v32 != v64 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + + *pDst = v32; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadLsbFirstUint64( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT64 pDst ) +{ + UINT64 v; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + v = 0; + pbSrc += cbSrc; + while( cbSrc > 8 ) + { + if( *--pbSrc != 0 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + cbSrc--; + } + + while( cbSrc > 0 ) + { + v = (v << 8) | *--pbSrc; + cbSrc--; + } + + *pDst = v; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadMsbFirstUint32( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT32 pDst ) +{ + UINT64 v64; + UINT32 
v32; + SYMCRYPT_ERROR scError; + + scError = SymCryptLoadMsbFirstUint64( pbSrc, cbSrc, &v64 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + v32 = (UINT32) v64; + if( v32 != v64 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + + *pDst = v32; + +cleanup: + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadMsbFirstUint64( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT64 pDst ) +{ + UINT64 v; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + v = 0; + while( cbSrc > 8 ) + { + if( *pbSrc++ != 0 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + cbSrc--; + } + + while( cbSrc > 0 ) + { + v = (v << 8) | *pbSrc++; + cbSrc--; + } + + *pDst = v; + +cleanup: + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreLsbFirstUint32( + UINT32 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + return SymCryptStoreLsbFirstUint64( src, pbDst, cbDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreLsbFirstUint64( + UINT64 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + while( cbDst > 0 ) + { + *pbDst++ = (BYTE) src; + src >>= 8; + cbDst--; + } + + if( src != 0 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreMsbFirstUint32( + UINT32 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + return SymCryptStoreMsbFirstUint64( src, pbDst, cbDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreMsbFirstUint64( + UINT64 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + pbDst += cbDst; + while( cbDst > 0 ) + { + *--pbDst = (BYTE) src; + src >>= 8; + cbDst--; + } + + if( src != 0 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/lms.c b/libs/symcrypt/lib/lms.c new file mode 
100644 index 00000000000..50879e89f9e --- /dev/null +++ b/libs/symcrypt/lib/lms.c @@ -0,0 +1,1162 @@ +// +// lms.c LMS implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// +static const PCSYMCRYPT_HASH* LmsHashObjects[] = { + &SymCryptSha256Algorithm, // 0 + &SymCryptShake256HashAlgorithm, // 1 +}; + +typedef struct _SYMCRYPT_LMS_PARAMETER_PREDEFINED +{ + SYMCRYPT_LMS_ALGID lmsAlgId; + + // output length + UINT8 cbHashOutput; + + // total tree height + UINT8 nTreeHeight; + + // hash function index + UINT8 nHashIdx; + +} SYMCRYPT_LMS_PARAMETER_PREDEFINED, * PSYMCRYPT_LMS_PARAMETER_PREDEFINED; + +typedef const SYMCRYPT_LMS_PARAMETER_PREDEFINED* PCSYMCRYPT_LMS_PARAMETER_PREDEFINED; + + +static const SYMCRYPT_LMS_PARAMETER_PREDEFINED LmsParametersPredefined[] = { + + // algId m h HIdx + { SYMCRYPT_LMS_SHA256_M32_H5, 32, 5 , 0 }, + { SYMCRYPT_LMS_SHA256_M32_H10, 32, 10, 0 }, + { SYMCRYPT_LMS_SHA256_M32_H15, 32, 15, 0 }, + { SYMCRYPT_LMS_SHA256_M32_H20, 32, 20, 0 }, + { SYMCRYPT_LMS_SHA256_M32_H25, 32, 25, 0 }, + { SYMCRYPT_LMS_SHAKE_M32_H5, 32, 5 , 1 }, + { SYMCRYPT_LMS_SHAKE_M32_H10, 32, 10, 1 }, + { SYMCRYPT_LMS_SHAKE_M32_H15, 32, 15, 1 }, + { SYMCRYPT_LMS_SHAKE_M32_H20, 32, 20, 1 }, + { SYMCRYPT_LMS_SHAKE_M32_H25, 32, 25, 1 }, + { SYMCRYPT_LMS_SHA256_M24_H5, 24, 5 , 0 }, + { SYMCRYPT_LMS_SHA256_M24_H10, 24, 10, 0 }, + { SYMCRYPT_LMS_SHA256_M24_H15, 24, 15, 0 }, + { SYMCRYPT_LMS_SHA256_M24_H20, 24, 20, 0 }, + { SYMCRYPT_LMS_SHA256_M24_H25, 24, 25, 0 }, + { SYMCRYPT_LMS_SHAKE_M24_H5, 24, 5 , 1 }, + { SYMCRYPT_LMS_SHAKE_M24_H10, 24, 10, 1 }, + { SYMCRYPT_LMS_SHAKE_M24_H15, 24, 15, 1 }, + { SYMCRYPT_LMS_SHAKE_M24_H20, 24, 20, 1 }, + { SYMCRYPT_LMS_SHAKE_M24_H25, 24, 25, 1 }, +}; + +typedef struct _SYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED +{ + SYMCRYPT_LMS_OTS_ALGID lmsOtsAlgId; + + // output length + UINT8 cbHashOutput; 
+ + // Winternitz width + UINT8 nWidth; + + // hash function index + UINT8 nHashIdx; + +} SYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED, * PSYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED; +typedef const SYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED* PCSYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED; + +static const SYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED LmsOtsParametersPredefined[] = { + + // algId n w HIdx + { SYMCRYPT_LMS_OTS_SHA256_N32_W1, 32, 1, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N32_W2, 32, 2, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N32_W4, 32, 4, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N32_W8, 32, 8, 0 }, + { SYMCRYPT_LMS_OTS_SHAKE_N32_W1, 32, 1, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N32_W2, 32, 2, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N32_W4, 32, 4, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N32_W8, 32, 8, 1 }, + { SYMCRYPT_LMS_OTS_SHA256_N24_W1, 24, 1, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N24_W2, 24, 2, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N24_W4, 24, 4, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N24_W8, 24, 8, 0 }, + { SYMCRYPT_LMS_OTS_SHAKE_N24_W1, 24, 1, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N24_W2, 24, 2, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N24_W4, 24, 4, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N24_W8, 24, 8, 1 }, +}; +static const BYTE SYMCRYPT_LMS_D_PBLC[] = { 0x80, 0x80 }; +static const BYTE SYMCRYPT_LMS_D_MESG[] = { 0x81, 0x81 }; +static const BYTE SYMCRYPT_LMS_D_LEAF[] = { 0x82, 0x82 }; +static const BYTE SYMCRYPT_LMS_D_INTR[] = { 0x83, 0x83 }; + +static +VOID +LmsHashMessage( + _In_ PCSYMCRYPT_HASH pHash, + _In_reads_bytes_(SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE) PCBYTE pbId, + _In_reads_bytes_(sizeof(UINT32)) PCBYTE pbLeafNumber, + _In_reads_bytes_(cbRandomizer) PCBYTE pbRandomizer, + SIZE_T cbRandomizer, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + _Out_writes_bytes_(cbOut) PBYTE pbOut, + SIZE_T cbOut) +{ + SYMCRYPT_HASH_STATE state = { 0 }; + + SymCryptHashInit(pHash, &state); + SymCryptHashAppend(pHash, &state, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, pbLeafNumber, sizeof(UINT32)); + 
SymCryptHashAppend(pHash, &state, SYMCRYPT_LMS_D_MESG, sizeof(SYMCRYPT_LMS_D_MESG)); + SymCryptHashAppend(pHash, &state, pbRandomizer, cbRandomizer); + SymCryptHashAppend(pHash, &state, pbMessage, cbMessage); + SymCryptHashResult(pHash, &state, pbOut, cbOut); +} + +static +VOID +SYMCRYPT_CALL +LmsOtskeyComputePrivate( + _In_ PCSYMCRYPT_LMS_KEY pKey, + _In_ UINT32 nLeafNumber, + _In_ UINT32 nPIdx, + _Out_writes_bytes_(pKey->params.cbHashOutput) + PBYTE pbOtsPrivateKey) +{ + UINT32 cbHashOutput = pKey->params.cbHashOutput; + PCSYMCRYPT_HASH pHash = pKey->params.pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + BYTE abTemp[sizeof(UINT32) + 3] = { 0 }; // sizeof(UINT32) for nLeafNumber, 2 bytes of nPIdx and 1 byte of 0xff + + SYMCRYPT_ASSERT(nLeafNumber <= (((UINT32)1 << pKey->params.nTreeHeight) - 1)); + + SYMCRYPT_STORE_MSBFIRST32(abTemp, nLeafNumber); + SYMCRYPT_STORE_MSBFIRST16(abTemp + sizeof(UINT32), (UINT16)nPIdx); + abTemp[sizeof(UINT32) + 2] = 0xff; + + SymCryptHashInit(pHash, &state); + SymCryptHashAppend(pHash, &state, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, abTemp, sizeof(abTemp)); + SymCryptHashAppend(pHash, &state, pKey->abSeed, cbHashOutput); + SymCryptHashResult(pHash, &state, pbOtsPrivateKey, cbHashOutput); +} + +static +VOID +SYMCRYPT_CALL +LmskeyWipe( + _Inout_ PSYMCRYPT_LMS_KEY pKey) +{ + SYMCRYPT_CHECK_MAGIC(pKey); + + SymCryptWipeKnownSize(pKey->abSeed, sizeof(pKey->abSeed)); + SymCryptWipeKnownSize(pKey->abPublicRoot, sizeof(pKey->abPublicRoot)); + SymCryptWipeKnownSize(pKey->abId, sizeof(pKey->abId)); + pKey->nNextUnusedLeaf = 0; + pKey->keyType = SYMCRYPT_LMSKEY_TYPE_NONE; +} + +static +UINT16 +LmsOtsCalculateChecksum( + _In_reads_bytes_(cbString) PCBYTE pbString, + UINT32 cbString, + UINT32 nWidth, + UINT32 nLeftShift) +{ + UINT32 sum = 0; + UINT32 max = (1 << nWidth) - 1; + SYMCRYPT_ASSERT(SYMCRYPT_IS_VALID_WINTERNITZ_WIDTH(nWidth)); + + for (UINT32 i = 0; i < (cbString * 8 / nWidth); i = 
i + 1) + { + sum = sum + max - SymCryptHbsGetDigit(nWidth, pbString, cbString, i); + } + return (UINT16)(sum << nLeftShift); +} + +static +SIZE_T +SYMCRYPT_CALL +LmsOtsSizeofSignatureFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams) +{ + UINT32 n = pParams->cbHashOutput; + UINT32 p = pParams->nByteStringCount; + SIZE_T size = 0; + + size += sizeof(UINT32); // type + size += n; // randomizer + size += p * n; // y[0..p-1] + + return size; +} + +static +VOID +SYMCRYPT_CALL +LmsOtskeySign( + _In_ PSYMCRYPT_LMS_KEY pKey, + UINT64 nLeafNumber, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + _In_reads_bytes_(pKey->params.cbHashOutput) PCBYTE pbRandomizer, + _Out_writes_bytes_(cbSignature) PBYTE pbSignature, + SIZE_T cbSignature) +{ + PCSYMCRYPT_HASH pHash = pKey->params.pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + UINT32 nIndex = 0; + UINT32 cbHashOutput = pKey->params.cbHashOutput; + UINT32 nWinternitzChainWidth = pKey->params.nWinternitzChainWidth; + SIZE_T cbRemainingBytes = cbSignature; + UINT16 nChecksum = 0; + BYTE en32LeafNumber[sizeof(UINT32)] = {0}; + BYTE en16Index[sizeof(UINT16)] = { 0 }; + BYTE abOtsPrivateKey[SYMCRYPT_LMS_MAX_N] = { 0 }; + BYTE abLmsHashedMessage[SYMCRYPT_LMS_MAX_N + sizeof(nChecksum)] = { 0 }; + PBYTE pbDest = pbSignature; + + SYMCRYPT_ASSERT(cbSignature == LmsOtsSizeofSignatureFromParams(&pKey->params)); + + SYMCRYPT_STORE_MSBFIRST32(pbDest, pKey->params.lmsOtsAlgID); + pbDest += sizeof(UINT32); + cbRemainingBytes -= sizeof(UINT32); + + memcpy(pbDest, pbRandomizer, cbHashOutput); + pbDest += cbHashOutput; + cbRemainingBytes -= cbHashOutput; + + SYMCRYPT_STORE_MSBFIRST32(en32LeafNumber, (UINT32)nLeafNumber); + LmsHashMessage(pHash, pKey->abId, en32LeafNumber, pbRandomizer, cbHashOutput, pbMessage, cbMessage, abLmsHashedMessage, cbHashOutput); + + nChecksum = LmsOtsCalculateChecksum(abLmsHashedMessage, cbHashOutput, nWinternitzChainWidth, pKey->params.nChecksumLShiftBits); + 
SYMCRYPT_STORE_MSBFIRST16((UINT16*)&abLmsHashedMessage[cbHashOutput], nChecksum); + + SymCryptHashInit(pHash, &state); + for (nIndex = 0; nIndex < pKey->params.nByteStringCount; nIndex++) + { + BYTE coeff = (BYTE)SymCryptHbsGetDigit(nWinternitzChainWidth, abLmsHashedMessage, cbHashOutput + sizeof(nChecksum), nIndex); + LmsOtskeyComputePrivate(pKey, (UINT32)nLeafNumber, nIndex, abOtsPrivateKey); + + SYMCRYPT_STORE_MSBFIRST16(en16Index, (UINT16)nIndex); + + for (BYTE j = 0; j < coeff; j++) + { + SymCryptHashAppend(pHash, &state, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &state, en16Index, sizeof(UINT16)); + SymCryptHashAppend(pHash, &state, &j, 1); + SymCryptHashAppend(pHash, &state, abOtsPrivateKey, cbHashOutput); + SymCryptHashResult(pHash, &state, abOtsPrivateKey, cbHashOutput); + } + memcpy(pbDest, abOtsPrivateKey, cbHashOutput); + pbDest += cbHashOutput; + cbRemainingBytes -= cbHashOutput; + } + SYMCRYPT_ASSERT(cbRemainingBytes == 0); + + return; +} + +static +VOID +SYMCRYPT_CALL +LmsOtskeyComputePublic( + _In_ PCSYMCRYPT_LMS_KEY pKey, + UINT32 nNodeIdx, + _Out_writes_bytes_(pKey->params.cbHashOutput) PBYTE pbK) +{ + UINT32 cbHashOutput = pKey->params.cbHashOutput; + UINT32 maxJ = (1 << pKey->params.nWinternitzChainWidth) - 1; + PCSYMCRYPT_HASH pHash = pKey->params.pLmsHashFunction; + SYMCRYPT_HASH_STATE statePriv = { 0 }; + SYMCRYPT_HASH_STATE statePub = { 0 }; + BYTE en32LeafNumber[sizeof(UINT32)] = { 0 }; + BYTE en16Index[sizeof(UINT16)] = { 0 }; + BYTE abNode[SYMCRYPT_LMS_MAX_N] = { 0 }; + + SYMCRYPT_STORE_MSBFIRST32(en32LeafNumber, nNodeIdx); + + SymCryptHashInit(pHash, &statePub); + SymCryptHashAppend(pHash, &statePub, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &statePub, en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &statePub, SYMCRYPT_LMS_D_PBLC, sizeof(SYMCRYPT_LMS_D_PBLC)); + + 
SymCryptHashInit(pHash, &statePriv); + for (UINT32 i = 0; i < pKey->params.nByteStringCount; i++) + { + LmsOtskeyComputePrivate(pKey, nNodeIdx, i, abNode); + SYMCRYPT_STORE_MSBFIRST16(en16Index, (UINT16)i); + + for (BYTE j = 0; j < maxJ; j++) + { + SymCryptHashAppend(pHash, &statePriv, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &statePriv, en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &statePriv, en16Index, sizeof(UINT16)); + SymCryptHashAppend(pHash, &statePriv, &j, 1); + SymCryptHashAppend(pHash, &statePriv, abNode, cbHashOutput); + SymCryptHashResult(pHash, &statePriv, abNode, cbHashOutput); + } + SymCryptHashAppend(pHash, &statePub, abNode, cbHashOutput); + } + SymCryptHashResult(pHash, &statePub, pbK, cbHashOutput); +} + +static +VOID +SYMCRYPT_CALL +LmsComputeNodeValue( + _In_ PCSYMCRYPT_LMS_KEY pKey, + UINT32 nIndex, + _Out_writes_bytes_(pKey->params.cbHashOutput) PBYTE pbNodeValue, + SIZE_T cbNodeValue) +{ + UNREFERENCED_PARAMETER(cbNodeValue); + + UINT32 cbHashOutput = pKey->params.cbHashOutput; + UINT32 nInternalNodes = (UINT32)1 << pKey->params.nTreeHeight; + PCSYMCRYPT_HASH pHash = pKey->params.pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + BYTE abTemp[SYMCRYPT_LMS_MAX_N] = { 0 }; + BYTE en32Index[sizeof(UINT32)] = { 0 }; + BYTE abOtsPubKey[SYMCRYPT_LMS_MAX_N] = { 0 }; + + SYMCRYPT_ASSERT(nIndex > 0); + SYMCRYPT_ASSERT(cbNodeValue == cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST32(en32Index, nIndex); + + SymCryptHashInit(pHash, &state); + SymCryptHashAppend(pHash, &state, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32Index, sizeof(UINT32)); + if (nIndex >= nInternalNodes) + { + LmsOtskeyComputePublic(pKey, nIndex - nInternalNodes, abOtsPubKey); + + SymCryptHashAppend(pHash, &state, SYMCRYPT_LMS_D_LEAF, sizeof(SYMCRYPT_LMS_D_LEAF)); + SymCryptHashAppend(pHash, &state, abOtsPubKey, cbHashOutput); + } + else + { + SymCryptHashAppend(pHash, &state, 
SYMCRYPT_LMS_D_INTR, sizeof(SYMCRYPT_LMS_D_INTR)); + + LmsComputeNodeValue(pKey, 2 * nIndex, abTemp, cbHashOutput); + SymCryptHashAppend(pHash, &state, abTemp, cbHashOutput); + SymCryptWipeKnownSize(abTemp, SYMCRYPT_LMS_MAX_N); + + LmsComputeNodeValue(pKey, 2 * nIndex + 1, abTemp, cbHashOutput); + SymCryptHashAppend(pHash, &state, abTemp, cbHashOutput); + } + SymCryptHashResult(pHash, &state, pbNodeValue, cbHashOutput); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsParamsFromAlgId( + SYMCRYPT_LMS_ALGID lmsAlgID, + SYMCRYPT_LMS_OTS_ALGID lmsOtsAlgID, + _Out_ PSYMCRYPT_LMS_PARAMS pParams) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T uLmsParametersCount = 0; + SIZE_T uLmsOtsParametersCount = 0; + UINT32 u = 0; + UINT32 v = 0; + PCSYMCRYPT_LMS_PARAMETER_PREDEFINED pLmsParameters = NULL; + PCSYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED pLmsOtsParameters = NULL; + BOOL bFound = FALSE; + + SymCryptWipeKnownSize(pParams, sizeof(*pParams)); + pLmsOtsParameters = LmsOtsParametersPredefined; + uLmsOtsParametersCount = SYMCRYPT_ARRAY_SIZE(LmsOtsParametersPredefined); + pLmsParameters = LmsParametersPredefined; + uLmsParametersCount = SYMCRYPT_ARRAY_SIZE(LmsParametersPredefined); + + for (UINT32 i = 0; i < uLmsParametersCount; i++) + { + if (pLmsParameters[i].lmsAlgId == lmsAlgID) + { + pParams->lmsAlgID = lmsAlgID; + pParams->nTreeHeight = pLmsParameters[i].nTreeHeight; + SYMCRYPT_ASSERT(pParams->nTreeHeight <= SYMCRYPT_LMS_MAX_CUSTOM_TREE_HEIGHT); + + pParams->cbHashOutput = pLmsParameters[i].cbHashOutput; + + SYMCRYPT_ASSERT(pLmsParameters[i].nHashIdx < SYMCRYPT_ARRAY_SIZE(LmsHashObjects)); + pParams->pLmsHashFunction = *LmsHashObjects[pLmsParameters[i].nHashIdx]; + bFound = TRUE; + break; + } + } + if (!bFound) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + bFound = FALSE; + for (UINT32 i = 0; i < uLmsOtsParametersCount; i++) + { + if (pLmsOtsParameters[i].lmsOtsAlgId == lmsOtsAlgID) + { + SYMCRYPT_ASSERT(pLmsOtsParameters[i].nHashIdx < 
SYMCRYPT_ARRAY_SIZE(LmsHashObjects)); + + if (pParams->pLmsHashFunction != *LmsHashObjects[pLmsOtsParameters[i].nHashIdx] || + pParams->cbHashOutput != pLmsOtsParameters[i].cbHashOutput) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + pParams->lmsOtsAlgID = lmsOtsAlgID; + pParams->nWinternitzChainWidth = pLmsOtsParameters[i].nWidth; + SymCryptHbsGetWinternitzLengths( + pParams->cbHashOutput, + pParams->nWinternitzChainWidth, + &u, + &v); + SYMCRYPT_ASSERT((v * pParams->nWinternitzChainWidth) <= SYMCRYPT_LMS_CHECKSUM_SIZE); + pParams->nChecksumLShiftBits = SYMCRYPT_LMS_CHECKSUM_SIZE - (v * pParams->nWinternitzChainWidth); + pParams->nByteStringCount = u + v; + bFound = TRUE; + break; + } + } + if (!bFound) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + +cleanup: + return scError; +} + +SIZE_T +SYMCRYPT_CALL +SymCryptLmsSizeofSignatureFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams) +{ + SIZE_T size = 0; + + size += sizeof(UINT32); // q + size += LmsOtsSizeofSignatureFromParams(pParams); // LMS-OTS signature + size += sizeof(UINT32); // type + size += pParams->nTreeHeight * pParams->cbHashOutput; // path[0..h-1] + return size; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSetParams( + _Out_ PSYMCRYPT_LMS_PARAMS pParams, + UINT32 lmsAlgID, + UINT32 lmsOtsAlgID, + _In_ PCSYMCRYPT_HASH pLmsHashFunction, + UINT32 cbHashOutput, + UINT32 nTreeHeight, + UINT32 nWinternitzChainWidth) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 u = 0; + UINT32 v = 0; + + SymCryptWipeKnownSize(pParams, sizeof(*pParams)); + + // nTreeHeight must be positive and maximum SYMCRYPT_LMS_MAX_CUSTOM_TREE_HEIGHT + if (nTreeHeight == 0 || nTreeHeight > SYMCRYPT_LMS_MAX_CUSTOM_TREE_HEIGHT) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Output cbHashOutput cannot be larger than the hash output size or SYMCRYPT_LMS_MAX_N + if (cbHashOutput == 0 || cbHashOutput > pLmsHashFunction->resultSize || cbHashOutput > SYMCRYPT_LMS_MAX_N) + { 
+ scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Winternitz parameter must be one of 1, 2, 4, or 8 + if (!SYMCRYPT_IS_VALID_WINTERNITZ_WIDTH(nWinternitzChainWidth)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pParams->lmsAlgID = lmsAlgID; + pParams->lmsOtsAlgID = lmsOtsAlgID; + pParams->pLmsHashFunction = pLmsHashFunction; + pParams->nTreeHeight = nTreeHeight; + pParams->cbHashOutput = cbHashOutput; + pParams->nWinternitzChainWidth = nWinternitzChainWidth; + SymCryptHbsGetWinternitzLengths( + pParams->cbHashOutput, + pParams->nWinternitzChainWidth, + &u, + &v); + pParams->nChecksumLShiftBits = SYMCRYPT_LMS_CHECKSUM_SIZE - (v * pParams->nWinternitzChainWidth); + pParams->nByteStringCount = u + v; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSizeofKeyBlobFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + SYMCRYPT_LMSKEY_TYPE keyType, + _Out_ SIZE_T* pcbKey) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + switch (keyType) + { + case SYMCRYPT_LMSKEY_TYPE_PUBLIC: + *pcbKey = SYMCRYPT_LMS_PUB_KEY_SIZE(pParams->cbHashOutput); + break; + + case SYMCRYPT_LMSKEY_TYPE_PRIVATE: + *pcbKey = SYMCRYPT_LMS_PRIV_KEY_SIZE(pParams->cbHashOutput); + break; + + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + break; + } + + return scError; +} + +PSYMCRYPT_LMS_KEY +SYMCRYPT_CALL +SymCryptLmskeyAllocate( + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + UINT32 flags) +{ + PSYMCRYPT_LMS_KEY pKey = NULL; + SIZE_T cbSize = sizeof(SYMCRYPT_LMS_KEY); + + if (flags != 0) + { + goto cleanup; + } + + pKey = SymCryptCallbackAlloc(cbSize); + if (pKey == NULL) + { + goto cleanup; + } + + SymCryptWipe(pKey, cbSize); + pKey->cbSize = cbSize; + + memcpy(&pKey->params, pParams, sizeof(*pParams)); + SYMCRYPT_SET_MAGIC(pKey); + +cleanup: + return pKey; +} + +VOID +SYMCRYPT_CALL +SymCryptLmskeyFree( + _Inout_ PSYMCRYPT_LMS_KEY pKey) +{ + SYMCRYPT_CHECK_MAGIC(pKey); + + SymCryptWipeKnownSize(pKey, sizeof(*pKey)); + 
SymCryptCallbackFree(pKey); +} + +static +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyVerifyRoot( + _In_ PCSYMCRYPT_LMS_KEY pKey) +{ + BYTE abPublicRoot[SYMCRYPT_LMS_MAX_N] = { 0 }; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC(pKey); + + // key to be verified has to be a private key + if (pKey->keyType != SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // compute the public root from the private key, root node has index 1 + LmsComputeNodeValue( + pKey, + 1, + abPublicRoot, + pKey->params.cbHashOutput); + + if (!SymCryptEqual(abPublicRoot, pKey->abPublicRoot, pKey->params.cbHashOutput)) + { + scError = SYMCRYPT_HBS_PUBLIC_ROOT_MISMATCH; + } + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyGenerate( + _Inout_ PSYMCRYPT_LMS_KEY pKey, + UINT32 flags) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pKey->nNextUnusedLeaf = 0; + // Set the LMS key identifier I + scError = SymCryptCallbackRandom(pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Set the private key Seed value + scError = SymCryptCallbackRandom(pKey->abSeed, pKey->params.cbHashOutput); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // compute the public root from the private key + LmsComputeNodeValue( + pKey, + 1, + pKey->abPublicRoot, + pKey->params.cbHashOutput); + + pKey->keyType = SYMCRYPT_LMSKEY_TYPE_PRIVATE; + +cleanup: + if (scError != SYMCRYPT_NO_ERROR) + { + LmskeyWipe(pKey); + } + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyGetValue( + _In_ PCSYMCRYPT_LMS_KEY pKey, + SYMCRYPT_LMSKEY_TYPE keyType, + UINT32 flags, + _Out_writes_bytes_(cbBlob) PBYTE pbBlob, + SIZE_T cbBlob) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T cbHashOutput = 
pKey->params.cbHashOutput; + SIZE_T cbKey = 0; + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0 || + (keyType != SYMCRYPT_LMSKEY_TYPE_PRIVATE && + keyType != SYMCRYPT_LMSKEY_TYPE_PUBLIC)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ((keyType == SYMCRYPT_LMSKEY_TYPE_PRIVATE) && (pKey->keyType == SYMCRYPT_LMSKEY_TYPE_PUBLIC)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptLmsSizeofKeyBlobFromParams(&pKey->params, keyType, &cbKey); + if (cbBlob != cbKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + SYMCRYPT_STORE_MSBFIRST32(pbBlob, (UINT32)pKey->params.lmsAlgID); + pbBlob += sizeof(UINT32); + + SYMCRYPT_STORE_MSBFIRST32(pbBlob, (UINT32)pKey->params.lmsOtsAlgID); + pbBlob += sizeof(UINT32); + + memcpy(pbBlob, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + pbBlob += SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE; + + memcpy(pbBlob, pKey->abPublicRoot, cbHashOutput); + pbBlob += cbHashOutput; + + if (keyType == SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + + SYMCRYPT_ASSERT((pKey->nNextUnusedLeaf & 0xFFFFFFFF00000000) == 0); + + SYMCRYPT_STORE_MSBFIRST32(pbBlob, (UINT32)pKey->nNextUnusedLeaf); + pbBlob += sizeof(UINT32); + + memcpy(pbBlob, pKey->abSeed, cbHashOutput); + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeySetValue( + _In_reads_bytes_(cbBlob) PCBYTE pbBlob, + SIZE_T cbBlob, + SYMCRYPT_LMSKEY_TYPE keyType, + UINT32 flags, + _Inout_ PSYMCRYPT_LMS_KEY pKey) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 lmsAlgID = 0; + UINT32 lmsOtsAlgID = 0; + SIZE_T cbKey = 0; + + SYMCRYPT_ASSERT(keyType == SYMCRYPT_LMSKEY_TYPE_PUBLIC || keyType == SYMCRYPT_LMSKEY_TYPE_PRIVATE); + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags & (~SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Public key validation can only be performed for private keys + if ((flags & SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT) != 0 && + keyType != 
SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptLmsSizeofKeyBlobFromParams(&pKey->params, keyType, &cbKey); + if (cbBlob != cbKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + lmsAlgID = SYMCRYPT_LOAD_MSBFIRST32(pbBlob); + pbBlob += sizeof(UINT32); + + lmsOtsAlgID = SYMCRYPT_LOAD_MSBFIRST32(pbBlob); + pbBlob += sizeof(UINT32); + + // check if the lmsAlgID and lmsOtsAlgID matches the ones in the key + if (lmsAlgID != pKey->params.lmsAlgID || lmsOtsAlgID != pKey->params.lmsOtsAlgID) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + SymCryptWipeKnownSize(pKey->abPublicRoot, sizeof(pKey->abPublicRoot)); + SymCryptWipeKnownSize(pKey->abId, sizeof(pKey->abId)); + + pKey->keyType = keyType; + + memcpy(pKey->abId, pbBlob, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + pbBlob += SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE; + + memcpy(pKey->abPublicRoot, pbBlob, pKey->params.cbHashOutput); + pbBlob += pKey->params.cbHashOutput; + + if (keyType == SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + // Wipe private key material + pKey->nNextUnusedLeaf = 0; + SymCryptWipeKnownSize(pKey->abSeed, sizeof(pKey->abSeed)); + + pKey->nNextUnusedLeaf = SYMCRYPT_LOAD_MSBFIRST32(pbBlob); + pbBlob += sizeof(UINT32); + + memcpy(pKey->abSeed, pbBlob,pKey->params.cbHashOutput); + + if (flags & SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT) + { + scError = SymCryptLmskeyVerifyRoot(pKey); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + } + +cleanup: + if (scError != SYMCRYPT_NO_ERROR) + { + LmskeyWipe(pKey); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSign( + _Inout_ PSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _Out_writes_bytes_(cbSignature) PBYTE pbSignature, + SIZE_T cbSignature) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 nLeafNumber = (UINT32)pKey->nNextUnusedLeaf; + UINT32 cbHashOutput = pKey->params.cbHashOutput; + 
UINT32 nTreeHeight = pKey->params.nTreeHeight; + SIZE_T cbRemainingBytes = cbSignature; + UINT32 nLeavesCount = ((UINT32)1 << nTreeHeight); + UINT32 nNodeIndex = 0; + UINT32 nTemp = 0; + SIZE_T cbOtsSignature = LmsOtsSizeofSignatureFromParams(&pKey->params); + BYTE abLMSRandomizer[SYMCRYPT_LMS_MAX_N] = { 0 }; + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (pKey->keyType != SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbSignature != SymCryptLmsSizeofSignatureFromParams(&pKey->params)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = SymCryptCallbackRandom(abLMSRandomizer, cbHashOutput); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + nLeafNumber = (UINT32)SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(&pKey->nNextUnusedLeaf, 1) - 1; + if (nLeafNumber >= (nLeavesCount)) + { + scError = SYMCRYPT_HBS_NO_OTS_KEYS_LEFT; + pKey->nNextUnusedLeaf = nLeavesCount; + goto cleanup; + } + SYMCRYPT_STORE_MSBFIRST32(pbSignature, nLeafNumber); + pbSignature += sizeof(UINT32); + cbRemainingBytes -= sizeof(UINT32); + + LmsOtskeySign( + pKey, + nLeafNumber, + pbMessage, + cbMessage, + abLMSRandomizer, + pbSignature, + cbOtsSignature); + pbSignature += cbOtsSignature; + cbRemainingBytes -= cbOtsSignature; + + SYMCRYPT_STORE_MSBFIRST32(pbSignature, pKey->params.lmsAlgID); + pbSignature += sizeof(UINT32); + cbRemainingBytes -= sizeof(UINT32); + + nNodeIndex = nLeavesCount + nLeafNumber; + // write the path into the signature + for (UINT32 nIndex = 0; nIndex < nTreeHeight; nIndex++) + { + nTemp = (nNodeIndex >> nIndex) ^ 1; + LmsComputeNodeValue( + pKey, + nTemp, + pbSignature, + cbHashOutput); + pbSignature += cbHashOutput; + cbRemainingBytes -= cbHashOutput; + } + SYMCRYPT_ASSERT(cbRemainingBytes == 0); + +cleanup: + return scError; +} + +static +SYMCRYPT_ERROR +SYMCRYPT_CALL +LmsComputeOtsPubKeyCandidate( + UINT32 
nLeafNumber, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + _In_reads_bytes_(cbOtsSignature) PCBYTE pbOtsSignature, + SIZE_T cbOtsSignature, + _In_reads_bytes_(SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE) PCBYTE pbId, + _In_ PCSYMCRYPT_LMS_PARAMS pSigParams, + _Out_writes_bytes_(pSigParams->cbHashOutput) PBYTE pbOtsPubKeyCandidate) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 cbHashOutput = pSigParams->cbHashOutput; + UINT32 nWinternitzChainWidth = pSigParams->nWinternitzChainWidth; + UINT32 nByteStringCount = pSigParams->nByteStringCount; + UINT32 nSigType = 0; + UINT32 nMaxJ = (1 << nWinternitzChainWidth) - 1; + UINT16 nCksm = 0; + PCSYMCRYPT_HASH pHash = pSigParams->pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + SYMCRYPT_HASH_STATE stateKc = { 0 }; + BYTE en32LeafNumber[sizeof(UINT32)] = { 0 }; + BYTE en16Index[sizeof(UINT16)] = { 0 }; + BYTE abLmsHashedMsg[SYMCRYPT_LMS_MAX_N + sizeof(nCksm)] = { 0 }; + BYTE abTmpRes[SYMCRYPT_LMS_MAX_N] = { 0 }; + PCBYTE pbRandomizer = NULL; + + if (cbOtsSignature != LmsOtsSizeofSignatureFromParams(pSigParams)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + nSigType = SYMCRYPT_LOAD_MSBFIRST32(pbOtsSignature); + pbOtsSignature += sizeof(UINT32); + if (nSigType != pSigParams->lmsOtsAlgID) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pbRandomizer = pbOtsSignature; + pbOtsSignature += cbHashOutput; + + SYMCRYPT_STORE_MSBFIRST32(en32LeafNumber, nLeafNumber); + + LmsHashMessage(pHash, pbId, en32LeafNumber, pbRandomizer, cbHashOutput, pbMessage, cbMessage, abLmsHashedMsg, cbHashOutput); + nCksm = LmsOtsCalculateChecksum(abLmsHashedMsg, cbHashOutput, nWinternitzChainWidth, pSigParams->nChecksumLShiftBits); + SYMCRYPT_STORE_MSBFIRST16((UINT16*)&abLmsHashedMsg[cbHashOutput], (UINT16)nCksm); + + SymCryptHashInit(pHash, &stateKc); + SymCryptHashAppend(pHash, &stateKc, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &stateKc, 
en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &stateKc, SYMCRYPT_LMS_D_PBLC, sizeof(SYMCRYPT_LMS_D_PBLC)); + + SymCryptHashInit(pHash, &state); + for (UINT32 i = 0; i < nByteStringCount; i++) + { + BYTE a = (BYTE)SymCryptHbsGetDigit(nWinternitzChainWidth, abLmsHashedMsg, cbHashOutput + sizeof(nCksm), i); + PCBYTE tmp = pbOtsSignature + (i * cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST16(en16Index, (UINT16)i); + + for (BYTE j = a; j < nMaxJ; j++) + { + SymCryptHashAppend(pHash, &state, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &state, en16Index, sizeof(UINT16)); + SymCryptHashAppend(pHash, &state, &j, 1); + SymCryptHashAppend(pHash, &state, tmp, cbHashOutput); + SymCryptHashResult(pHash, &state, abTmpRes, cbHashOutput); + tmp = abTmpRes; + } + SymCryptHashAppend(pHash, &stateKc, tmp, cbHashOutput); + } + SymCryptHashResult(pHash, &stateKc, pbOtsPubKeyCandidate, cbHashOutput); + +cleanup: + return scError; +} + +static +VOID +SYMCRYPT_CALL +LmsComputeRootCandidate( + UINT32 nLeafNumber, + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + _In_reads_bytes_(SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE) PCBYTE pbId, + _In_reads_bytes_(pParams->nTreeHeight * pParams->cbHashOutput) PCBYTE pbPath, + _In_reads_bytes_(pParams->cbHashOutput) PCBYTE pbPubKeyCandidate, + _Out_writes_bytes_(pParams->cbHashOutput) PBYTE pbRootCandidate +) +{ + PCSYMCRYPT_HASH pHash = pParams->pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + UINT32 cbHashOutput = pParams->cbHashOutput; + UINT32 nIndex = 0; + UINT32 nNodeNum = (1 << pParams->nTreeHeight) + nLeafNumber; + PBYTE pbTemp = pbRootCandidate; + BYTE en32NodeNum[sizeof(UINT32)] = { 0 }; + + SYMCRYPT_STORE_MSBFIRST32(en32NodeNum, nNodeNum); + SymCryptHashInit(pHash, &state); + SymCryptHashAppend(pHash, &state, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32NodeNum, sizeof(UINT32)); + 
SymCryptHashAppend(pHash, &state, SYMCRYPT_LMS_D_LEAF, sizeof(SYMCRYPT_LMS_D_LEAF)); + SymCryptHashAppend(pHash, &state, pbPubKeyCandidate, cbHashOutput); + SymCryptHashResult(pHash, &state, pbTemp, cbHashOutput); + + for (nIndex = 0; nIndex < pParams->nTreeHeight; nIndex ++) + { + SYMCRYPT_STORE_MSBFIRST32(en32NodeNum, nNodeNum / 2); + SymCryptHashAppend(pHash, &state, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32NodeNum, sizeof(UINT32)); + SymCryptHashAppend(pHash, &state, SYMCRYPT_LMS_D_INTR, sizeof(SYMCRYPT_LMS_D_INTR)); + if (nNodeNum % 2) + { + SymCryptHashAppend(pHash, &state, pbPath + (cbHashOutput * nIndex), cbHashOutput); + SymCryptHashAppend(pHash, &state, pbTemp, cbHashOutput); + } + else + { + SymCryptHashAppend(pHash, &state, pbTemp, cbHashOutput); + SymCryptHashAppend(pHash, &state, pbPath + (cbHashOutput * nIndex), cbHashOutput); + } + SymCryptHashResult(pHash, &state, pbTemp, cbHashOutput); + nNodeNum /= 2; + } + SYMCRYPT_ASSERT(nNodeNum <= 1); + + memcpy(pbRootCandidate, pbTemp, cbHashOutput); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsVerifyInternal( + _In_ PCSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_(cbSignature) PCBYTE pbSignature, + SIZE_T cbSignature) +{ + SYMCRYPT_ASSERT(pKey != NULL); + SYMCRYPT_ASSERT(pKey->keyType != SYMCRYPT_LMSKEY_TYPE_NONE); + SYMCRYPT_CHECK_MAGIC(pKey); + + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 cbHashOutput = pKey->params.cbHashOutput; + PCSYMCRYPT_LMS_PARAMS pLmsKeyParams = &pKey->params; + PCBYTE pbLocSignature = pbSignature; + BYTE abRootCandidate[SYMCRYPT_LMS_MAX_N] = { 0 }; + BYTE abOtsPubKeyCandidate[SYMCRYPT_LMS_MAX_N] = { 0 }; + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbSignature != SymCryptLmsSizeofSignatureFromParams(&pKey->params)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + UINT32 nLeafNumber = 
SYMCRYPT_LOAD_MSBFIRST32(pbLocSignature); + pbLocSignature += sizeof(UINT32); + if (nLeafNumber >= ((UINT32)1 << pKey->params.nTreeHeight)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + UINT32 nOtsSigtype = SYMCRYPT_LOAD_MSBFIRST32(pbLocSignature); + pbLocSignature += sizeof(UINT32); + + if (nOtsSigtype != pLmsKeyParams->lmsOtsAlgID) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pbLocSignature += cbHashOutput * (pKey->params.nByteStringCount + 1); // +1 is for the randomizer + UINT32 nSigType = SYMCRYPT_LOAD_MSBFIRST32(pbLocSignature); + pbLocSignature += sizeof(UINT32); + + if (nSigType != pLmsKeyParams->lmsAlgID) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = LmsComputeOtsPubKeyCandidate( + nLeafNumber, + pbMessage, + cbMessage, + pbSignature + sizeof(UINT32), //the +sizeof(UINT32) is to skip the leaf number and reach the LMS-OTS signature + LmsOtsSizeofSignatureFromParams(&pKey->params), + pKey->abId, + pLmsKeyParams, + abOtsPubKeyCandidate); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + LmsComputeRootCandidate( + nLeafNumber, + pLmsKeyParams, + pKey->abId, + pbLocSignature, + abOtsPubKeyCandidate, + abRootCandidate); + if (!SymCryptEqual(abRootCandidate, pKey->abPublicRoot, cbHashOutput)) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsVerify( + _In_ PCSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_(cbSignature) PCBYTE pbSignature, + SIZE_T cbSignature) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptLmsSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_LMS); + + scError = SymCryptLmsVerifyInternal( + pKey, + pbMessage, + cbMessage, + flags, + pbSignature, + cbSignature); + + return scError; +} diff --git a/libs/symcrypt/lib/marvin32.c 
b/libs/symcrypt/lib/marvin32.c new file mode 100644 index 00000000000..9e92633bebc --- /dev/null +++ b/libs/symcrypt/lib/marvin32.c @@ -0,0 +1,331 @@ +// +// Marvin32.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement the Marvin32 checksum function +// +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + + +// +// Default initial seed, first 8 bytes of SHA256( "Marvin32" ); +// +static const SYMCRYPT_MARVIN32_EXPANDED_SEED SymCryptMarvin32DefaultSeedStruct = { + {0xcd0893b7, 0xd53cd9ce}, +#if defined( SYMCRYPT_MAGIC_ENABLED ) + SYMCRYPT_MAGIC_VALUE( &SymCryptMarvin32DefaultSeedStruct ), +#endif + }; + +PCSYMCRYPT_MARVIN32_EXPANDED_SEED const SymCryptMarvin32DefaultSeed = &SymCryptMarvin32DefaultSeedStruct; + +// +// Round rotation amounts. This array is optimized away by the compiler +// as we inline all our rotations. +// +static const int rotate[4] = { + 20, 9, 27, 19, +}; + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMarvin32ExpandSeed( + _Out_ PSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _In_reads_(cbSeed) PCBYTE pbSeed, + SIZE_T cbSeed ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if( cbSeed != SYMCRYPT_MARVIN32_SEED_SIZE ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + pExpandedSeed->s[0] = SYMCRYPT_LOAD_LSBFIRST32( pbSeed ); + pExpandedSeed->s[1] = SYMCRYPT_LOAD_LSBFIRST32( pbSeed + 4 ); + + SYMCRYPT_SET_MAGIC( pExpandedSeed ); + +cleanup: + return scError; +} + +VOID +SYMCRYPT_CALL +SymCryptMarvin32SeedCopy( _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pSrc, + _Out_ PSYMCRYPT_MARVIN32_EXPANDED_SEED pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + *pDst = *pSrc; + SYMCRYPT_SET_MAGIC( pDst ); +} + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32StateCopy( + _In_ PCSYMCRYPT_MARVIN32_STATE pSrc, + _In_opt_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _Out_ PSYMCRYPT_MARVIN32_STATE pDst ) +{ + 
SYMCRYPT_CHECK_MAGIC( pSrc ); + *pDst = *pSrc; + + if( pExpandedSeed == NULL ) + { + SYMCRYPT_CHECK_MAGIC( pSrc->pSeed ); + pDst->pSeed = pSrc->pSeed; + } + else + { + SYMCRYPT_CHECK_MAGIC( pExpandedSeed ); + pDst->pSeed = pExpandedSeed; + } + + SYMCRYPT_SET_MAGIC( pDst ); +} + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Init( _Out_ PSYMCRYPT_MARVIN32_STATE pState, + _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed) +{ + pState->chain = *pExpandedSeed; + pState->dataLength = 0; + pState->pSeed = pExpandedSeed; + + *(UINT32 *) &pState->buffer[4] = 0; // wipe the last 4 bytes of the buffer. + + SYMCRYPT_SET_MAGIC( pState ); +} + + +// +// SymCryptMarvin32Append +// + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Append( _Inout_ PSYMCRYPT_MARVIN32_STATE state, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + UINT32 bytesInBuffer = state->dataLength; + + SYMCRYPT_CHECK_MAGIC( state ); + + state->dataLength += (UINT32) cbData; // We only keep track of the last 2 bits... + +#define ALG MARVIN32 +#define Alg Marvin32 +#include "hash_buffer_pattern.c" +#undef ALG +#undef Alg + +} + + +// +// SymCryptMarvin32Result +// +VOID +SYMCRYPT_CALL +SymCryptMarvin32Result( + _Inout_ PSYMCRYPT_MARVIN32_STATE pState, + _Out_writes_( SYMCRYPT_MARVIN32_RESULT_SIZE ) PBYTE pbResult ) +{ + SIZE_T bytesInBuffer = ( pState->dataLength) & 0x3; + + SYMCRYPT_CHECK_MAGIC( pState ); + + // + // Wipe four bytes in the buffer. + // Doing this first ensures that this write is aligned when the input was of + // length 0 mod 4. + // The buffer is 8 bytes long, so we never overwrite anything else. + // + *(UINT32 *) &pState->buffer[bytesInBuffer] = 0; + + // + // The buffer is never completely full, so we can always put the first + // padding byte in. 
+ // + pState->buffer[bytesInBuffer++] = 0x80; + + // + // Process the final block + // + SymCryptMarvin32AppendBlocks( &pState->chain, pState->buffer, 8 ); + + SYMCRYPT_STORE_LSBFIRST32( pbResult , pState->chain.s[0] ); + SYMCRYPT_STORE_LSBFIRST32( pbResult + 4, pState->chain.s[1] ); + + // + // Wipe only those things that we need to wipe. + // + + *(UINT32 *) &pState->buffer[0] = 0; + pState->dataLength = 0; + + pState->chain = *pState->pSeed; +} + +#define BLOCK( a, b ) \ +{\ + b ^= a; a = ROL32( a, rotate[0] );\ + a += b; b = ROL32( b, rotate[1] );\ + b ^= a; a = ROL32( a, rotate[2] );\ + a += b; b = ROL32( b, rotate[3] );\ +} + +VOID +SYMCRYPT_CALL +SymCryptMarvin32AppendBlocks( + _Inout_ PSYMCRYPT_MARVIN32_CHAINING_STATE pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + UINT32 s0 = pChain->s[0]; + UINT32 s1 = pChain->s[1]; + + SIZE_T bytesInFirstBlock = cbData & 0xc; // 0, 4, 8, or 12 + + SYMCRYPT_ASSERT( (cbData & 3) == 0 ); + + + pbData += bytesInFirstBlock; + cbData -= bytesInFirstBlock; + + switch( bytesInFirstBlock ) + { + case 0: // This handles the cbData == 0 case too + while( cbData > 0 ) + { + pbData += 16; + cbData -= 16; + + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData - 16 ); + BLOCK( s0, s1 ); + case 12: + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData - 12 ); + BLOCK( s0, s1 ); + case 8: + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData - 8 ); + BLOCK( s0, s1 ); + case 4: + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData - 4 ); + BLOCK( s0, s1 ); + } + } + + pChain->s[0] = s0; + pChain->s[1] = s1; +} + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32( + _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MARVIN32_RESULT_SIZE ) PBYTE pbResult ) +// +// To reduce the per-computation overhead, we have a dedicated code here instead of the whole Init/Append/Result stuff. 
+// +{ + UINT32 tmp; + + UINT32 s0 = pExpandedSeed->s[0]; + UINT32 s1 = pExpandedSeed->s[1]; + + while( cbData > 7 ) + { + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); + BLOCK( s0, s1 ); + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData + 4 ); + BLOCK( s0, s1 ); + pbData += 8; + cbData -= 8; + } + + /* + switch( cbData ) + { + case 3: + buf[2] = pbData[2]; + case 2: + *(UINT16 *) &buf[0] = *(UINT16 *) pbData; + break; + case 1: + buf[0] = pbData[0]; + case 0: + ; + } + + buf[ cbData ] = 0x80; + + s0 += LOAD_LSBFIRST32( buf ); + */ + + + switch( cbData ) + { + default: + case 4: s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); BLOCK( s0, s1 ); pbData += 4; + case 0: tmp = 0x80; break; + + case 5: s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); BLOCK( s0, s1 ); pbData += 4; + case 1: tmp = 0x8000 | pbData[0]; break; + + case 6: s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); BLOCK( s0, s1 ); pbData += 4; + case 2: tmp = 0x800000 | SYMCRYPT_LOAD_LSBFIRST16( pbData ); break; + + case 7: s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); BLOCK( s0, s1 ); pbData += 4; + case 3: tmp = SYMCRYPT_LOAD_LSBFIRST16( pbData ) | (pbData[2] << 16) | 0x80000000; break; + } + s0 += tmp; + + + BLOCK( s0, s1 ); + BLOCK( s0, s1 ); + + SYMCRYPT_STORE_LSBFIRST32( pbResult , s0 ); + SYMCRYPT_STORE_LSBFIRST32( pbResult + 4, s1 ); +} + + + +// +// Simple test vector +// + +static const BYTE marvin32KATAnswer[ 8 ] = { + 0xbf, 0x69, 0x27, 0x49, 0x39, 0x43, 0xc7, 0x22, +} ; + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Selftest(void) +{ + BYTE res[SYMCRYPT_MARVIN32_RESULT_SIZE]; + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + if( memcmp( res, marvin32KATAnswer, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'marv' ); + } +} diff --git a/libs/symcrypt/lib/md2.c b/libs/symcrypt/lib/md2.c new file mode 100644 index 00000000000..b754df333ce --- /dev/null +++ b/libs/symcrypt/lib/md2.c @@ -0,0 +1,307 @@ +// +// Md2.c +// +// Copyright (c) 
//
// Generic-hash descriptor for MD2.
// The initializer is positional: first the five MD2 routines, then the sizes
// and the location of the chaining state inside SYMCRYPT_MD2_STATE.
//
const SYMCRYPT_HASH SymCryptMd2Algorithm_default = {
    &SymCryptMd2Init,                                       // state initialization routine
    &SymCryptMd2Append,                                     // data append routine
    &SymCryptMd2Result,                                     // finalization routine
    &SymCryptMd2AppendBlocks,                               // whole-block compression routine
    &SymCryptMd2StateCopy,                                  // state copy routine
    sizeof( SYMCRYPT_MD2_STATE ),                           // size of the full hash state
    SYMCRYPT_MD2_RESULT_SIZE,                               // size of the hash result
    SYMCRYPT_MD2_INPUT_BLOCK_SIZE,                          // size of one input block
    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MD2_STATE, chain ),     // offset of the chain field in the state
    SYMCRYPT_FIELD_SIZE( SYMCRYPT_MD2_STATE, chain ),       // size of the chain field
};
//
// SymCryptMd2Init
//
// Puts an MD2 state into its initial condition: every byte of the state is
// zeroed and the magic value is then set.
//
VOID
SYMCRYPT_CALL
SymCryptMd2Init( _Out_ PSYMCRYPT_MD2_STATE pState )
{
    //
    // We use the secure wipe as the init routine is also used to re-initialize
    // (and wipe) the state after a hash computation (SymCryptMd2Result calls it).
    // In that case the compiler might conclude that an ordinary wipe can be
    // optimized away, and that would leak data.
    //
    SymCryptWipeKnownSize( pState, sizeof( *pState ) );

    // Must come after the wipe, which would otherwise clear the magic again.
    SYMCRYPT_SET_MAGIC( pState );
}
+ // + SIZE_T tmp; + SIZE_T paddingBytes = 16 - state->bytesInBuffer; + + + SYMCRYPT_CHECK_MAGIC( state ); + + memset( &state->buffer[state->bytesInBuffer], (BYTE)paddingBytes, paddingBytes ); + + SymCryptMd2AppendBlocks( &state->chain, state->buffer, SYMCRYPT_MD2_INPUT_BLOCK_SIZE, &tmp ); + + // + // Append the checksum + // + SymCryptMd2AppendBlocks( &state->chain, state->chain.C, SYMCRYPT_MD2_INPUT_BLOCK_SIZE, &tmp ); + + memcpy( pbResult, &state->chain.X[0], SYMCRYPT_MD2_RESULT_SIZE ); + + // + // Wipe & re-initialize + // + // (Our init code wipes the buffer too, so we don't have to.) + // + SymCryptMd2Init( state ); +} + + +VOID +SYMCRYPT_CALL +SymCryptMd2AppendBlocks( + _Inout_ PSYMCRYPT_MD2_CHAINING_STATE pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + // + // For variable names see RFC 1319. + // + unsigned int t; + int j,k; + + while( cbData >= SYMCRYPT_MD2_INPUT_BLOCK_SIZE ) + { + BYTE L; + // + // read the data once into our structure + // + memcpy( &pChain->X[16], pbData, SYMCRYPT_MD2_INPUT_BLOCK_SIZE ); + + // + // Update the checksum block. 
+ // The L value at the end of the previous block is in the last byte of the checksum + // + L = pChain->C[15]; + + for( j=0; j<16; j++ ) + { + pChain->C[j] = L = pChain->C[j] ^ SymCryptMd2STable[ L ^ pChain->X[16+j] ]; + } + + // + // Now we compute the actual hash + // + SymCryptXorBytes( &pChain->X[0], &pChain->X[16], &pChain->X[32], 16 ); + + t = 0; + for( j=0; j<18; j++ ) + { + for( k=0; k<48; k++ ) + { + t = pChain->X[k] ^ SymCryptMd2STable[t]; + pChain->X[k] = (BYTE) t; + } + t = (t + j)& 0xff; + } + + pbData += SYMCRYPT_MD2_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_MD2_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; +} + + +VOID +SYMCRYPT_CALL +SymCryptMd2StateExport( + _In_ PCSYMCRYPT_MD2_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD2_STATE_EXPORT_SIZE ) PBYTE pbBlob ) +{ + SYMCRYPT_ALIGN SYMCRYPT_MD2_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + C_ASSERT( sizeof( blob ) == SYMCRYPT_MD2_STATE_EXPORT_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pState ); + + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage + + blob.header.magic = SYMCRYPT_BLOB_MAGIC; + blob.header.size = SYMCRYPT_MD2_STATE_EXPORT_SIZE; + blob.header.type = SymCryptBlobTypeMd2State; + + // + // Copy the relevant data. Buffer will be 0-padded. 
+ // + memcpy( &blob.C[0], &pState->chain.C[0], 16 ); + memcpy( &blob.X[0], &pState->chain.X[0], 16 ); + blob.bytesInBuffer = (UINT32) pState->bytesInBuffer; + memcpy( &blob.buffer[0], &pState->buffer[0], blob.bytesInBuffer ); + + SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer ); + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] ); + + memcpy( pbBlob, &blob, sizeof( blob ) ); + +//cleanup: + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); + return; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd2StateImport( + _Out_ PSYMCRYPT_MD2_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD2_STATE_EXPORT_SIZE) PCBYTE pbBlob ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_ALIGN SYMCRYPT_MD2_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + BYTE checksum[8]; + + C_ASSERT( sizeof( blob ) == SYMCRYPT_MD2_STATE_EXPORT_SIZE ); + memcpy( &blob, pbBlob, sizeof( blob ) ); + + if( blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_MD2_STATE_EXPORT_SIZE || + blob.header.type != SymCryptBlobTypeMd2State ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum ); + if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + memcpy( &pState->chain.C[0], &blob.C[0], 16 ); + memcpy( &pState->chain.X[0], &blob.X[0], 16 ); + memcpy( &pState->buffer[0], &blob.buffer[0], 16 ); + pState->bytesInBuffer = blob.bytesInBuffer; + + pState->dataLengthL = blob.bytesInBuffer; + pState->dataLengthH = 1; + + SYMCRYPT_SET_MAGIC( pState ); + +cleanup: + SymCryptWipeKnownSize( &blob, sizeof(blob) ); + return scError; +} + + + + + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE md2KATAnswer[ 
16 ] = { + 0xda, 0x85, 0x3b, 0x0d, 0x3f, 0x88, 0xd9, 0x9b, + 0x30, 0x28, 0x3a, 0x69, 0xe6, 0xde, 0xd6, 0xbb, +} ; + +VOID +SYMCRYPT_CALL +SymCryptMd2Selftest(void) +{ + BYTE result[SYMCRYPT_MD2_RESULT_SIZE]; + + SymCryptMd2( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, md2KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'MD2t' ); + } +} diff --git a/libs/symcrypt/lib/md4.c b/libs/symcrypt/lib/md4.c new file mode 100644 index 00000000000..759e317c3a8 --- /dev/null +++ b/libs/symcrypt/lib/md4.c @@ -0,0 +1,425 @@ +// +// Md4.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement MD4 from RFC 1320 +// +// +// This is a new implementation, NOT based on the existing ones in RSA32.lib. +// There are 2 versions in RSA32.lib, one from RSA data security and one from +// Scott Fields. +// +// MD4 and MD5 are extremely similar. Having already done a new MD5 implementation it +// was very little work to copy the code & turn it into an MD4 implementation. +// In fact, it was easier than reviewing & modifying the old code to bring it up to +// the current implementation guidelines. +// +// This also ensures that this file is not a derived work from RSA data security +// code which simplifies the copyright situation. +// +// We dropped the assembler implementation. MD4 is so weak that it should be removed +// from use, not sped up. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + +const SYMCRYPT_HASH SymCryptMd4Algorithm_default = { + &SymCryptMd4Init, + &SymCryptMd4Append, + &SymCryptMd4Result, + &SymCryptMd4AppendBlocks, + &SymCryptMd4StateCopy, + sizeof( SYMCRYPT_MD4_STATE ), + SYMCRYPT_MD4_RESULT_SIZE, + SYMCRYPT_MD4_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MD4_STATE, chain ), + SYMCRYPT_FIELD_SIZE( SYMCRYPT_MD4_STATE, chain ), +}; + +const PCSYMCRYPT_HASH SymCryptMd4Algorithm = &SymCryptMd4Algorithm_default; + +// +// The round constants used by MD4 +// +// +static const UINT32 md4Const[3] = { + 0x00000000UL, + 0x5A827999UL, + 0x6ED9EBA1UL, + }; + +// +// Round rotation amounts. This array is optimized away by the compiler +// as we inline all our rotations. +// +static const int md4Rotate[48] = { + 3, 7, 11, 19, + 3, 7, 11, 19, + 3, 7, 11, 19, + 3, 7, 11, 19, + + 3, 5, 9, 13, + 3, 5, 9, 13, + 3, 5, 9, 13, + 3, 5, 9, 13, + + 3, 9, 11, 15, + 3, 9, 11, 15, + 3, 9, 11, 15, + 3, 9, 11, 15, + +}; + +// +// Message word index table. This array is optimized away by the compiler +// as we inline all our accesses. +// +static const int md4MsgIndex[48] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15, +}; + +// +// Initial state +// +static const UINT32 md4InitialState[4] = { + 0x67452301UL, + 0xefcdab89UL, + 0x98badcfeUL, + 0x10325476UL, +}; + + +// +// SymCryptMd4 +// +#define ALG MD4 +#define Alg Md4 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + + +// +// SymCryptmd4Init +// +VOID +SYMCRYPT_CALL +SymCryptMd4Init( _Out_ PSYMCRYPT_MD4_STATE pState ) +{ + SYMCRYPT_SET_MAGIC( pState ); + + pState->dataLengthL = 0; + pState->dataLengthH = 0; + pState->bytesInBuffer = 0; + + memcpy( &pState->chain.H[0], &md4InitialState[0], sizeof( md4InitialState ) ); + + // + // There is no need to initialize the buffer part of the state as that will be + // filled before it is used. 
+ // +} + + +// +// SymCryptMd4Append +// +VOID +SYMCRYPT_CALL +SymCryptMd4Append( _Inout_ PSYMCRYPT_MD4_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHashAppendInternal( SymCryptMd4Algorithm, (PSYMCRYPT_COMMON_HASH_STATE)pState, pbData, cbData ); +} + + +// +// SymCryptmd4Result +// +VOID +SYMCRYPT_CALL +SymCryptMd4Result( + _Inout_ PSYMCRYPT_MD4_STATE pState, + _Out_writes_( SYMCRYPT_MD4_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHashCommonPaddingMd4Style( SymCryptMd4Algorithm, (PSYMCRYPT_COMMON_HASH_STATE)pState ); + + // + // Write the output in the correct byte order + // + SymCryptUint32ToLsbFirst( &pState->chain.H[0], pbResult, 4 ); + + // + // Wipe & re-initialize + // We have to wipe the whole state as the initialization might be optimized away. + // + SymCryptWipeKnownSize( pState, sizeof( *pState )); + SymCryptMd4Init( pState ); + } + + +// +// For documentation on these function see rfc-1320 +// +//#define F( x, y, z ) (((x) & (y)) | ((~(x)) & (z))) +//#define G( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +#define F( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z)) +#define G( x, y, z ) ((((x) | (y)) & (z) ) | ((x) & (y))) +#define H( x, y, z ) ((x) ^ (y) ^ (z) ) + +// +// The values a-d are stored in an array called ad. +// We have unrolled the code completely. This makes both the indices into +// the ad array constant, and it makes the message addressing constant. +// +// We copy the message into our own buffer to obey the read-once rule. +// Memory is sometimes aliased so that multiple threads or processes can access +// the same memory at the same time. With MD4 there is a danger that some other +// process could modify the memory while the computation is ongoing and introduce +// changes in the computation not envisioned by the designers or cryptanalists. 
//
// SymCryptMd4AppendBlocks
//
// Runs the MD4 compression function over every complete 64-byte block in
// pbData, updating pChain->H, and reports the number of trailing bytes that
// were not consumed.
//
//  pChain          chaining state; H[0..3] is read on entry and updated per block.
//  pbData/cbData   input; only whole 64-byte blocks are processed.
//  pcbRemaining    receives the count of unprocessed bytes.
//
// The rounds are fully unrolled through the IROUND/FROUND macros, which use the
// locals W, ad and Wt declared here.
//
VOID
SYMCRYPT_CALL
SymCryptMd4AppendBlocks(
    _Inout_                 PSYMCRYPT_MD4_CHAINING_STATE    pChain,
    _In_reads_( cbData )    PCBYTE                          pbData,
                            SIZE_T                          cbData,
    _Out_                   SIZE_T                        * pcbRemaining )
{

    SYMCRYPT_ALIGN UINT32 W[16];    // private copy of the message words (read-once rule)
    SYMCRYPT_ALIGN UINT32 ad[4];    // working variables; see the register mapping below
    UINT32 Wt;                      // message word used by the current round

    //
    // ad[] holds the state in the order a, d, c, b so that the (r+k)%4 indexing
    // in CROUND renames the registers automatically from round to round.
    //
    ad[0] = pChain->H[0];
    ad[1] = pChain->H[3];
    ad[2] = pChain->H[2];
    ad[3] = pChain->H[1];

    while( cbData >= 64 )
    {
        //
        // initial rounds 1 to 16
        // IROUND loads each message word from pbData and stores it in W.
        //

        IROUND(  0, F );
        IROUND(  1, F );
        IROUND(  2, F );
        IROUND(  3, F );
        IROUND(  4, F );
        IROUND(  5, F );
        IROUND(  6, F );
        IROUND(  7, F );
        IROUND(  8, F );
        IROUND(  9, F );
        IROUND( 10, F );
        IROUND( 11, F );
        IROUND( 12, F );
        IROUND( 13, F );
        IROUND( 14, F );
        IROUND( 15, F );

        // Rounds 17 to 32: message words now come from W, in md4MsgIndex order.
        FROUND( 16, G );
        FROUND( 17, G );
        FROUND( 18, G );
        FROUND( 19, G );
        FROUND( 20, G );
        FROUND( 21, G );
        FROUND( 22, G );
        FROUND( 23, G );
        FROUND( 24, G );
        FROUND( 25, G );
        FROUND( 26, G );
        FROUND( 27, G );
        FROUND( 28, G );
        FROUND( 29, G );
        FROUND( 30, G );
        FROUND( 31, G );

        // Rounds 33 to 48.
        FROUND( 32, H );
        FROUND( 33, H );
        FROUND( 34, H );
        FROUND( 35, H );
        FROUND( 36, H );
        FROUND( 37, H );
        FROUND( 38, H );
        FROUND( 39, H );
        FROUND( 40, H );
        FROUND( 41, H );
        FROUND( 42, H );
        FROUND( 43, H );
        FROUND( 44, H );
        FROUND( 45, H );
        FROUND( 46, H );
        FROUND( 47, H );

        //
        // Feed-forward: add the block's input state to the result, keeping ad[]
        // in sync so the next block starts from the new chaining value.
        //
        pChain->H[0] = ad[0] = ad[0] + pChain->H[0];
        pChain->H[3] = ad[1] = ad[1] + pChain->H[3];
        pChain->H[2] = ad[2] = ad[2] + pChain->H[2];
        pChain->H[1] = ad[3] = ad[3] + pChain->H[1];

        pbData += 64;
        cbData -= 64;
    }

    *pcbRemaining = cbData;
    //
    // Wipe the variables;
    // they hold message words and intermediate state.
    //
    SymCryptWipeKnownSize( ad, sizeof( ad ) );
    SymCryptWipeKnownSize( W, sizeof( W ) );
    SYMCRYPT_FORCE_WRITE32( &Wt, 0 );
}
+ // + + SymCryptUint32ToLsbFirst( &pState->chain.H[0], &blob.chain[0], 4 ); + blob.dataLength = pState->dataLengthL; + memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLength & 0x3f ); + + SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer ); + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] ); + + memcpy( pbBlob, &blob, sizeof( blob ) ); + +//cleanup: + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); + return; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd4StateImport( + _Out_ PSYMCRYPT_MD4_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD4_STATE_EXPORT_SIZE) PCBYTE pbBlob ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_ALIGN SYMCRYPT_MD4_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + BYTE checksum[8]; + + C_ASSERT( sizeof( blob ) == SYMCRYPT_MD4_STATE_EXPORT_SIZE ); + memcpy( &blob, pbBlob, sizeof( blob ) ); + + if( blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_MD4_STATE_EXPORT_SIZE || + blob.header.type != SymCryptBlobTypeMd4State ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum ); + if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptLsbFirstToUint32( &blob.chain[0], &pState->chain.H[0], 4 ); + pState->dataLengthL = blob.dataLength; + pState->dataLengthH = 0; + pState->bytesInBuffer = blob.dataLength & 0x3f; + memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer ); + + SYMCRYPT_SET_MAGIC( pState ); + +cleanup: + SymCryptWipeKnownSize( &blob, sizeof(blob) ); + return scError; +} + + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE md4KATAnswer[ 16 ] = { + 0xa4, 0x48, 0x01, 0x7a, 0xaf, 0x21, 0xd8, 
0x52, + 0x5f, 0xc1, 0x0a, 0xe8, 0x7a, 0xa6, 0x72, 0x9d, +} ; + +VOID +SYMCRYPT_CALL +SymCryptMd4Selftest(void) +{ + BYTE result[SYMCRYPT_MD4_RESULT_SIZE]; + + SymCryptMd4( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, md4KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'MD4t' ); + } +} diff --git a/libs/symcrypt/lib/md5.c b/libs/symcrypt/lib/md5.c new file mode 100644 index 00000000000..c5de50f2646 --- /dev/null +++ b/libs/symcrypt/lib/md5.c @@ -0,0 +1,503 @@ +// +// Md5.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement MD5 from RFC 1321 +// +// +// This is a new implementation, NOT based on the existing one in RSA32.lib, +// which is the one from RSA data security. RFC-1321 also contains code that +// at a glance looks very similar to the RSA32.lib code. +// +// The implementation had to be refreshed anyway to conform to our coding +// guidelines for cryptographic functions. +// Re-implementing the function along the lines of our SHA-family implementations +// was easy, and it removes one file with RSA copyright from our system. +// +// The only data copied for this implementation is the round constant values +// which were copied from the RFC. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
//
// Generic-hash descriptor for MD5.
// The initializer is positional: first the five MD5 routines, then the sizes
// and the location of the chaining state inside SYMCRYPT_MD5_STATE.
//
const SYMCRYPT_HASH SymCryptMd5Algorithm_default = {
    &SymCryptMd5Init,                                       // state initialization routine
    &SymCryptMd5Append,                                     // data append routine
    &SymCryptMd5Result,                                     // finalization routine
    &SymCryptMd5AppendBlocks,                               // whole-block compression routine
    &SymCryptMd5StateCopy,                                  // state copy routine
    sizeof( SYMCRYPT_MD5_STATE ),                           // size of the full hash state
    SYMCRYPT_MD5_RESULT_SIZE,                               // size of the hash result
    SYMCRYPT_MD5_INPUT_BLOCK_SIZE,                          // size of one input block
    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MD5_STATE, chain ),     // offset of the chain field in the state
    SYMCRYPT_FIELD_SIZE( SYMCRYPT_MD5_STATE, chain ),       // size of the chain field
};
+//
+static const int md5Rotate[64] = {
+    7, 12, 17, 22, // rounds 0..15 (F)
+    7, 12, 17, 22,
+    7, 12, 17, 22,
+    7, 12, 17, 22,
+
+    5, 9, 14, 20, // rounds 16..31 (G)
+    5, 9, 14, 20,
+    5, 9, 14, 20,
+    5, 9, 14, 20,
+
+    4, 11, 16, 23, // rounds 32..47 (H)
+    4, 11, 16, 23,
+    4, 11, 16, 23,
+    4, 11, 16, 23,
+
+    6, 10, 15, 21, // rounds 48..63 (I)
+    6, 10, 15, 21,
+    6, 10, 15, 21,
+    6, 10, 15, 21,
+};
+
+//
+// Message word index table. This array is optimized away by the compiler
+// as we inline all our accesses.
+//
+static const int md5MsgIndex[64] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // rounds 0..15: words in message order
+    1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, // rounds 16..31
+    5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, // rounds 32..47
+    0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, // rounds 48..63
+};
+
+//
+// Initial state
+//
+static const UINT32 md5InitialState[4] = {
+    0x67452301UL, // copied to chain.H[0] by SymCryptMd5Init
+    0xefcdab89UL, // chain.H[1]
+    0x98badcfeUL, // chain.H[2]
+    0x10325476UL, // chain.H[3]
+};
+
+//
+// SymCryptMd5
+// Presumably hash_pattern.c expands ALG/Alg into the generic one-shot and state-copy wrappers - TODO confirm.
+#define ALG MD5
+#define Alg Md5
+#include "hash_pattern.c"
+#undef ALG
+#undef Alg
+
+
+
+
+//
+// SymCryptMd5Init
+// Starts a new hash: sets the magic, zeroes the length counters and buffer fill, loads md5InitialState into the chain.
+VOID
+SYMCRYPT_CALL
+SymCryptMd5Init( _Out_ PSYMCRYPT_MD5_STATE pState )
+{
+    SYMCRYPT_SET_MAGIC( pState );
+
+    pState->dataLengthL = 0;
+    pState->dataLengthH = 0;
+    pState->bytesInBuffer = 0;
+
+    memcpy( &pState->chain.H[0], &md5InitialState[0], sizeof( md5InitialState ) );
+
+    //
+    // There is no need to initialize the buffer part of the state as that will be
+    // filled before it is used.
+    //
+}
+
+
+//
+// SymCryptMd5Append
+// Feeds cbData bytes at pbData into the hash by delegating to the shared SymCryptHashAppendInternal.
+VOID
+SYMCRYPT_CALL
+SymCryptMd5Append(
+    _Inout_ PSYMCRYPT_MD5_STATE pState,
+    _In_reads_( cbData ) PCBYTE pbData,
+    SIZE_T cbData )
+{
+    SymCryptHashAppendInternal( SymCryptMd5Algorithm, (PSYMCRYPT_COMMON_HASH_STATE)pState, pbData, cbData );
+}
+
+//
+// SymCryptMd5Result
+// Pads the message, writes the SYMCRYPT_MD5_RESULT_SIZE-byte digest to pbResult, then wipes and re-initializes pState.
+VOID
+SYMCRYPT_CALL
+SymCryptMd5Result(
+    _Inout_ PSYMCRYPT_MD5_STATE pState,
+    _Out_writes_( SYMCRYPT_MD5_RESULT_SIZE ) PBYTE pbResult )
+{
+    SymCryptHashCommonPaddingMd4Style( SymCryptMd5Algorithm, (PSYMCRYPT_COMMON_HASH_STATE) pState );
+
+    //
+    // Write the output in the correct byte order
+    //
+    SymCryptUint32ToLsbFirst( &pState->chain.H[0], pbResult, 4 );
+
+    //
+    // Wipe & re-initialize
+    // We have to wipe the whole state because the Init call
+    // might be optimized away by a smart compiler.
+    // And we need to wipe old data.
+    //
+    SymCryptWipeKnownSize( pState, sizeof( *pState ) );
+    SymCryptMd5Init( pState );
+}
+
+
+//
+// For documentation on these functions see rfc-1321
+// (the commented-out F below is the textbook form; the active F is the equivalent select with one fewer operation)
+//#define F( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z)))
+#define F( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z))
+#define G( x, y, z ) F( (z), (x), (y) )
+#define H( x, y, z ) ((x) ^ (y) ^ (z) )
+#define I( x, y, z ) ((y) ^ ((x) | ~(z)))
+
+//
+// The values a-d are stored in an array called ad.
+// We have unrolled the code completely. This makes both the indices into
+// the ad array constant, and it makes the message addressing constant.
+//
+// We copy the message into our own buffer to obey the read-once rule.
+// Memory is sometimes aliased so that multiple threads or processes can access
+// the same memory at the same time. With MD5 there is a danger that some other
+// process could modify the memory while the computation is ongoing and introduce
+// changes in the computation not envisioned by the designers or cryptanalysts.
+// At this level in the library we cannot guarantee that this is not the case,
+// and we can't trust the higher layers to respect a don't-change-it-while-computing-md5
+// restriction. (In practice, such restrictions are lost through the many
+// layers in the stack.)
+//
+//
+// Initial round macro
+//
+// r is the round number
+// ad[(r+0)%4] = a;
+// ad[(r+1)%4] = d;
+// ad[(r+2)%4] = c;
+// ad[(r+3)%4] = b;
+//
+// When r increments the register re-naming is automatically correct.
+//
+#define CROUND( r, Func ) { \
+    ad[r%4] = ad[(r+3)%4] + ROL32( ad[r%4] + Func(ad[(r+3)%4], ad[(r+2)%4], ad[(r+1)%4]) + Wt + md5Const[r], md5Rotate[r] ); \
+}
+
+#define IROUND( r, Func ) { \
+    Wt = SYMCRYPT_LOAD_LSBFIRST32( &pbData[ 4*md5MsgIndex[r] ] ); \
+    W[r] = Wt; \
+    CROUND( r, Func ); \
+}
+
+//
+// Subsequent rounds.
+// This is the same as the IROUND except that it uses the copied message.
+//
+#define FROUND( r, Func ) { \
+    Wt = W[md5MsgIndex[r]];\
+    CROUND( r, Func ); \
+}
+// SymCryptMd5AppendBlocks: runs the 64 MD5 rounds over every whole 64-byte block of pbData, updating *pChain; *pcbRemaining gets the leftover count (< 64).
+VOID
+SYMCRYPT_CALL
+SymCryptMd5AppendBlocks(
+    _Inout_ SYMCRYPT_MD5_CHAINING_STATE * pChain,
+    _In_reads_( cbData ) PCBYTE pbData,
+    SIZE_T cbData,
+    _Out_ SIZE_T * pcbRemaining )
+{
+
+    UINT32 W[16];
+    UINT32 ad[4];
+    UINT32 Wt;
+
+    ad[0] = pChain->H[0];
+    ad[1] = pChain->H[3]; // ad[] holds a, d, c, b in that order (see the round macro comment above)
+    ad[2] = pChain->H[2];
+    ad[3] = pChain->H[1];
+
+    while( cbData >= 64 )
+    {
+        //
+        // initial rounds 1 to 16 (r = 0..15): each message word is read from pbData once and kept in W
+        //
+
+        IROUND( 0, F );
+        IROUND( 1, F );
+        IROUND( 2, F );
+        IROUND( 3, F );
+        IROUND( 4, F );
+        IROUND( 5, F );
+        IROUND( 6, F );
+        IROUND( 7, F );
+        IROUND( 8, F );
+        IROUND( 9, F );
+        IROUND( 10, F );
+        IROUND( 11, F );
+        IROUND( 12, F );
+        IROUND( 13, F );
+        IROUND( 14, F );
+        IROUND( 15, F );
+
+        FROUND( 16, G );
+        FROUND( 17, G );
+        FROUND( 18, G );
+        FROUND( 19, G );
+        FROUND( 20, G );
+        FROUND( 21, G );
+        FROUND( 22, G );
+        FROUND( 23, G );
+        FROUND( 24, G );
+        FROUND( 25, G );
+        FROUND( 26, G );
+        FROUND( 27, G );
+        FROUND( 28, G );
+        FROUND( 29, G );
+        FROUND( 30, G );
+        FROUND( 31, G );
+
+        FROUND( 32, H );
+        FROUND( 33, H );
+        FROUND( 34, H );
+        FROUND( 35, H );
+        FROUND( 36, H );
+        FROUND( 37, H );
+        FROUND( 38, H );
+        FROUND( 39, H );
+        FROUND( 40, H );
+        FROUND( 41, H );
+        FROUND( 42, H );
+        FROUND( 43, H );
+        FROUND( 44, H );
+        FROUND( 45, H );
+        FROUND( 46, H );
+        FROUND( 47, H );
+
+        FROUND( 48, I );
+        FROUND( 49, I );
+        FROUND( 50, I );
+        FROUND( 51, I );
+        FROUND( 52, I );
+        FROUND( 53, I );
+        FROUND( 54, I );
+        FROUND( 55, I );
+        FROUND( 56, I );
+        FROUND( 57, I );
+        FROUND( 58, I );
+        FROUND( 59, I );
+        FROUND( 60, I );
+        FROUND( 61, I );
+        FROUND( 62, I );
+        FROUND( 63, I );
+
+        pChain->H[0] = ad[0] = ad[0] + pChain->H[0]; // feed-forward: add the block result to the previous chaining value
+        pChain->H[3] = ad[1] = ad[1] + pChain->H[3];
+        pChain->H[2] = ad[2] = ad[2] + pChain->H[2];
+        pChain->H[1] = ad[3] = ad[3] + pChain->H[1];
+
+        pbData += 64;
+        cbData -= 64;
+    }
+
+    *pcbRemaining = cbData;
+
+    //
+    // Wipe the variables;
+    //
+    SymCryptWipeKnownSize( ad, sizeof( ad ) );
+    SymCryptWipeKnownSize( W, sizeof( W ) );
+    SymCryptWipeKnownSize( &Wt, sizeof( Wt ) );
+}
+// SymCryptMd5StateExport: writes the state to pbBlob as header + LSB-first chain + dataLengthL + buffered bytes + Marvin32 checksum trailer.
+VOID
+SYMCRYPT_CALL
+SymCryptMd5StateExport(
+    _In_ PCSYMCRYPT_MD5_STATE pState,
+    _Out_writes_bytes_( SYMCRYPT_MD5_STATE_EXPORT_SIZE ) PBYTE pbBlob )
+{
+    SYMCRYPT_MD5_STATE_EXPORT_BLOB blob; // local copy to have proper alignment.
+    C_ASSERT( sizeof( blob ) == SYMCRYPT_MD5_STATE_EXPORT_SIZE );
+
+    SYMCRYPT_CHECK_MAGIC( pState );
+
+    SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage
+
+    blob.header.magic = SYMCRYPT_BLOB_MAGIC;
+    blob.header.size = SYMCRYPT_MD5_STATE_EXPORT_SIZE;
+    blob.header.type = SymCryptBlobTypeMd5State;
+
+    //
+    // Copy the relevant data. Buffer will be 0-padded.
+    //
+
+    SymCryptUint32ToLsbFirst( &pState->chain.H[0], &blob.chain[0], 4 );
+    blob.dataLength = pState->dataLengthL; // only the low length word is exported; import sets dataLengthH to 0
+    memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLength & 0x3f );
+
+    SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer );
+    SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] );
+
+    memcpy( pbBlob, &blob, sizeof( blob ) );
+
+//cleanup:
+    SymCryptWipeKnownSize( &blob, sizeof( blob ) );
+    return;
+}
+// SymCryptMd5StateImport: inverse of SymCryptMd5StateExport; returns SYMCRYPT_INVALID_BLOB (pState untouched) on a header or checksum mismatch.
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMd5StateImport(
+    _Out_ PSYMCRYPT_MD5_STATE pState,
+    _In_reads_bytes_( SYMCRYPT_MD5_STATE_EXPORT_SIZE) PCBYTE pbBlob )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    SYMCRYPT_MD5_STATE_EXPORT_BLOB blob; // local copy to have proper alignment.
+    BYTE checksum[8];
+
+    C_ASSERT( sizeof( blob ) == SYMCRYPT_MD5_STATE_EXPORT_SIZE );
+    memcpy( &blob, pbBlob, sizeof( blob ) );
+
+    if( blob.header.magic != SYMCRYPT_BLOB_MAGIC ||
+        blob.header.size != SYMCRYPT_MD5_STATE_EXPORT_SIZE ||
+        blob.header.type != SymCryptBlobTypeMd5State )
+    {
+        scError = SYMCRYPT_INVALID_BLOB;
+        goto cleanup;
+    }
+
+    SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum );
+    if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 )
+    {
+        scError = SYMCRYPT_INVALID_BLOB;
+        goto cleanup;
+    }
+
+    SymCryptLsbFirstToUint32( &blob.chain[0], &pState->chain.H[0], 4 );
+    pState->dataLengthL = blob.dataLength;
+    pState->dataLengthH = 0;
+    pState->bytesInBuffer = blob.dataLength & 0x3f; // buffer fill is the byte count modulo the 64-byte block
+    memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer );
+
+    SYMCRYPT_SET_MAGIC( pState );
+
+cleanup:
+    SymCryptWipeKnownSize( &blob, sizeof(blob) );
+    return scError;
+}
+
+
+//
+// Simple test vector for FIPS module testing
+// (these bytes equal the RFC 1321 test-suite digest of "abc"; presumably SymCryptTestMsg3 is that string - confirm)
+
+static const BYTE md5KATAnswer[ 16 ] = {
+    0x90, 0x01, 0x50, 0x98, 0x3c, 0xd2, 0x4f, 0xb0,
+    0xd6, 0x96, 0x3f, 0x7d, 0x28, 0xe1, 0x7f, 0x72,
+} ;
+// SymCryptMd5Selftest: hashes SymCryptTestMsg3 and calls SymCryptFatal( 'MD5t' ) if the digest differs from md5KATAnswer.
+VOID
+SYMCRYPT_CALL
+SymCryptMd5Selftest(void)
+{
+    BYTE result[SYMCRYPT_MD5_RESULT_SIZE];
+
+    SymCryptMd5( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result );
+
+    SymCryptInjectError( result, sizeof( result ) );
+
+    if( memcmp( result, md5KATAnswer, sizeof( result ) ) != 0 ) {
+        SymCryptFatal( 'MD5t' );
+    }
+}
diff --git a/libs/symcrypt/lib/mldsa.c b/libs/symcrypt/lib/mldsa.c
new file mode 100644
index 00000000000..2dca3270368
--- /dev/null
+++ b/libs/symcrypt/lib/mldsa.c
@@ -0,0 +1,1096 @@
+//
+// mldsa.c ML-DSA related functionality
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+// SymCryptMlDsakeyAllocate: allocates and initializes a key object for params; returns NULL if the lookup, allocation or initialization fails.
+_Use_decl_annotations_
+PSYMCRYPT_MLDSAKEY
+SYMCRYPT_CALL
+SymCryptMlDsakeyAllocate(
+    SYMCRYPT_MLDSA_PARAMS params )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    PSYMCRYPT_MLDSAKEY pkMlDsakey = NULL;
+    PSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams = NULL;
+    PBYTE pbKey = NULL;
+    UINT32 cbKey = 0;
+
+    scError = SymCryptMlDsaGetInternalParamsFromParams( params, &pInternalParams );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    cbKey = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_KEY( pInternalParams->nRows, pInternalParams->nCols );
+    pbKey = SymCryptCallbackAlloc( cbKey );
+    if( pbKey == NULL )
+    {
+        goto cleanup;
+    }
+
+    pkMlDsakey = SymCryptMlDsakeyInitialize( pInternalParams, pbKey, cbKey );
+    if( pkMlDsakey == NULL )
+    {
+        goto cleanup;
+    }
+
+    // On success, memory is owned by pkMlDsakey
+    pbKey = NULL;
+
+cleanup:
+    if( pbKey != NULL ) // still non-NULL only when initialization failed after the allocation succeeded
+    {
+        SymCryptCallbackFree( pbKey );
+    }
+
+    return pkMlDsakey;
+}
+// SymCryptMlDsakeyFree: wipes the whole key object (cbTotalSize bytes) and releases it through SymCryptCallbackFree.
+_Use_decl_annotations_
+VOID
+SYMCRYPT_CALL
+SymCryptMlDsakeyFree(
+    PSYMCRYPT_MLDSAKEY pkMlDsakey )
+{
+    SYMCRYPT_CHECK_MAGIC( pkMlDsakey );
+
+    SymCryptWipe( pkMlDsakey, pkMlDsakey->cbTotalSize );
+    SymCryptCallbackFree( pkMlDsakey );
+}
+// SymCryptMlDsaKeyGenerateEx: derives the whole key (seeds, A, s1, s2, t0, t1, public-key hash) from a root seed; cbRootSeed is only asserted.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsaKeyGenerateEx(
+    PSYMCRYPT_MLDSAKEY pkMlDsakey,
+    PCBYTE pbRootSeed,
+    SIZE_T cbRootSeed,
+    UINT32 flags )
+{
+    UNREFERENCED_PARAMETER( flags );
+
+    SYMCRYPT_ASSERT( cbRootSeed == SYMCRYPT_MLDSA_ROOT_SEED_SIZE );
+
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams;
+    PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL;
+
+    BYTE privateVectorSeed[SYMCRYPT_MLDSA_PRIVATE_VECTOR_SEED_SIZE];
+
+    pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize(
+        pParams,
+        1, // row vectors
+        0, // column vectors
+        1, // poly elements
+        pParams->cbEncodedPublicKey ); // scratch space
+    if( pTemps == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    memcpy( pkMlDsakey->rootSeed, pbRootSeed, cbRootSeed );
+
+    {
+        PSYMCRYPT_SHAKE256_STATE pShakeState = &(pTemps->shake256State);
+        SymCryptShake256Init( pShakeState );
+        SymCryptShake256Append( pShakeState, pkMlDsakey->rootSeed, cbRootSeed );
+        SymCryptShake256Append( pShakeState, (PCBYTE) &pParams->nRows, sizeof(BYTE) );
+        SymCryptShake256Append( pShakeState, (PCBYTE) &pParams->nCols, sizeof(BYTE) );
+
+        SymCryptShake256Extract( pShakeState, pkMlDsakey->publicSeed, sizeof(pkMlDsakey->publicSeed), FALSE);
+        SymCryptShake256Extract( pShakeState, privateVectorSeed, sizeof(privateVectorSeed), FALSE );
+        SymCryptShake256Extract( pShakeState, pkMlDsakey->privateSigningSeed, sizeof(pkMlDsakey->privateSigningSeed), FALSE); // Wiped when pTemps is freed
+    }
+
+    SymCryptMlDsaExpandA( pkMlDsakey->publicSeed, sizeof(pkMlDsakey->publicSeed), pkMlDsakey->pmA );
+
+    SymCryptMlDsaExpandS(
+        pkMlDsakey->pParams,
+        privateVectorSeed,
+        sizeof(privateVectorSeed),
+        pkMlDsakey->pvs1,
+        pkMlDsakey->pvs2 );
+
+    // Convert s1 and s2 to NTT form
+    SymCryptMlDsaVectorNTT( pkMlDsakey->pvs1 );
+    SymCryptMlDsaVectorNTT( pkMlDsakey->pvs2 );
+
+    SymCryptMlDsakeyComputeT(
+        pkMlDsakey->pmA,
+        pkMlDsakey->pvs1,
+        pkMlDsakey->pvs2,
+        pkMlDsakey->pvt0,
+        pkMlDsakey->pvt1,
+        pTemps->pvRowVectors[0],
+        pTemps->pePolyElements[0] );
+
+    // Convert t0 and t1 to NTT form
+    SymCryptMlDsaVectorNTT( pkMlDsakey->pvt0 );
+    SymCryptMlDsaVectorNTT( pkMlDsakey->pvt1 );
+
+    scError = SymCryptMlDsaPkEncode( pkMlDsakey, pTemps->pbScratch, pParams->cbEncodedPublicKey );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    SymCryptShake256( // publicKeyHash = SHAKE256 of the encoded public key held in the scratch buffer
+        pTemps->pbScratch,
+        pParams->cbEncodedPublicKey,
+        pkMlDsakey->publicKeyHash,
+        sizeof(pkMlDsakey->publicKeyHash) );
+
+    pkMlDsakey->hasRootSeed = TRUE;
+    pkMlDsakey->hasPrivateKey = TRUE;
+
+cleanup:
+    if( pTemps != NULL )
+    {
+        SymCryptMlDsaTemporariesFree( pTemps );
+    }
+
+    SymCryptWipeKnownSize( privateVectorSeed, sizeof(privateVectorSeed) );
+
+    return scError;
+}
+// SymCryptMlDsakeyGenerate: creates a key from fresh random bytes; unless SYMCRYPT_FLAG_KEY_NO_FIPS is set, also runs a sign/verify consistency test.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsakeyGenerate(
+    PSYMCRYPT_MLDSAKEY pkMlDsakey,
+    UINT32 flags)
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    BYTE random[SYMCRYPT_MLDSA_ROOT_SEED_SIZE];
+    PBYTE pbPctSignature = NULL;
+    SIZE_T cbPctSignature = 0;
+
+    // Ensure only allowed flags are specified
+    UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS;
+
+    if ( ( flags & ~allowedFlags ) != 0 )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    scError = SymCryptCallbackRandom( random, sizeof(random) );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptMlDsakeySetValue( random, sizeof(random), SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED, flags, pkMlDsakey );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // SymCryptMlDsakeySetValue ensures the self-test is run before
+    // first operational use of MlDsa
+
+    if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 )
+    {
+        // PCT on key generation, sign/verify the empty message with the generated key
+
+        cbPctSignature = pkMlDsakey->pParams->cbEncodedSignature;
+
+        pbPctSignature = SymCryptCallbackAlloc( cbPctSignature );
+        if( pbPctSignature == NULL )
+        {
+            scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+            goto cleanup;
+        }
+
+        scError = SymCryptMlDsaSign( pkMlDsakey,
+            NULL, 0,
+            NULL, 0,
+            0,
+            pbPctSignature, cbPctSignature );
+        if( scError != SYMCRYPT_NO_ERROR )
+        {
+            scError = SYMCRYPT_FIPS_FAILURE; // any signing error during the PCT is reported as a FIPS failure
+            goto cleanup;
+        }
+
+        scError = SymCryptMlDsaVerify( pkMlDsakey,
+            NULL, 0,
+            NULL, 0,
+            pbPctSignature, cbPctSignature,
+            0 );
+        if( scError != SYMCRYPT_NO_ERROR )
+        {
+            scError = SYMCRYPT_FIPS_FAILURE;
+            goto cleanup;
+        }
+
+        // could track having run the PCT with a flag in pkMlDsakey->fAlgorithmInfo,
+        // but currently no need to do that given we don't ever defer the PCT
+    }
+
+cleanup:
+    if( pbPctSignature != NULL )
+    {
+        // Wiping is not required for security, but has low relative cost
+        // and better to be on the safe side for FIPS
+        SymCryptWipe( pbPctSignature, cbPctSignature );
+        SymCryptCallbackFree( pbPctSignature );
+    }
+
+    SymCryptWipeKnownSize( random, sizeof(random) );
+
+    return scError;
+}
+// SymCryptMlDsakeySetValue: imports a key from a root seed, an encoded private key or an encoded public key, selected by mlDsakeyFormat.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsakeySetValue(
+    PCBYTE pbSrc,
+    SIZE_T cbSrc,
+    SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat,
+    UINT32 flags,
+    PSYMCRYPT_MLDSAKEY pkMlDsakey )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    // Ensure only allowed flags are specified
+    UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS;
+
+    if ( ( flags & ~allowedFlags ) != 0 )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 )
+    {
+        // Ensure ML-DSA algorithm selftest is run before first use of ML-DSA algorithms;
+        // notably _before_ first full KeyGen
+        SYMCRYPT_RUN_SELFTEST_ONCE(
+            SymCryptMlDsaSelftest,
+            SYMCRYPT_SELFTEST_ALGORITHM_MLDSA);
+    }
+
+    switch( mlDsakeyFormat )
+    {
+        case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED:
+            if( cbSrc != SYMCRYPT_MLDSA_ROOT_SEED_SIZE ) // SymCryptMlDsaKeyGenerateEx only asserts this, so it is enforced here
+            {
+                scError = SYMCRYPT_WRONG_KEY_SIZE;
+                goto cleanup;
+            }
+
+            scError = SymCryptMlDsaKeyGenerateEx( pkMlDsakey, pbSrc, cbSrc, flags );
+            break;
+        case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_KEY:
+            scError = SymCryptMlDsaSkDecode( pbSrc, cbSrc, flags, pkMlDsakey );
+            break;
+        case SYMCRYPT_MLDSAKEY_FORMAT_PUBLIC_KEY:
+            scError = SymCryptMlDsaPkDecode( pbSrc, cbSrc, flags, pkMlDsakey );
+            break;
+        default:
+            scError = SYMCRYPT_INVALID_ARGUMENT;
+            break;
+    }
+
+cleanup:
+    return scError;
+}
+// SymCryptMlDsakeyGetValue: exports the key in the requested format; the root seed is only returned when hasRootSeed is set.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsakeyGetValue(
+    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    PBYTE pbDst,
+    SIZE_T cbDst,
+    SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat,
+    UINT32 flags )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    if( flags != 0 ) // No flags currently supported
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    switch( mlDsakeyFormat )
+    {
+        case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_KEY:
+            scError = SymCryptMlDsaSkEncode(
+                pkMlDsakey,
+                pbDst,
+                cbDst );
+            break;
+        case SYMCRYPT_MLDSAKEY_FORMAT_PUBLIC_KEY:
+            scError = SymCryptMlDsaPkEncode(
+                pkMlDsakey,
+                pbDst,
+                cbDst );
+            break;
+        case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED:
+            if( cbDst < SYMCRYPT_MLDSA_ROOT_SEED_SIZE ) // a larger buffer is accepted; only the first ROOT_SEED_SIZE bytes are written
+            {
+                scError = SYMCRYPT_BUFFER_TOO_SMALL;
+                goto cleanup;
+            }
+
+            if( !pkMlDsakey->hasRootSeed )
+            {
+                scError = SYMCRYPT_INCOMPATIBLE_FORMAT;
+                goto cleanup;
+            }
+
+            memcpy( pbDst, pkMlDsakey->rootSeed, SYMCRYPT_MLDSA_ROOT_SEED_SIZE );
+
+            break;
+        default:
+            scError = SYMCRYPT_INVALID_ARGUMENT;
+            break;
+    }
+
+cleanup:
+    return scError;
+}
+// SymCryptMlDsaSignEx: signing core shared by the ML-DSA, external-mu and HashML-DSA entry points; arguments are only asserted, callers validate.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsaSignEx(
+    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    PCBYTE pbInput,
+    SIZE_T cbInput,
+    PCBYTE pbContext,
+    SIZE_T cbContext,
+    PCBYTE pbHashOid,
+    SIZE_T cbHashOid,
+    PCBYTE pbRandom,
+    SIZE_T cbRandom,
+    UINT32 flags,
+    PBYTE pbSignature,
+    SIZE_T cbSignature )
+{
+    SYMCRYPT_ASSERT( pkMlDsakey->hasPrivateKey == TRUE );
+    SYMCRYPT_ASSERT( cbContext <= SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH );
+    SYMCRYPT_ASSERT( cbRandom == SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE );
+    SYMCRYPT_ASSERT( pbHashOid != NULL || cbHashOid == 0 );
+    SYMCRYPT_ASSERT( pbContext != NULL || cbContext == 0 );
+    SYMCRYPT_ASSERT( cbSignature == pkMlDsakey->pParams->cbEncodedSignature );
+    SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_MLDSA_EXTERNALMU) == 0 );
+    SYMCRYPT_ASSERT( ((flags & SYMCRYPT_FLAG_MLDSA_EXTERNALMU) == 0) || (pbContext == NULL && pbHashOid == NULL) );
+
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams;
+    PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL;
+
+    const UINT32 beta = (UINT32) pParams->nChallengeNonZeroCoeffs * pParams->privateKeyRange;
+
+    BOOL bExternalMu = (flags & SYMCRYPT_FLAG_MLDSA_EXTERNALMU) != 0;
+    UINT8 modeId = (pbHashOid == NULL) ? 0 : 1; // 0 for ML-DSA, 1 for HashML-DSA
+    UINT8 cbContextByte = (UINT8) cbContext;
+    BYTE messageRepresentative[SYMCRYPT_SHAKE256_RESULT_SIZE];
+    BYTE privateRandom[SYMCRYPT_SHAKE256_RESULT_SIZE]; // NOTE(review): derived from the private signing seed but never wiped before return - confirm intent
+    BYTE commitmentHash[64]; // Largest possible size for commitment hash
+
+    const UINT32 cbw1Encoded = pParams->nRows * pParams->w1EncodeCoefficientBitLength *
+        ( SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8 );
+
+    pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize(
+        pParams,
+        2, // row vectors - W, W1, cs2, ct0, r0, hint (not all needed simultaneously)
+        3, // column vectors - mask, cs1, response
+        1, // poly element - challenge
+        cbw1Encoded ); // scratch space - w1 encoded
+    if( pTemps == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    PSYMCRYPT_SHAKE256_STATE pShakeState = &(pTemps->shake256State);
+    SymCryptShake256Init( pShakeState );
+
+    if ( bExternalMu )
+    {
+        // Caller passes the externally-computed message representative mu
+        SYMCRYPT_ASSERT( cbInput == SYMCRYPT_SHAKE256_RESULT_SIZE );
+        memcpy( messageRepresentative, pbInput, SYMCRYPT_SHAKE256_RESULT_SIZE );
+    }
+    else
+    {
+        // Line 6: calculate message representative mu
+        // = SHAKE256( public key hash || modeId || cbContextByte || context || OID? || message/hash, 64 )
+        // The OID is only included in the HashML-DSA mode
+        SymCryptShake256Append( pShakeState, pkMlDsakey->publicKeyHash, sizeof(pkMlDsakey->publicKeyHash) );
+        SymCryptShake256Append( pShakeState, &modeId, sizeof( modeId ) );
+        SymCryptShake256Append( pShakeState, &cbContextByte, sizeof( cbContextByte ) );
+
+        // These appends are no-ops if the length is zero
+        SymCryptShake256Append( pShakeState, pbContext, cbContext );
+        SymCryptShake256Append( pShakeState, pbHashOid, cbHashOid );
+
+        SymCryptShake256Append( pShakeState, pbInput, cbInput );
+        SymCryptShake256Result( pShakeState, messageRepresentative );
+    }
+
+    // Line 7: Calculate private random seed rho prime prime
+    // = SHAKE256( private signing seed K || pbRandom || message representative mu, 64 )
+    SymCryptShake256Append( pShakeState, pkMlDsakey->privateSigningSeed, sizeof(pkMlDsakey->privateSigningSeed) );
+    SymCryptShake256Append( pShakeState, pbRandom, cbRandom );
+    SymCryptShake256Append( pShakeState, messageRepresentative, sizeof(messageRepresentative) );
+    SymCryptShake256Result( pShakeState, privateRandom );
+
+    PSYMCRYPT_MLDSA_VECTOR pvW = pTemps->pvRowVectors[0];
+    PSYMCRYPT_MLDSA_VECTOR pvHint = NULL;
+
+    PSYMCRYPT_MLDSA_VECTOR pvMask = pTemps->pvColVectors[0];
+    PSYMCRYPT_MLDSA_VECTOR pvResponse = pTemps->pvColVectors[1];
+    PSYMCRYPT_MLDSA_VECTOR pvcs1 = pTemps->pvColVectors[2];
+
+    PBYTE pbW1Encoded = pTemps->pbScratch;
+
+    UINT16 k = 0;
+    while( TRUE ) // rejection loop: each "continue" below discards the candidate and retries with the advanced counter k
+    {
+        SymCryptMlDsaExpandMask(
+            pParams,
+            pShakeState,
+            privateRandom,
+            sizeof(privateRandom),
+            k,
+            pvMask );
+
+        // Increment k early so we can continue to the next loop iteration when validity checks fail
+        // It's okay to leak how many iterations this loop takes because the SHAKE inputs and
+        // outputs are still unpredictable; this does not leak information about the private key
+        k += (UINT16) pParams->nCols;
+
+        SymCryptMlDsaMatrixVectorMontMul( pkMlDsakey->pmA, pvMask, pvW, pTemps->pePolyElements[0] );
+
+        for(UINT8 i = 0; i < pvW->nElems; ++i)
+        {
+            SymCryptMlDsaPolyElementMulR( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT(i, pvW) );
+        }
+
+        SymCryptMlDsaVectorINTT( pvW );
+
+        {
+            // Scope for pvW1
+            PSYMCRYPT_MLDSA_VECTOR pvW1 = pTemps->pvRowVectors[1];
+            SymCryptMlDsaVectorHighBits( pParams, pvW, pvW1 );
+            SymCryptMlDsaVectorEncode( pvW1, pParams->w1EncodeCoefficientBitLength, 0, pbW1Encoded );
+        }
+
+        // Calculate commitment hash
+        SymCryptShake256Append( pShakeState, messageRepresentative, sizeof(messageRepresentative) );
+        SymCryptShake256Append( pShakeState, pbW1Encoded, cbw1Encoded );
+        SymCryptShake256Extract( pShakeState, commitmentHash, pParams->cbCommitmentHash, TRUE );
+
+        // Calculate challenge
+        // Reusing poly element 0 for challenge (previously temp space for multiplication)
+        PSYMCRYPT_MLDSA_POLYELEMENT peC = pTemps->pePolyElements[0];
+        SymCryptMlDsaSampleInBall( pParams, commitmentHash, pParams->cbCommitmentHash, peC );
+
+        SymCryptMlDsaPolyElementNTT( peC );
+        SymCryptMlDsaPolyElementMulR( peC );
+
+        {
+            // Scope for cs2 - reusing row vector 1, previously W1
+            PSYMCRYPT_MLDSA_VECTOR pvcs2 = pTemps->pvRowVectors[1];
+            SymCryptMlDsaVectorPolyElementMontMul( pkMlDsakey->pvs1, peC, pvcs1 );
+            SymCryptMlDsaVectorPolyElementMontMul( pkMlDsakey->pvs2, peC, pvcs2 );
+
+            SymCryptMlDsaVectorINTT( pvcs1 );
+            SymCryptMlDsaVectorINTT( pvcs2 );
+
+            SymCryptMlDsaVectorINTT( pvMask );
+            SymCryptMlDsaVectorAdd( pvMask, pvcs1, pvResponse );
+
+            // (w - cs2) is an input to both LowBits (for r0) and MakeHint
+            SymCryptMlDsaVectorSub( pvW, pvcs2, pvW );
+        }
+
+        {
+            // Scope for r0 - reusing row vector 1, previously cs2
+            PSYMCRYPT_MLDSA_VECTOR pvr0 = pTemps->pvRowVectors[1];
+            SymCryptMlDsaVectorLowBits( pParams, pvW, pvr0 );
+
+            UINT32 zInfinityNorm = SymCryptMlDsaVectorInfinityNorm( pvResponse );
+            UINT32 r0InfinityNorm = SymCryptMlDsaVectorInfinityNorm( pvr0 );
+
+            if( (zInfinityNorm >= (1 << pParams->maskCoefficientRangeLog2) - beta) ||
+                (r0InfinityNorm >= pParams->commitmentRoundingRange - beta) )
+            {
+                continue;
+            }
+        }
+
+        {
+            // Scope for ct0 - reusing row vector 1, previously r0
+            PSYMCRYPT_MLDSA_VECTOR pvct0 = pTemps->pvRowVectors[1];
+
+            SymCryptMlDsaVectorPolyElementMontMul( pkMlDsakey->pvt0, peC, pvct0 );
+            SymCryptMlDsaVectorINTT( pvct0 );
+
+            UINT32 ct0InfinityNorm = SymCryptMlDsaVectorInfinityNorm( pvct0 );
+            if( ct0InfinityNorm >= pParams->commitmentRoundingRange )
+            {
+                continue;
+            }
+
+            // MakeHint vectors
+            // w - cs2 = pvW
+            // w - cs2 + ct0 = pvct0 + pvW
+            SymCryptMlDsaVectorAdd( pvct0, pvW, pvct0 );
+
+            // Write hint in-place over ct0
+            UINT32 nHintBitsSet = 0;
+            SymCryptMlDsaMakeHint( pParams, pvW, pvct0, pvct0, &nHintBitsSet );
+
+            if( nHintBitsSet > pParams->nHintNonZeroCoeffs )
+            {
+                continue;
+            }
+        }
+
+        // Row vector 1, previously ct0, now contains the hint
+        pvHint = pTemps->pvRowVectors[1];
+
+        break;
+    }
+
+    SYMCRYPT_ASSERT( pvHint != NULL );
+
+    SymCryptMlDsaSigEncode(
+        pParams,
+        commitmentHash,
+        pParams->cbCommitmentHash,
+        pvResponse,
+        pvHint,
+        pbSignature,
+        cbSignature );
+
+cleanup:
+    if( pTemps != NULL )
+    {
+        SymCryptMlDsaTemporariesFree( pTemps );
+    }
+
+    return scError;
+}
+// SymCryptMlDsaSign: validates the arguments, draws fresh signing randomness and signs pbMessage without a hash OID (pure ML-DSA).
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsaSign(
+    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    PCBYTE pbMessage,
+    SIZE_T cbMessage,
+    PCBYTE pbContext,
+    SIZE_T cbContext,
+    UINT32 flags,
+    PBYTE pbSignature,
+    SIZE_T cbSignature )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    if( (flags != 0) || // No flags currently supported
+        (cbContext > SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH) ||
+        (pkMlDsakey->hasPrivateKey == FALSE) ||
+        (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    BYTE random[SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE]; // declared after the first goto; cleanup wipes it on every path
+    scError = SymCryptCallbackRandom( random, sizeof(random) );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptMlDsaSignEx(
+        pkMlDsakey,
+        pbMessage,
+        cbMessage,
+        pbContext,
+        cbContext,
+        NULL, // pbHashOid
+        0, // cbHashOid
+        random,
+        sizeof(random),
+        flags,
+        pbSignature,
+        cbSignature );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    SymCryptWipeKnownSize( random, sizeof(random) );
+
+    return scError;
+}
+// SymCryptExternalMuMlDsaSign: signs a caller-computed message representative mu of exactly SYMCRYPT_SHAKE256_RESULT_SIZE bytes.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptExternalMuMlDsaSign(
+    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    PCBYTE pbMu,
+    SIZE_T cbMu,
+    UINT32 flags,
+    PBYTE pbSignature,
+    SIZE_T cbSignature )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    if( (flags != 0) || // No flags currently supported
+        (pkMlDsakey->hasPrivateKey == FALSE) ||
+        (cbMu != SYMCRYPT_SHAKE256_RESULT_SIZE) ||
+        (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    BYTE random[SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE];
+    scError = SymCryptCallbackRandom( random, sizeof(random) );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptMlDsaSignEx(
+        pkMlDsakey,
+        pbMu,
+        cbMu,
+        NULL, // pbContext
+        0, // cbContext
+        NULL, // pbHashOid
+        0, // cbHashOid
+        random,
+        sizeof(random),
+        SYMCRYPT_FLAG_MLDSA_EXTERNALMU,
+        pbSignature,
+        cbSignature );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    SymCryptWipeKnownSize( random, sizeof(random) );
+
+    return scError;
+}
+// SymCryptHashMlDsaSign: HashML-DSA signing of a pre-computed hash; the OID looked up for hashAlg is passed on to SymCryptMlDsaSignEx.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptHashMlDsaSign(
+    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    SYMCRYPT_PQDSA_HASH_ID hashAlg,
+    PCBYTE pbHash,
+    SIZE_T cbHash,
+    PCBYTE pbContext,
+    SIZE_T cbContext,
+    UINT32 flags,
+    PBYTE pbSignature,
+    SIZE_T cbSignature )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    PCSYMCRYPT_OID pHashOid = NULL;
+
+    if( (flags != 0) || // No flags currently supported
+        (cbContext > SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH) ||
+        (pkMlDsakey->hasPrivateKey == FALSE) ||
+        (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    BYTE random[SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE];
+    scError = SymCryptCallbackRandom( random, sizeof(random) );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptHashMlDsaValidateHashAlgAndGetOid(
+        pkMlDsakey->pParams,
+        hashAlg,
+        cbHash,
+        &pHashOid );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptMlDsaSignEx(
+        pkMlDsakey,
+        pbHash,
+        cbHash,
+        pbContext,
+        cbContext,
+        pHashOid->pbOID,
+        pHashOid->cbOID,
+        random,
+        sizeof(random),
+        flags,
+        pbSignature,
+        cbSignature );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+
+cleanup:
+    SymCryptWipeKnownSize( random, sizeof(random) );
+
+    return scError;
+}
+// SymCryptMlDsaVerifyEx: verification core for all three modes; fails with SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE on a norm or hash mismatch.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsaVerifyEx(
+    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    PCBYTE pbInput,
+    SIZE_T cbInput,
+    PCBYTE pbContext,
+    SIZE_T cbContext,
+    PCBYTE pbHashOid,
+    SIZE_T cbHashOid,
+    PCBYTE pbSignature,
+    SIZE_T cbSignature,
+    UINT32 flags )
+{
+    UNREFERENCED_PARAMETER( flags ); // NOTE(review): flags is read below for bExternalMu, so this looks redundant - confirm
+
+    SYMCRYPT_ASSERT( cbContext <= SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH );
+    SYMCRYPT_ASSERT( pbHashOid != NULL || cbHashOid == 0 );
+    SYMCRYPT_ASSERT( pbContext != NULL || cbContext == 0 );
+    SYMCRYPT_ASSERT( cbSignature == pkMlDsakey->pParams->cbEncodedSignature );
+    SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_MLDSA_EXTERNALMU) == 0 );
+    SYMCRYPT_ASSERT( ((flags & SYMCRYPT_FLAG_MLDSA_EXTERNALMU) == 0) || (pbContext == NULL && pbHashOid == NULL) );
+
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams;
+    PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL;
+
+    const UINT32 beta = (UINT32) pParams->nChallengeNonZeroCoeffs * pParams->privateKeyRange;
+
+    BOOL bExternalMu = (flags & SYMCRYPT_FLAG_MLDSA_EXTERNALMU) != 0;
+    UINT8 modeId = (pbHashOid == NULL) ? 0 : 1; // 0 for ML-DSA, 1 for HashML-DSA
+    UINT8 cbContextByte = (UINT8) cbContext;
+    BYTE messageRepresentative[SYMCRYPT_SHAKE256_RESULT_SIZE];
+    BYTE commitmentHash[64]; // Largest possible size for commitment hash
+    BYTE recalculatedCommitmentHash[64];
+    UINT32 responseInfinityNorm;
+
+    const UINT32 cbw1Encoded = pParams->nRows * pParams->w1EncodeCoefficientBitLength *
+        ( SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8 );
+
+    pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize(
+        pParams,
+        4, // row vectors - hint, A*NTT(z), T1*(2^d), commitment
+        1, // column vectors - response
+        2, // poly elements - challenge, temp space for multiplication
+        cbw1Encoded ); // scratch space - w1 encoded
+    if( pTemps == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // Row vectors
+    PSYMCRYPT_MLDSA_VECTOR pvHint = pTemps->pvRowVectors[0];
+    PSYMCRYPT_MLDSA_VECTOR pvATimesNTTz = pTemps->pvRowVectors[1];
+    PSYMCRYPT_MLDSA_VECTOR pvT1Times2D = pTemps->pvRowVectors[2];
+    PSYMCRYPT_MLDSA_VECTOR pvCommitment = pTemps->pvRowVectors[3];
+
+    // Column vectors
+    PSYMCRYPT_MLDSA_VECTOR pvResponse = pTemps->pvColVectors[0];
+
+    // Poly elements
+    PSYMCRYPT_MLDSA_POLYELEMENT peC = pTemps->pePolyElements[0];
+    PSYMCRYPT_MLDSA_POLYELEMENT peTmp = pTemps->pePolyElements[1];
+
+    PBYTE pbw1Encoded = pTemps->pbScratch;
+
+    scError = SymCryptMlDsaSigDecode(
+        pParams,
+        pbSignature,
+        cbSignature,
+        commitmentHash,
+        pParams->cbCommitmentHash,
+        pvResponse,
+        pvHint );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    responseInfinityNorm = SymCryptMlDsaVectorInfinityNorm( pvResponse );
+
+    // For the signature to be valid, the response infinity norm must be < (gamma_1 - beta)
+    // gamma_1 = (1 << maskCoefficientRangeLog2)
+    if( responseInfinityNorm >= (1 << pParams->maskCoefficientRangeLog2) - beta )
+    {
+        scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
+        goto cleanup;
+    }
+
+    SymCryptMlDsaSampleInBall(
+        pParams,
+        commitmentHash,
+        pParams->cbCommitmentHash,
+        peC );
+
+    SymCryptMlDsaVectorNTT( pvResponse );
+
+    for(UINT8 i = 0; i < pvResponse->nElems; ++i)
+    {
+        SymCryptMlDsaPolyElementMulR( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT(i, pvResponse) );
+    }
+
+    SymCryptMlDsaMatrixVectorMontMul(
+        pkMlDsakey->pmA,
+        pvResponse,
+        pvATimesNTTz,
+        peTmp );
+
+    // TODO osgvsowi/55435592 - Consider precomputing t1 * 2^d
+    const UINT32 pow2DTimesR = 4214781;
+    for(UINT8 i = 0; i < pkMlDsakey->pvt1->nElems; ++i)
+    {
+        PSYMCRYPT_MLDSA_POLYELEMENT peSrc = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pkMlDsakey->pvt1 );
+        PSYMCRYPT_MLDSA_POLYELEMENT peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvT1Times2D );
+        for(UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; ++j)
+        {
+            peDst->coeffs[j] = SymCryptMlDsaMontMul( peSrc->coeffs[j], pow2DTimesR );
+        }
+    }
+
+    SymCryptMlDsaPolyElementNTT( peC );
+    SymCryptMlDsaPolyElementMulR( peC );
+
+    SymCryptMlDsaVectorPolyElementMontMul(
+        pvT1Times2D,
+        peC,
+        pvT1Times2D );
+
+    SymCryptMlDsaVectorSub(
+        pvATimesNTTz,
+        pvT1Times2D,
+        pvCommitment );
+
+    SymCryptMlDsaVectorINTT( pvCommitment );
+
+    SymCryptMlDsaUseHint(
+        pParams,
+        pvHint,
+        pvCommitment );
+
+    SymCryptMlDsaVectorEncode(
+        pvCommitment,
+        pParams->w1EncodeCoefficientBitLength,
+        0,
+        pbw1Encoded );
+
+    PSYMCRYPT_SHAKE256_STATE pShakeState = &(pTemps->shake256State);
+    SymCryptShake256Init( pShakeState );
+
+    if ( bExternalMu )
+    {
+        // Caller passes the externally-computed message representative mu
+        SYMCRYPT_ASSERT( cbInput == SYMCRYPT_SHAKE256_RESULT_SIZE );
+        memcpy( messageRepresentative, pbInput, SYMCRYPT_SHAKE256_RESULT_SIZE );
+    }
+    else
+    {
+        // Line 7: calculate message representative mu
+        // = SHAKE256( public key hash || modeId || cbContextByte || context || OID? || message/hash, 64 )
+        // The OID is only included in the HashML-DSA mode
+        SymCryptShake256Append( pShakeState, pkMlDsakey->publicKeyHash, sizeof(pkMlDsakey->publicKeyHash) );
+        SymCryptShake256Append( pShakeState, &modeId, sizeof( modeId ) );
+        SymCryptShake256Append( pShakeState, &cbContextByte, sizeof( cbContextByte ) );
+
+        SymCryptShake256Append( pShakeState, pbContext, cbContext );
+        SymCryptShake256Append( pShakeState, pbHashOid, cbHashOid );
+
+        SymCryptShake256Append( pShakeState, pbInput, cbInput );
+        SymCryptShake256Result( pShakeState, messageRepresentative );
+    }
+
+    SymCryptShake256Append( pShakeState, messageRepresentative, sizeof(messageRepresentative) );
+    SymCryptShake256Append( pShakeState, pbw1Encoded, cbw1Encoded );
+    SymCryptShake256Extract( pShakeState, recalculatedCommitmentHash, pParams->cbCommitmentHash, TRUE );
+
+    if( !SymCryptEqual( recalculatedCommitmentHash, commitmentHash, pParams->cbCommitmentHash ) )
+    {
+        scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
+        goto cleanup;
+    }
+
+cleanup:
+    if( pTemps != NULL )
+    {
+        SymCryptMlDsaTemporariesFree( pTemps );
+    }
+
+    return scError;
+}
+// SymCryptMlDsaVerify: validates the arguments and verifies a pure ML-DSA signature over pbMessage (no hash OID).
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsaVerify(
+    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    PCBYTE pbMessage,
+    SIZE_T cbMessage,
+    PCBYTE pbContext,
+    SIZE_T cbContext,
+    PCBYTE pbSignature,
+    SIZE_T cbSignature,
+    UINT32 flags )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    if( (flags != 0) || // No flags currently supported
+        (cbContext > SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH) ||
+        (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    scError = SymCryptMlDsaVerifyEx(
+        pkMlDsakey,
+        pbMessage,
+        cbMessage,
+        pbContext,
+        cbContext,
+        NULL, // pbHashOid
+        0, // cbHashOid
+        pbSignature,
+        cbSignature,
+        flags );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    return scError;
+}
+// SymCryptExternalMuMlDsaVerify: verifies a signature against a caller-computed mu of exactly SYMCRYPT_SHAKE256_RESULT_SIZE bytes.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptExternalMuMlDsaVerify(
+    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    PCBYTE pbMu,
+    SIZE_T cbMu,
+    PCBYTE pbSignature,
+    SIZE_T cbSignature,
+    UINT32 flags )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    if( (flags != 0) || // No flags currently supported
+        (cbMu != SYMCRYPT_SHAKE256_RESULT_SIZE) ||
+        (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    scError = SymCryptMlDsaVerifyEx(
+        pkMlDsakey,
+        pbMu,
+        cbMu,
+        NULL, // pbContext
+        0, // cbContext
+        NULL, // pbHashOid
+        0, // cbHashOid
+        pbSignature,
+        cbSignature,
+        SYMCRYPT_FLAG_MLDSA_EXTERNALMU );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    return scError;
+}
+// SymCryptHashMlDsaVerify: HashML-DSA verification of a pre-computed hash, using the OID looked up for hashAlg.
+_Use_decl_annotations_
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptHashMlDsaVerify(
+    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    SYMCRYPT_PQDSA_HASH_ID hashAlg,
+    PCBYTE pbHash,
+    SIZE_T cbHash,
+    PCBYTE pbContext,
+    SIZE_T cbContext,
+    PCBYTE pbSignature,
+    SIZE_T cbSignature,
+    UINT32 flags )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    PCSYMCRYPT_OID pHashOid = NULL;
+
+    if( (flags != 0) || // No flags currently supported
+        (cbContext > SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH) ||
+        (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    scError = SymCryptHashMlDsaValidateHashAlgAndGetOid(
+        pkMlDsakey->pParams,
+        hashAlg,
+        cbHash,
+        &pHashOid );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptMlDsaVerifyEx(
+        pkMlDsakey,
+        pbHash,
+        cbHash,
+        pbContext,
+        cbContext,
+        pHashOid->pbOID,
+        pHashOid->cbOID,
+        pbSignature,
+        cbSignature,
+        flags );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    return scError;
+}
diff --git a/libs/symcrypt/lib/mldsa_primitives.c b/libs/symcrypt/lib/mldsa_primitives.c
new file mode 100644
index 00000000000..a6fe610e147
--- /dev/null
+++ b/libs/symcrypt/lib/mldsa_primitives.c
@@ -0,0 +1,2410 @@ +// +// mldsa_primitives.c ML-DSA low-level primitive implementations +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// Q^-1 mod 2^32 - used in Montgomery reduction +// +#define SYMCRYPT_MLDSA_Q_INV (58728449) + +// +// Inverse NTT fixup times R = (256^-1 << 32) mod Q +// +#define SYMCRYPT_MLDSA_INTT_FIXUP_TIMES_R (16382) + +// +// R^2 mod Q - used for multiplying a factor of R into a polynomial in NTT form via +// Montgomery multiplication +// +#define SYMCRYPT_MLDSA_RSQR (2365951) + +// +// Size of the expanded public seed used in SymCryptMlDsaRejNttPoly +// Defined in FIPS 204 to be 272 bits (256 bit public seed rho || 8 bit index s || 8 bit index r) +// +#define SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE (34) + +// +// Size of the expanded private seed used in SymCryptMlDsaRejBoundedPoly +// Defined in FIPS 204 to be 528 bits (512 bit private vector seed rho' || 16 bit index) +// +#define SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE (66) + +// +// Number of low-order bits dropped by Power2Round. Defined as d in FIPS 204 +// +#define SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS (13) + +// +// Zeta tables. +// For ML-DSA, zeta = 1753, which is a 512th root of unity modulo Q +// +// In ML-DSA we use powers of zeta to convert to and from NTT form +// and to perform multiplication between polynomials in NTT form +// + +// This table is a lookup for (Zeta^(BitRev(index)) * R) mod Q +// Used in NTT and Inverse NTT +// i.e. 
element 1 is Zeta^(BitRev(1)) * (2^32) mod Q == (1753^128)*(2^32) mod 8380417 == 25847 +// +// MLDSA_ZETA_BITREV_TIMES_R = [ (pow(1753, bitRev(i), 8380417) << 32) % 8380417 for i in range(256) ] +// +const UINT32 MLDSA_ZETA_BITREV_TIMES_R[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS] = { + 4193792, 25847, 5771523, 7861508, 237124, 7602457, 7504169, 466468, + 1826347, 2353451, 8021166, 6288512, 3119733, 5495562, 3111497, 2680103, + 2725464, 1024112, 7300517, 3585928, 7830929, 7260833, 2619752, 6271868, + 6262231, 4520680, 6980856, 5102745, 1757237, 8360995, 4010497, 280005, + 2706023, 95776, 3077325, 3530437, 6718724, 4788269, 5842901, 3915439, + 4519302, 5336701, 3574422, 5512770, 3539968, 8079950, 2348700, 7841118, + 6681150, 6736599, 3505694, 4558682, 3507263, 6239768, 6779997, 3699596, + 811944, 531354, 954230, 3881043, 3900724, 5823537, 2071892, 5582638, + 4450022, 6851714, 4702672, 5339162, 6927966, 3475950, 2176455, 6795196, + 7122806, 1939314, 4296819, 7380215, 5190273, 5223087, 4747489, 126922, + 3412210, 7396998, 2147896, 2715295, 5412772, 4686924, 7969390, 5903370, + 7709315, 7151892, 8357436, 7072248, 7998430, 1349076, 1852771, 6949987, + 5037034, 264944, 508951, 3097992, 44288, 7280319, 904516, 3958618, + 4656075, 8371839, 1653064, 5130689, 2389356, 8169440, 759969, 7063561, + 189548, 4827145, 3159746, 6529015, 5971092, 8202977, 1315589, 1341330, + 1285669, 6795489, 7567685, 6940675, 5361315, 4499357, 4751448, 3839961, + 2091667, 3407706, 2316500, 3817976, 5037939, 2244091, 5933984, 4817955, + 266997, 2434439, 7144689, 3513181, 4860065, 4621053, 7183191, 5187039, + 900702, 1859098, 909542, 819034, 495491, 6767243, 8337157, 7857917, + 7725090, 5257975, 2031748, 3207046, 4823422, 7855319, 7611795, 4784579, + 342297, 286988, 5942594, 4108315, 3437287, 5038140, 1735879, 203044, + 2842341, 2691481, 5790267, 1265009, 4055324, 1247620, 2486353, 1595974, + 4613401, 1250494, 2635921, 4832145, 5386378, 1869119, 1903435, 7329447, + 7047359, 1237275, 5062207, 6950192, 
7929317, 1312455, 3306115, 6417775, + 7100756, 1917081, 5834105, 7005614, 1500165, 777191, 2235880, 3406031, + 7838005, 5548557, 6709241, 6533464, 5796124, 4656147, 594136, 4603424, + 6366809, 2432395, 2454455, 8215696, 1957272, 3369112, 185531, 7173032, + 5196991, 162844, 1616392, 3014001, 810149, 1652634, 4686184, 6581310, + 5341501, 3523897, 3866901, 269760, 2213111, 7404533, 1717735, 472078, + 7953734, 1723600, 6577327, 1910376, 6712985, 7276084, 8119771, 4546524, + 5441381, 6144432, 7959518, 6094090, 183443, 7403526, 1612842, 4834730, + 7826001, 3919660, 8332111, 7018208, 3937738, 1400424, 7534263, 1976782 +}; + +const UINT32 MLDSA_NEGATIVE_ZETA_BITREV_TIMES_R[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS] = { + 4186625, 8354570, 2608894, 518909, 8143293, 777960, 876248, 7913949, + 6554070, 6026966, 359251, 2091905, 5260684, 2884855, 5268920, 5700314, + 5654953, 7356305, 1079900, 4794489, 549488, 1119584, 5760665, 2108549, + 2118186, 3859737, 1399561, 3277672, 6623180, 19422, 4369920, 8100412, + 5674394, 8284641, 5303092, 4849980, 1661693, 3592148, 2537516, 4464978, + 3861115, 3043716, 4805995, 2867647, 4840449, 300467, 6031717, 539299, + 1699267, 1643818, 4874723, 3821735, 4873154, 2140649, 1600420, 4680821, + 7568473, 7849063, 7426187, 4499374, 4479693, 2556880, 6308525, 2797779, + 3930395, 1528703, 3677745, 3041255, 1452451, 4904467, 6203962, 1585221, + 1257611, 6441103, 4083598, 1000202, 3190144, 3157330, 3632928, 8253495, + 4968207, 983419, 6232521, 5665122, 2967645, 3693493, 411027, 2477047, + 671102, 1228525, 22981, 1308169, 381987, 7031341, 6527646, 1430430, + 3343383, 8115473, 7871466, 5282425, 8336129, 1100098, 7475901, 4421799, + 3724342, 8578, 6727353, 3249728, 5991061, 210977, 7620448, 1316856, + 8190869, 3553272, 5220671, 1851402, 2409325, 177440, 7064828, 7039087, + 7094748, 1584928, 812732, 1439742, 3019102, 3881060, 3628969, 4540456, + 6288750, 4972711, 6063917, 4562441, 3342478, 6136326, 2446433, 3562462, + 8113420, 5945978, 1235728, 4867236, 
3520352, 3759364, 1197226, 3193378, + 7479715, 6521319, 7470875, 7561383, 7884926, 1613174, 43260, 522500, + 655327, 3122442, 6348669, 5173371, 3556995, 525098, 768622, 3595838, + 8038120, 8093429, 2437823, 4272102, 4943130, 3342277, 6644538, 8177373, + 5538076, 5688936, 2590150, 7115408, 4325093, 7132797, 5894064, 6784443, + 3767016, 7129923, 5744496, 3548272, 2994039, 6511298, 6476982, 1050970, + 1333058, 7143142, 3318210, 1430225, 451100, 7067962, 5074302, 1962642, + 1279661, 6463336, 2546312, 1374803, 6880252, 7603226, 6144537, 4974386, + 542412, 2831860, 1671176, 1846953, 2584293, 3724270, 7786281, 3776993, + 2013608, 5948022, 5925962, 164721, 6423145, 5011305, 8194886, 1207385, + 3183426, 8217573, 6764025, 5366416, 7570268, 6727783, 3694233, 1799107, + 3038916, 4856520, 4513516, 8110657, 6167306, 975884, 6662682, 7908339, + 426683, 6656817, 1803090, 6470041, 1667432, 1104333, 260646, 3833893, + 2939036, 2235985, 420899, 2286327, 8196974, 976891, 6767575, 3545687, + 554416, 4460757, 48306, 1362209, 4442679, 6979993, 846154, 6403635 +}; + +const SYMCRYPT_MLDSA_INTERNAL_PARAMS SymCryptMlDsaInternalParams44 = +{ + .params = SYMCRYPT_MLDSA_PARAMS_MLDSA44, + .cbPolyElement = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT, + .cbRowVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(4), + .cbColVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(4), + .cbMatrix = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX(4, 4), + .nRows = 4, + .nCols = 4, + .privateKeyRange = 2, + .encodedCoefficientBitLength = 3, + .nChallengeNonZeroCoeffs = 39, + .nHintNonZeroCoeffs = 80, + .maskCoefficientRangeLog2 = 17, + .commitmentModulus = 44, + .decomposeR1Factor = 11275, + .commitmentRoundingRange = 95232, + .w1EncodeCoefficientBitLength = 6, // [0, 43] + .cbCommitmentHash = 32, + .cbEncodedPrivateKey = 2560, + .cbEncodedPublicKey = 1312, + .cbEncodedSignature = SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA44 +}; + +const SYMCRYPT_MLDSA_INTERNAL_PARAMS SymCryptMlDsaInternalParams65 = +{ + .params = 
SYMCRYPT_MLDSA_PARAMS_MLDSA65, + .cbPolyElement = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT, + .cbRowVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(6), + .cbColVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(5), + .cbMatrix = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX(6, 5), + .nRows = 6, + .nCols = 5, + .privateKeyRange = 4, + .encodedCoefficientBitLength = 4, + .nChallengeNonZeroCoeffs = 49, + .nHintNonZeroCoeffs = 55, + .maskCoefficientRangeLog2 = 19, + .commitmentModulus = 16, + .decomposeR1Factor = 4100, + .commitmentRoundingRange = 261888, + .w1EncodeCoefficientBitLength = 4, // [0, 15] + .cbCommitmentHash = 48, + .cbEncodedPrivateKey = 4032, + .cbEncodedPublicKey = 1952, + .cbEncodedSignature = SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA65 +}; + +const SYMCRYPT_MLDSA_INTERNAL_PARAMS SymCryptMlDsaInternalParams87 = +{ + .params = SYMCRYPT_MLDSA_PARAMS_MLDSA87, + .cbPolyElement = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT, + .cbRowVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(8), + .cbColVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(7), + .cbMatrix = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX(8, 7), + .nRows = 8, + .nCols = 7, + .privateKeyRange = 2, + .encodedCoefficientBitLength = 3, + .nChallengeNonZeroCoeffs = 60, + .nHintNonZeroCoeffs = 75, + .maskCoefficientRangeLog2 = 19, + .commitmentModulus = 16, + .decomposeR1Factor = 4100, + .commitmentRoundingRange = 261888, + .w1EncodeCoefficientBitLength = 4, // [0, 15] + .cbCommitmentHash = 64, + .cbEncodedPrivateKey = 4896, + .cbEncodedPublicKey = 2592, + .cbEncodedSignature = SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA87 +}; + +typedef struct _SYMCRYPT_HASH_OID_MAPPING +{ + SYMCRYPT_PQDSA_HASH_ID hashId; + const PCSYMCRYPT_HASH pHashAlgorithm; + PCSYMCRYPT_OID pOid; + BOOLEAN fIsXof; +} SYMCRYPT_HASH_OID_MAPPING, *PSYMCRYPT_HASH_OID_MAPPING; + +// +// Mapping of hash OIDs to SymCrypt hash algorithms. Currently this only contains the "short" hash +// OIDs, and only for those algorithms that are approved for use in ML-DSA. 
In the future, we might +// want to make this functionality more generic, but that requires more thought about the design. +// Ideally, the SYMCRYPT_HASH structures could contain pointers to their corresponding OIDs, but +// those structures are exposed externally, so extending them would be a breaking change. +// +const SYMCRYPT_HASH_OID_MAPPING g_hashOidMap[] = +{ + { SYMCRYPT_PQDSA_HASH_ID_SHA256, &SymCryptSha256Algorithm_default, &SymCryptSha256OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA384, &SymCryptSha384Algorithm_default, &SymCryptSha384OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA512, &SymCryptSha512Algorithm_default, &SymCryptSha512OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA512_256, &SymCryptSha512_256Algorithm_default, &SymCryptSha512_256OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA3_256, &SymCryptSha3_256Algorithm_default, &SymCryptSha3_256OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA3_384, &SymCryptSha3_384Algorithm_default, &SymCryptSha3_384OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA3_512, &SymCryptSha3_512Algorithm_default, &SymCryptSha3_512OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHAKE128, &SymCryptShake128HashAlgorithm_default, &SymCryptShake128OidList[1], TRUE }, + { SYMCRYPT_PQDSA_HASH_ID_SHAKE256, &SymCryptShake256HashAlgorithm_default, &SymCryptShake256OidList[1], TRUE } +}; + +// +// The table above relies on the OID lists having (at least) two entries, where the second one +// is the 11-byte encoding of the OID. If this ever changes, the table needs to be updated. 
+// +C_ASSERT( SYMCRYPT_SHA256_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA384_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA512_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA512_256_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA3_256_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA3_384_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA3_512_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHAKE128_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHAKE256_OID_COUNT == 2 ); + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaGetInternalParamsFromParams( + SYMCRYPT_MLDSA_PARAMS params, + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS* pInternalParams ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + switch( params ) + { + case SYMCRYPT_MLDSA_PARAMS_MLDSA44: + *pInternalParams = &SymCryptMlDsaInternalParams44; + break; + case SYMCRYPT_MLDSA_PARAMS_MLDSA65: + *pInternalParams = &SymCryptMlDsaInternalParams65; + break; + case SYMCRYPT_MLDSA_PARAMS_MLDSA87: + *pInternalParams = &SymCryptMlDsaInternalParams87; + break; + case SYMCRYPT_MLDSA_PARAMS_NULL: + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + break; + } + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSizeofKeyFormatFromParams( + SYMCRYPT_MLDSA_PARAMS params, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + SIZE_T* pcbKeyFormat ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams = NULL; + + scError = SymCryptMlDsaGetInternalParamsFromParams( params, &pInternalParams ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + switch( mlDsakeyFormat ) + { + case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED: + *pcbKeyFormat = SYMCRYPT_MLDSA_ROOT_SEED_SIZE; + break; + case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_KEY: + *pcbKeyFormat = pInternalParams->cbEncodedPrivateKey; + break; + case SYMCRYPT_MLDSAKEY_FORMAT_PUBLIC_KEY: + *pcbKeyFormat = pInternalParams->cbEncodedPublicKey; + break; + default: + scError = 
SYMCRYPT_INVALID_ARGUMENT; + break; + } + +cleanup: + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSizeofSignatureFromParams( + SYMCRYPT_MLDSA_PARAMS params, + SIZE_T* pcbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams = NULL; + + scError = SymCryptMlDsaGetInternalParamsFromParams( params, &pInternalParams ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + *pcbSignature = pInternalParams->cbEncodedSignature; + +cleanup: + return scError; +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSAKEY +SYMCRYPT_CALL +SymCryptMlDsakeyInitialize( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams, + PBYTE pbKey, + UINT32 cbKey ) +{ + PSYMCRYPT_MLDSAKEY pkMlDsakey = (PSYMCRYPT_MLDSAKEY) pbKey; + SYMCRYPT_ASSERT( pkMlDsakey != NULL ); + + UINT8 nRows = pInternalParams->nRows; + UINT8 nCols = pInternalParams->nCols; + + SYMCRYPT_ASSERT( cbKey == SYMCRYPT_INTERNAL_MLDSA_SIZEOF_KEY(nRows, nCols) ); + + UINT32 cbMatrix = pInternalParams->cbMatrix; // A matrix + UINT32 cbRowVector = pInternalParams->cbRowVector; // s2, t vectors + UINT32 cbColVector = pInternalParams->cbColVector; // s1 vector + + SymCryptWipe( pbKey, cbKey ); + + pkMlDsakey->pParams = pInternalParams; + pkMlDsakey->cbTotalSize = cbKey; + + PBYTE pbCurrent = pbKey + sizeof(SYMCRYPT_MLDSAKEY); + + // Public components + pkMlDsakey->pmA = SymCryptMlDsaMatrixCreate( pbCurrent, cbMatrix, nRows, nCols ); + pbCurrent += cbMatrix; + + pkMlDsakey->pvt1 = SymCryptMlDsaVectorCreate( pbCurrent, cbRowVector, nRows ); + pbCurrent += cbRowVector; + + // Private components + pkMlDsakey->pvs1 = SymCryptMlDsaVectorCreate( pbCurrent, cbColVector, nCols ); + pbCurrent += cbColVector; + + pkMlDsakey->pvs2 = SymCryptMlDsaVectorCreate( pbCurrent, cbRowVector, nRows ); + pbCurrent += cbRowVector; + + pkMlDsakey->pvt0 = SymCryptMlDsaVectorCreate( pbCurrent, cbRowVector, nRows ); + pbCurrent += cbRowVector; + + 
SYMCRYPT_ASSERT( pbCurrent == pbKey + cbKey ); + + SYMCRYPT_SET_MAGIC( pkMlDsakey ); + + return pkMlDsakey; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsakeyComputeT( + PCSYMCRYPT_MLDSA_MATRIX pmA, + PCSYMCRYPT_MLDSA_VECTOR pvs1, + PCSYMCRYPT_MLDSA_VECTOR pvs2, + PSYMCRYPT_MLDSA_VECTOR pvt0, + PSYMCRYPT_MLDSA_VECTOR pvt1, + PSYMCRYPT_MLDSA_VECTOR pvTmp, + PSYMCRYPT_MLDSA_POLYELEMENT peTmp ) +{ + // T = InvNTT(NTT(A)*NTT(s1) + NTT(s2)) + // pvTmp := NTT(A)*NTT(s1) + SymCryptMlDsaMatrixVectorMontMul( + pmA, + pvs1, + pvTmp, + peTmp ); + + // TODO: should probably do multiplication by R directly in the matrix multiplication function + for(UINT8 i = 0; i < pvTmp->nElems; ++i) + { + SymCryptMlDsaPolyElementMulR(SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT(i, pvTmp)); + } + + // pvTmp := pvTmp + NTT(s2) + SymCryptMlDsaVectorAdd( pvTmp, pvs2, pvTmp ); + + // T = pvTmp := InvNTT(NTT(A)*NTT(s1) + NTT(s2)) + SymCryptMlDsaVectorINTT( pvTmp ); + + SymCryptMlDsaVectorPower2Round( pvTmp, pvt1, pvt0 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaMontReduce( UINT64 a ) +{ + UINT32 t = ((UINT32) a) * SYMCRYPT_MLDSA_Q_INV; + UINT32 m = (((UINT64) t) * SYMCRYPT_MLDSA_Q) >> SYMCRYPT_MLDSA_R_LOG2; + + UINT64 res = (a >> SYMCRYPT_MLDSA_R_LOG2) - m; + UINT32 additionMask = SYMCRYPT_MASK32_LT( res, 0 ); + + res = res + (SYMCRYPT_MLDSA_Q & additionMask); + SYMCRYPT_ASSERT( res < SYMCRYPT_MLDSA_Q ); + + return (UINT32) res; +} + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaMontMul( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < SYMCRYPT_MLDSA_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLDSA_Q ); + + return SymCryptMlDsaMontReduce((UINT64) a * b); +} + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaModAdd( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < SYMCRYPT_MLDSA_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLDSA_Q ); + + UINT32 res = a + b; + UINT32 subtractionMask = SYMCRYPT_MASK32_LT( SYMCRYPT_MLDSA_Q - 1, res ); + + // If res >= Q, subtract Q + res = res - (SYMCRYPT_MLDSA_Q & subtractionMask); + + return 
res; +} + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaModSub( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < SYMCRYPT_MLDSA_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLDSA_Q ); + + UINT32 additionMask = SYMCRYPT_MASK32_LT( a, b ); + + // If a < b, result is negative, so we add Q + return (INT32) a - (INT32) b + (SYMCRYPT_MLDSA_Q & additionMask); +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSA_POLYELEMENT +SYMCRYPT_CALL +SymCryptMlDsaPolyElementCreate( + PBYTE pbBuffer, + SIZE_T cbBuffer ) +{ + UNREFERENCED_PARAMETER( cbBuffer ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT ); + + PSYMCRYPT_MLDSA_POLYELEMENT peElement = (PSYMCRYPT_MLDSA_POLYELEMENT) pbBuffer; + SYMCRYPT_ASSERT( peElement != NULL ); + + return peElement; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementSetZero( + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + for( UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; ++i ) + { + peDst->coeffs[i] = 0; + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementNTT( + PSYMCRYPT_MLDSA_POLYELEMENT peSrc ) +{ + C_ASSERT( (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS & 1) == 0); + + UINT32 k = 0; + + for(UINT32 len = SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 2; len >= 1; len /= 2) + { + for(UINT32 start = 0; start < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; start += 2 * len) + { + k++; + UINT32 twiddleFactor = MLDSA_ZETA_BITREV_TIMES_R[k]; + + for(UINT32 j = start; j < start + len; j++) + { + // + // Typically for Montgomery multiplication, both operands have a factor of R. + // After multiplying, the product has a factor for R^2, and a reduction is then + // performed which divides out a factor of R, resulting in the product again having + // a factor of R^1, mod Q. In this case, the twiddleFactor is pre-multiplied by R, + // but the coefficients are not expected to have a factor of R; thus, after + // reduction, the result does not have a factor of R either. 
+ // + UINT32 t = SymCryptMlDsaMontMul(twiddleFactor, peSrc->coeffs[j + len]); + peSrc->coeffs[j + len] = SymCryptMlDsaModSub(peSrc->coeffs[j], t); + peSrc->coeffs[j] = SymCryptMlDsaModAdd(peSrc->coeffs[j], t); + } + } + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementINTT( + PSYMCRYPT_MLDSA_POLYELEMENT peSrc ) +{ + C_ASSERT( (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS & 1) == 0); + + UINT32 k = SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; + + for(UINT32 len = 1; len < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; len *= 2) + { + for(UINT32 start = 0; start < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; start += 2 * len) + { + k--; + UINT32 twiddleFactor = MLDSA_NEGATIVE_ZETA_BITREV_TIMES_R[k]; + + for(UINT32 j = start; j < start + len; j++) + { + // + // As above, our twiddleFactor is pre-multiplied by R, but the coefficients are not, + // so after the reduction, the result does not have a factor of R. + // + UINT32 t = peSrc->coeffs[j]; + peSrc->coeffs[j] = SymCryptMlDsaModAdd(t, peSrc->coeffs[j + len]); + peSrc->coeffs[j + len] = SymCryptMlDsaModSub(t, peSrc->coeffs[j + len]); + peSrc->coeffs[j + len] = SymCryptMlDsaMontMul(twiddleFactor, peSrc->coeffs[j + len]); + } + } + } + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peSrc->coeffs[i] = SymCryptMlDsaMontMul(SYMCRYPT_MLDSA_INTT_FIXUP_TIMES_R, peSrc->coeffs[i]); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementMulR( + PSYMCRYPT_MLDSA_POLYELEMENT peSrc ) +{ + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peSrc->coeffs[i] = SymCryptMlDsaMontMul(SYMCRYPT_MLDSA_RSQR, peSrc->coeffs[i]); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementMontMul( + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peDst->coeffs[i] = SymCryptMlDsaMontMul(peSrc1->coeffs[i], 
peSrc2->coeffs[i]); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementAdd( + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peDst->coeffs[i] = SymCryptMlDsaModAdd(peSrc1->coeffs[i], peSrc2->coeffs[i]); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementSub( + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peDst->coeffs[i] = SymCryptMlDsaModSub(peSrc1->coeffs[i], peSrc2->coeffs[i]); + } +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSA_VECTOR +SYMCRYPT_CALL +SymCryptMlDsaVectorCreate( + PBYTE pbBuffer, + UINT32 cbBuffer, + UINT8 nElems ) +{ + SYMCRYPT_ASSERT( nElems > 0); + SYMCRYPT_ASSERT( nElems <= SYMCRYPT_MLDSA_VECTOR_MAX_LENGTH ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(nElems) ); + + PSYMCRYPT_MLDSA_VECTOR pvVector = (PSYMCRYPT_MLDSA_VECTOR) pbBuffer; + SYMCRYPT_ASSERT( pvVector != NULL ); + + pvVector->nElems = nElems; + pvVector->cbTotalSize = cbBuffer; + + PBYTE pbCurrent = pbBuffer + sizeof(SYMCRYPT_MLDSA_VECTOR); + for( UINT32 i = 0; i < nElems; ++i ) + { + SymCryptMlDsaPolyElementCreate( pbCurrent, SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT ); + pbCurrent += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT; + } + + return pvVector; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorCopy( + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst->nElems ); + + memcpy( pvDst, pvSrc, pvSrc->cbTotalSize ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorSetZero( + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + for( UINT32 i = 0; i < pvDst->nElems; ++i ) + { + PSYMCRYPT_MLDSA_POLYELEMENT peDst = 
SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + SymCryptMlDsaPolyElementSetZero( peDst ); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorAdd( + PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc1->nElems == pvSrc2->nElems ); + SYMCRYPT_ASSERT( pvSrc1->nElems == pvDst->nElems ); + + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLDSA_POLYELEMENT peDst; + + for( UINT32 i = 0; i < pvSrc1->nElems; ++i ) + { + peSrc1 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc2 ); + peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + SymCryptMlDsaPolyElementAdd( peSrc1, peSrc2, peDst ); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorSub( + PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc1->nElems == pvSrc2->nElems ); + SYMCRYPT_ASSERT( pvSrc1->nElems == pvDst->nElems ); + + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLDSA_POLYELEMENT peDst; + + for( UINT32 i = 0; i < pvSrc1->nElems; ++i ) + { + peSrc1 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc2 ); + peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + SymCryptMlDsaPolyElementSub( peSrc1, peSrc2, peDst ); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorPolyElementMontMul( + PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc1->nElems == pvDst->nElems ); + + for( UINT32 i = 0; i < pvSrc1->nElems; ++i ) + { + PSYMCRYPT_MLDSA_POLYELEMENT peSrc1 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc1 ); + PSYMCRYPT_MLDSA_POLYELEMENT peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + SymCryptMlDsaPolyElementMontMul( peSrc1, peSrc2, 
peDst ); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorNTT( + PSYMCRYPT_MLDSA_VECTOR pvSrc ) +{ + for( UINT32 i = 0; i < pvSrc->nElems; ++i ) + { + SymCryptMlDsaPolyElementNTT( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ) ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorINTT( + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvSrc ) +{ + for( UINT32 i = 0; i < pvSrc->nElems; ++i ) + { + SymCryptMlDsaPolyElementINTT( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ) ); + } +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSA_MATRIX +SYMCRYPT_CALL +SymCryptMlDsaMatrixCreate( + PBYTE pbBuffer, + UINT32 cbBuffer, + UINT8 nRows, + UINT8 nCols ) +{ + SYMCRYPT_ASSERT( nRows > 0); + SYMCRYPT_ASSERT( nCols > 0); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLDSA_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( nCols <= SYMCRYPT_MLDSA_MATRIX_MAX_NCOLS ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX(nRows, nCols) ); + + PSYMCRYPT_MLDSA_MATRIX pMatrix = (PSYMCRYPT_MLDSA_MATRIX) pbBuffer; + SYMCRYPT_ASSERT( pMatrix != NULL ); + + pMatrix->nRows = nRows; + pMatrix->nCols = nCols; + pMatrix->cbTotalSize = cbBuffer; + + PBYTE pbCurrent = pbBuffer + sizeof(SYMCRYPT_MLDSA_MATRIX); + for(UINT32 i = 0; i < (UINT32) nRows * nCols; ++i) + { + SymCryptMlDsaPolyElementCreate( pbCurrent, SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT ); + pbCurrent += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT; + } + + return pMatrix; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaMatrixVectorMontMul( + PCSYMCRYPT_MLDSA_MATRIX pmSrc1, + PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + PSYMCRYPT_MLDSA_VECTOR pvDst, + PSYMCRYPT_MLDSA_POLYELEMENT peTmp) +{ + SYMCRYPT_ASSERT( pmSrc1->nCols == pvSrc2->nElems ); + SYMCRYPT_ASSERT( pmSrc1->nRows == pvDst->nElems ); + + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLDSA_POLYELEMENT peDst; + + SymCryptMlDsaVectorSetZero( pvDst ); + + _Analysis_assume_( pmSrc1->nRows > 0 ); + _Analysis_assume_( pmSrc1->nCols > 0 ); + + for( UINT32 i = 
0; i < pmSrc1->nRows; ++i ) + { + // peDst = pvDst[i] + peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + for( UINT32 j = 0; j < pmSrc1->nCols; ++j ) + { + peSrc1 = SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT( i, j, pmSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( j, pvSrc2 ); + + SymCryptMlDsaPolyElementMontMul( peSrc1, peSrc2, peTmp ); + SymCryptMlDsaPolyElementAdd( peDst, peTmp, peDst ); + } + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaRejNttPoly( + PCBYTE pbRejNttPolySeed, + SIZE_T cbRejNttPolySeed, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + SYMCRYPT_ASSERT( cbRejNttPolySeed == SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE ); + + SYMCRYPT_SHAKE128_STATE shakeState; + SymCryptShake128Init( &shakeState ); + SymCryptShake128Append( &shakeState, pbRejNttPolySeed, cbRejNttPolySeed ); + + UINT32 coeff = 0; + BYTE shakeBytes[4]; // We only use 3 bytes, but using 4 allows converting to UINT32 more easily + + SymCryptWipeKnownSize( shakeBytes, sizeof(shakeBytes) ); + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + // CoeffFromThreeBytes from FIPS 204 + do + { + SymCryptShake128Extract( &shakeState, shakeBytes, 3, FALSE ); + shakeBytes[2] &= 0x7F; // if b2 > 127, b2 -= 128 + coeff = SYMCRYPT_LOAD_LSBFIRST32( shakeBytes ); + } while (coeff >= SYMCRYPT_MLDSA_Q); + + peDst->coeffs[i] = coeff; + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandA( + PCBYTE pbPublicSeed, + SIZE_T cbPublicSeed, + PSYMCRYPT_MLDSA_MATRIX pmA ) +{ + SYMCRYPT_ASSERT( cbPublicSeed == SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE ); + C_ASSERT( SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE == SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE + 2 ); + + // The expanded seed is the public seed concatenated with one byte each for the column and row + // indices of the matrix element being expanded. 
BYTE rejNttSeed[SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE];
    memcpy( rejNttSeed, pbPublicSeed, cbPublicSeed );

    for( UINT8 i = 0; i < pmA->nRows; ++i )
    {
        for( UINT8 j = 0; j < pmA->nCols; ++j )
        {
            // Second-to-last byte is the column index, last byte is the row index
            rejNttSeed[SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE - 2] = j;
            rejNttSeed[SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE - 1] = i;

            #pragma prefast( suppress: 6385, "False warning - reading invalid data from rejNttSeed" );
            SymCryptMlDsaRejNttPoly( rejNttSeed, sizeof(rejNttSeed), SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT( i, j, pmA ) );

        }
    }
}

//
// Maps a half-byte (0..15) to a signed private-key coefficient, or returns INT8_MIN when the
// half-byte is rejected.
//
//  - privateKeyRange == 2: values 0..14 are accepted and mapped to 2 - (halfByte mod 5)
//  - privateKeyRange == 4: values 0..8 are accepted and mapped to 4 - halfByte
//
// Both preconditions (halfByte <= 15, privateKeyRange is 2 or 4) are checked with
// SYMCRYPT_ASSERT only.
//
_Use_decl_annotations_
FORCEINLINE
INT8
SYMCRYPT_CALL
SymCryptMlDsaCoeffFromHalfByte(
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams,
    UINT8 halfByte )
{
    SYMCRYPT_ASSERT( halfByte <= 15 );
    SYMCRYPT_ASSERT( pParams->privateKeyRange == 2 || pParams->privateKeyRange == 4 );

    if( pParams->privateKeyRange == 2 && halfByte < 15)
    {
        // Division by 5 without a divide instruction: (x * 13) >> 6 == x / 5 for x in 0..14
        UINT8 halfByteDiv5 = (UINT8) ( ( halfByte * 13 ) >> 6 );
        UINT8 halfByteMod5 = halfByte - (5 * halfByteDiv5);
        return 2 - halfByteMod5;
    }
    else if( pParams->privateKeyRange == 4 && halfByte < 9 )
    {
        return 4 - halfByte;
    }

    // Rejected sample
    return INT8_MIN;
}

//
// Fills all SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS coefficients of peDst by rejection sampling
// from SHAKE256 over the given seed.
//
// Each SHAKE output byte yields up to two coefficients (low half-byte first, then high
// half-byte) via SymCryptMlDsaCoeffFromHalfByte; accepted values are stored modulo Q using
// SymCryptMlDsaSignedCoefficientModQ. The SHAKE state is wiped before returning.
//
// cbRejBoundedPolySeed must equal SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE (asserted).
//
_Use_decl_annotations_
VOID
SYMCRYPT_CALL
SymCryptMlDsaRejBoundedPoly(
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams,
    PCBYTE pbRejBoundedPolySeed,
    SIZE_T cbRejBoundedPolySeed,
    PSYMCRYPT_MLDSA_POLYELEMENT peDst )
{
    SYMCRYPT_ASSERT( cbRejBoundedPolySeed == SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE );

    SYMCRYPT_SHAKE256_STATE shakeState;
    SymCryptShake256Init( &shakeState );
    SymCryptShake256Append( &shakeState, pbRejBoundedPolySeed, cbRejBoundedPolySeed );

    BYTE shakeByte;
    UINT32 i = 0;
    INT8 z0, z1;

    do
    {
        // Note on sidechannel safety: the rejection sampling here can leak which bytes of the SHAKE
        // output are used and which are rejected. However, bytes themselves are not leaked. This
        // may allow the attacker to more quickly eliminate incorrect seed values when doing an
        // exhaustive search, but given the size of the seed this should still not make the
        // exhaustive search computationally feasible.
        SymCryptShake256Extract( &shakeState, &shakeByte, sizeof(shakeByte), FALSE );
        z0 = SymCryptMlDsaCoeffFromHalfByte( pParams, shakeByte & 0x0F );
        z1 = SymCryptMlDsaCoeffFromHalfByte( pParams, shakeByte >> 4 );

        SYMCRYPT_ASSERT( z0 == INT8_MIN || (( z0 + pParams->privateKeyRange >= 0 ) && ( z0 + pParams->privateKeyRange <= 2 * pParams->privateKeyRange )) );
        SYMCRYPT_ASSERT( z1 == INT8_MIN || (( z1 + pParams->privateKeyRange >= 0 ) && ( z1 + pParams->privateKeyRange <= 2 * pParams->privateKeyRange )) );

        // i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS is guaranteed here by the loop condition
        if(z0 != INT8_MIN)
        {
            peDst->coeffs[i] = SymCryptMlDsaSignedCoefficientModQ( z0 );
            i++;
        }

        // The first half-byte may have filled the last slot, so re-check the bound
        if(z1 != INT8_MIN && i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS)
        {
            peDst->coeffs[i] = SymCryptMlDsaSignedCoefficientModQ( z1 );
            i++;
        }

    } while( i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS );

    SymCryptWipeKnownSize( &shakeState, sizeof(shakeState) );
}

//
// Expands the private vector seed into the vectors pvs1 (pParams->nCols elements) and pvs2
// (pParams->nRows elements). Each element is generated by SymCryptMlDsaRejBoundedPoly from the
// seed followed by a 16-bit little-endian index: 0..nCols-1 for pvs1, then nCols..nCols+nRows-1
// for pvs2.
//
// cbPrivateVectorSeed must equal SYMCRYPT_MLDSA_PRIVATE_VECTOR_SEED_SIZE (asserted).
//
_Use_decl_annotations_
VOID
SYMCRYPT_CALL
SymCryptMlDsaExpandS(
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams,
    PCBYTE pbPrivateVectorSeed,
    SIZE_T cbPrivateVectorSeed,
    PSYMCRYPT_MLDSA_VECTOR pvs1,
    PSYMCRYPT_MLDSA_VECTOR pvs2 )
{
    SYMCRYPT_ASSERT( cbPrivateVectorSeed == SYMCRYPT_MLDSA_PRIVATE_VECTOR_SEED_SIZE );
    C_ASSERT( SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE == SYMCRYPT_MLDSA_PRIVATE_VECTOR_SEED_SIZE + 2 );

    UINT32 nRows = pParams->nRows;
    UINT32 nCols = pParams->nCols;

    // The expanded seed is the private vector seed concatenated with the (two-byte) row/column
    // index of the vector element being expanded.
+ BYTE rejBoundedPolySeed[SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE]; + memcpy( rejBoundedPolySeed, pbPrivateVectorSeed, cbPrivateVectorSeed ); + + for(UINT16 i = 0; i < nCols; ++i) + { + SYMCRYPT_STORE_LSBFIRST16( rejBoundedPolySeed + SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE - sizeof(UINT16), i ); + SymCryptMlDsaRejBoundedPoly( pParams, rejBoundedPolySeed, sizeof(rejBoundedPolySeed), + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvs1 ) ); + } + + for(UINT16 i = 0; i < nRows; ++i) + { + SYMCRYPT_STORE_LSBFIRST16( rejBoundedPolySeed + SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE - sizeof(UINT16), (UINT16) nCols + i ); + #pragma prefast( suppress: 6385, "False warning - reading invalid data from rejBoundedPolySeed" ); // Doesn't trigger in previous loop for some reason + SymCryptMlDsaRejBoundedPoly( pParams, rejBoundedPolySeed, sizeof(rejBoundedPolySeed), + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvs2 ) ); + } + + SymCryptWipeKnownSize( rejBoundedPolySeed, sizeof(rejBoundedPolySeed) ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaSampleInBall( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + PSYMCRYPT_MLDSA_POLYELEMENT peChallenge ) +{ + SymCryptMlDsaPolyElementSetZero( peChallenge ); + + SYMCRYPT_SHAKE256_STATE shakeState; + SymCryptShake256Init( &shakeState ); + SymCryptShake256Append( &shakeState, pbCommitmentHash, cbCommitmentHash ); + + // The first 8 bytes are used as as powers of negative one when sampling the challenge + // polynomial: c[j] = -1^(H(rho)[i + tau - 256]) + BYTE temp[8]; + SYMCRYPT_ASSERT( pParams->nChallengeNonZeroCoeffs <= 8 * sizeof(temp) ); + SymCryptShake256Extract( &shakeState, temp, sizeof(temp), FALSE ); + + UINT64 powersOfNegativeOne = SYMCRYPT_LOAD_LSBFIRST64( temp ); + + for(UINT32 i = SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS - pParams->nChallengeNonZeroCoeffs; + i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; + ++i) + { + BYTE j = 0; + do + { + SymCryptShake256Extract( 
&shakeState, &j, sizeof(j), FALSE ); + } while( j > i ); + + peChallenge->coeffs[i] = peChallenge->coeffs[j]; + + UINT32 negativeMask = SYMCRYPT_MASK32_NONZERO( powersOfNegativeOne & 1 ); + powersOfNegativeOne >>= 1; + + // Set the coefficient modulo Q + peChallenge->coeffs[j] = ((SYMCRYPT_MLDSA_Q - 1) & negativeMask) | (1 & ~negativeMask); + } + + SymCryptWipeKnownSize( &shakeState, sizeof(shakeState) ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandMask( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PSYMCRYPT_SHAKE256_STATE pShakeState, + PCBYTE pbPrivateRandom, + SIZE_T cbPrivateRandom, + UINT16 counter, + PSYMCRYPT_MLDSA_VECTOR pvMask ) +{ + SYMCRYPT_ASSERT( pParams->nCols == pvMask->nElems ); + SYMCRYPT_ASSERT( cbPrivateRandom == SYMCRYPT_SHAKE256_RESULT_SIZE ); + + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE seedSuffix[2]; + + UINT32 cbShakeOutput = (pParams->maskCoefficientRangeLog2 + 1) * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + BYTE shakeOutput[20 * 32]; // Maximum size of the SHAKE output + SYMCRYPT_ASSERT( cbShakeOutput <= sizeof(shakeOutput) ); + + for(UINT16 i = 0; i < pvMask->nElems; ++i) + { + SYMCRYPT_STORE_LSBFIRST16( seedSuffix, counter + i ); + SymCryptShake256Append( pShakeState, pbPrivateRandom, cbPrivateRandom ); + SymCryptShake256Append( pShakeState, (PBYTE) &seedSuffix, sizeof(seedSuffix) ); + SymCryptShake256Extract( pShakeState, shakeOutput, cbShakeOutput, TRUE ); + + scError = SymCryptMlDsaPolyElementDecode( + shakeOutput, + pParams->maskCoefficientRangeLog2 + 1, + 1 << pParams->maskCoefficientRangeLog2, + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvMask ) ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + } + + SymCryptMlDsaVectorNTT( pvMask ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaMakeHint( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PSYMCRYPT_MLDSA_VECTOR pvWMinusCs2, + PSYMCRYPT_MLDSA_VECTOR pvWMinusCs2PlusCt0, + PSYMCRYPT_MLDSA_VECTOR pvDst, + UINT32* nBitsSet ) 
{
    SYMCRYPT_ASSERT( pvWMinusCs2->nElems == pvWMinusCs2PlusCt0->nElems );
    SYMCRYPT_ASSERT( pvWMinusCs2->nElems == pvDst->nElems );

    *nBitsSet = 0;

    // Both inputs are replaced in place by their high bits
    SymCryptMlDsaVectorHighBits( pParams, pvWMinusCs2, pvWMinusCs2 );
    SymCryptMlDsaVectorHighBits( pParams, pvWMinusCs2PlusCt0, pvWMinusCs2PlusCt0 );

    for( UINT32 i = 0; i < pvDst->nElems; ++i )
    {
        PSYMCRYPT_MLDSA_POLYELEMENT peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst );
        PSYMCRYPT_MLDSA_POLYELEMENT peVec0 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvWMinusCs2 );
        PSYMCRYPT_MLDSA_POLYELEMENT peVec1 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvWMinusCs2PlusCt0 );

        for( UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; ++j )
        {
            // Hint bit is 1 exactly when the two high-bit values differ (computed without branching)
            peDst->coeffs[j] = 1 & ~SYMCRYPT_MASK32_EQ(peVec0->coeffs[j], peVec1->coeffs[j]);
            *nBitsSet += peDst->coeffs[j];
        }
    }
}

//
// Applies the hint vector pvHint to pvCommitment in place.
//
// Every coefficient of pvCommitment is decomposed with SymCryptMlDsaDecompose into (r1, r0) and
// replaced by r1, (r1 + 1) mod commitmentModulus or (r1 - 1) mod commitmentModulus, depending on
// the hint bit and the sign of r0 (see the detailed comment inside the loop).
//
// Hint coefficients must be 0 or 1 and both vectors must have the same number of elements
// (checked with SYMCRYPT_ASSERT only).
//
_Use_decl_annotations_
VOID
SYMCRYPT_CALL
SymCryptMlDsaUseHint(
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams,
    PCSYMCRYPT_MLDSA_VECTOR pvHint,
    PSYMCRYPT_MLDSA_VECTOR pvCommitment )
{
    SYMCRYPT_ASSERT( pvHint->nElems == pvCommitment->nElems );

    UINT32 r1, r0;
    UINT32 hintIsZeroMask;
    UINT32 tmpMask;
    UINT32 r0PrimeGtZeroMask;
    UINT32 positiveOffsetCoeff;
    UINT32 negativeOffsetCoeff;

    for(UINT32 i = 0; i < pvHint->nElems; ++i)
    {
        PSYMCRYPT_MLDSA_POLYELEMENT peHint = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvHint );
        PSYMCRYPT_MLDSA_POLYELEMENT peCommitment = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvCommitment );

        for(UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; ++j)
        {
            SYMCRYPT_ASSERT( peHint->coeffs[j] == 0 || peHint->coeffs[j] == 1 );

            SymCryptMlDsaDecompose( pParams, peCommitment->coeffs[j], &r1, &r0 );

            //
            // FIPS-204 UseHint lines 3-5
            // r1 in range [0, commitmentModulus)
            // r0 in range [-commitmentRoundingRange, commitmentRoundingRange], encoded
            // as an unsigned integer modulo Q
            //
            // Let r0' := r0 mod+- 2*gamma_2
            // (This is just r0 in FIPS 204, which uses signed integers.)
            //
            // r0 - 1 < commitmentRoundingRange => r0' > 0
            // r0 - 1 >= commitmentRoundingRange => r0' <= 0
            //
            // There are three cases to consider:
            // 1. If the hint is zero, the coefficient is set to r1.
            // 2. Else if r0' > 0, the coefficient is (r1 + 1) mod commitmentModulus.
            // 3. Else (r0' <= 0), the coefficient is (r1 - 1) mod commitmentModulus.
            //
            // The hint is public so we don't have to implement this in a sidechannel-safe manner,
            // but avoiding branches will improve performance.
            //

            // Set up masks to determine which case we fall into
            hintIsZeroMask = peHint->coeffs[j] - 1; // 0 if hint is 1, 0xFFFFFFFF if hint is 0
            // For r0 == 0 the subtraction wraps to 0xFFFFFFFF, which must compare as "not less than"
            r0PrimeGtZeroMask = SYMCRYPT_MASK32_LT( r0 - 1, pParams->commitmentRoundingRange );

            // Case 2: r0' > 0, so the coefficient is (r1 + 1) mod commitmentModulus,
            // i.e. (r1 + 1) if r1 != commitmentModulus - 1, else 0
            tmpMask = ~SYMCRYPT_MASK32_EQ( r1, (UINT32) (pParams->commitmentModulus - 1) );
            positiveOffsetCoeff = tmpMask & (r1 + 1);

            // Case 3: r0' <= 0, so the coefficient is (r1 - 1) mod commitmentModulus,
            // i.e. (r1 - 1) if r1 != 0, else commitmentModulus - 1
            tmpMask = SYMCRYPT_MASK32_EQ( r1, 0 );
            negativeOffsetCoeff = ( tmpMask & (pParams->commitmentModulus - 1) ) | ( ~tmpMask & ( r1 - 1 ) );

            // Mask out each possible coefficient based on which case we fall into
            r1 &= hintIsZeroMask;
            positiveOffsetCoeff &= ~hintIsZeroMask & r0PrimeGtZeroMask;
            negativeOffsetCoeff &= ~hintIsZeroMask & ~r0PrimeGtZeroMask;

            // Sanity check: no combination of masked values should be set simultaneously
            SYMCRYPT_ASSERT( (r1 & positiveOffsetCoeff) == 0 );
            SYMCRYPT_ASSERT( (r1 & negativeOffsetCoeff) == 0 );
            SYMCRYPT_ASSERT( (positiveOffsetCoeff & negativeOffsetCoeff) == 0 );

            // Finally, we can pick the correct coefficient using our masks
            peCommitment->coeffs[j] = r1 | positiveOffsetCoeff | negativeOffsetCoeff;
        }
    }
}

//
// Encodes the public key of pkMlDsakey into pbDst: the public seed followed by t1, which is
// copied to a temporary vector, converted with SymCryptMlDsaVectorINTT and packed at 10 bits per
// coefficient.
//
// Returns:
//  SYMCRYPT_INVALID_ARGUMENT           - cbDst != pParams->cbEncodedPublicKey
//  SYMCRYPT_MEMORY_ALLOCATION_FAILURE  - the temporaries could not be allocated
//  SYMCRYPT_NO_ERROR                   - otherwise
//
_Use_decl_annotations_
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMlDsaPkEncode(
    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
    PBYTE pbDst,
    SIZE_T cbDst )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;

    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams;
    UINT32 cbEncodedKey = pParams->cbEncodedPublicKey;
    PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL;

    if( cbDst != cbEncodedKey )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize( pParams, 1, 0, 0, 0 );
    if( pTemps == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    {
        PSYMCRYPT_MLDSA_VECTOR pvT1InvNTT = pTemps->pvRowVectors[0];

        PBYTE pbCurr = pbDst;
        memcpy( pbCurr, pkMlDsakey->publicSeed, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE );

        pbCurr += SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE;

        // Work on a copy so the key's own t1 is left untouched
        SymCryptMlDsaVectorCopy( pkMlDsakey->pvt1, pvT1InvNTT );
        SymCryptMlDsaVectorINTT( pvT1InvNTT );

        // Pack each coefficient of T1 into 10 bits. Coefficients are rounded by Power2Round so they're
        // guaranteed to be at most 10 bits long.
SYMCRYPT_ASSERT( cbDst - SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE == SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvT1InvNTT, 10u ) );
        SymCryptMlDsaVectorEncode( pvT1InvNTT, 10, 0, pbCurr );
    }

cleanup:
    if( pTemps != NULL)
    {
        SymCryptMlDsaTemporariesFree( pTemps );
    }

    return scError;
}

//
// Imports an encoded public key (public seed followed by t1 at 10 bits per coefficient) into
// pkMlDsakey.
//
// Any root seed / private key material already present in the key object is wiped and the
// corresponding flags are cleared. t1 is decoded and converted with SymCryptMlDsaVectorNTT, the
// matrix A is re-expanded from the public seed, and publicKeyHash is set to the SHAKE256 result
// over the public seed followed by the encoded polynomials.
//
// flags is not used.
//
// Returns:
//  SYMCRYPT_WRONG_KEY_SIZE             - cbSrc != pParams->cbEncodedPublicKey
//  SYMCRYPT_MEMORY_ALLOCATION_FAILURE  - the temporaries could not be allocated
//  any error from SymCryptMlDsaPolyElementDecode
//  SYMCRYPT_NO_ERROR                   - otherwise
//
_Use_decl_annotations_
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMlDsaPkDecode(
    PCBYTE pbSrc,
    SIZE_T cbSrc,
    UINT32 flags,
    PSYMCRYPT_MLDSAKEY pkMlDsakey )
{
    UNREFERENCED_PARAMETER( flags );

    // Size of one encoded polynomial from t1: 256 coefficients * 10 bits per coefficient / 8 bits per byte
    const UINT32 cbEncodedPoly = SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS * 10 / 8;

    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    PCBYTE pbCurr = pbSrc;
    PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL;
    PSYMCRYPT_SHAKE256_STATE pShakeState = NULL;

    if( cbSrc != pkMlDsakey->pParams->cbEncodedPublicKey )
    {
        scError = SYMCRYPT_WRONG_KEY_SIZE;
        goto cleanup;
    }

    // Allocate space for an encoded polynomial so we can copy the input, decode it, and append it
    // to our SHAKE state. We copy it to a local buffer so we don't violate the read-once rule when
    // appending to the SHAKE state.
pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize( pkMlDsakey->pParams, 0, 0, 0, cbEncodedPoly );
    if( pTemps == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    // Reset the private key state in case this key object is being reused
    if( pkMlDsakey->hasRootSeed )
    {
        SymCryptWipeKnownSize( pkMlDsakey->rootSeed, SYMCRYPT_MLDSA_ROOT_SEED_SIZE );
        pkMlDsakey->hasRootSeed = FALSE;
    }

    if( pkMlDsakey->hasPrivateKey )
    {
        SymCryptWipeKnownSize( pkMlDsakey->privateSigningSeed, SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE );
        SymCryptMlDsaVectorSetZero( pkMlDsakey->pvs1 );
        SymCryptMlDsaVectorSetZero( pkMlDsakey->pvs2 );
        SymCryptMlDsaVectorSetZero( pkMlDsakey->pvt0 );
        pkMlDsakey->hasPrivateKey = FALSE;
    }

    memcpy( pkMlDsakey->publicSeed, pbCurr, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE );
    pbCurr += SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE;

    pShakeState = &(pTemps->shake256State);

    // The public key hash covers the public seed followed by every encoded polynomial of t1
    SymCryptShake256Init( pShakeState );
    SymCryptShake256Append( pShakeState, pkMlDsakey->publicSeed, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE );

    for( UINT32 i = 0; i < pkMlDsakey->pvt1->nElems; ++i )
    {
        // Copy once, then decode and hash from the private copy (read-once rule for the input)
        memcpy( pTemps->pbScratch, pbCurr, cbEncodedPoly );

        PSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pkMlDsakey->pvt1 );

        scError = SymCryptMlDsaPolyElementDecode(
            pTemps->pbScratch,
            10,
            0,
            peElement );
        if( scError != SYMCRYPT_NO_ERROR )
        {
            goto cleanup;
        }

        SymCryptShake256Append( pShakeState, pTemps->pbScratch, cbEncodedPoly );

        pbCurr += cbEncodedPoly;
    }

    SYMCRYPT_ASSERT( pbCurr == pbSrc + cbSrc );

    SymCryptMlDsaVectorNTT( pkMlDsakey->pvt1 );

    SymCryptMlDsaExpandA(
        pkMlDsakey->publicSeed,
        SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE,
        pkMlDsakey->pmA );

    SymCryptShake256Result( pShakeState, pkMlDsakey->publicKeyHash );

cleanup:
    if( pTemps != NULL )
    {
        SymCryptMlDsaTemporariesFree( pTemps );
    }

    return scError;
}

//
// Encodes the private key of pkMlDsakey into pbDst, in this order: public seed, private signing
// seed, public key hash, s1, s2, t0. Each vector is copied to a temporary, converted with
// SymCryptMlDsaVectorINTT and then packed with SymCryptMlDsaVectorEncode.
//
// Returns:
//  SYMCRYPT_INCOMPATIBLE_FORMAT        - the key object has no private key
//  SYMCRYPT_INVALID_ARGUMENT           - cbDst != pParams->cbEncodedPrivateKey
//  SYMCRYPT_MEMORY_ALLOCATION_FAILURE  - the temporaries could not be allocated
//  SYMCRYPT_NO_ERROR                   - otherwise
//
_Use_decl_annotations_
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMlDsaSkEncode(
    PCSYMCRYPT_MLDSAKEY pkMlDsakey,
    PBYTE pbDst,
    SIZE_T cbDst )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams;
    UINT32 cbEncodedKey = pParams->cbEncodedPrivateKey;
    PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL;
    PBYTE pbCurr = pbDst;

    if( !pkMlDsakey->hasPrivateKey )
    {
        scError = SYMCRYPT_INCOMPATIBLE_FORMAT;
        goto cleanup;
    }

    if( cbDst != cbEncodedKey )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    // One row vector (s2, then reused for t0) and one column vector (s1)
    pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize( pParams, 1, 1, 0, 0 );
    if( pTemps == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    memcpy( pbCurr, pkMlDsakey->publicSeed, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE );
    pbCurr += SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE;

    memcpy( pbCurr, pkMlDsakey->privateSigningSeed, SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE );
    pbCurr += SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE;

    memcpy( pbCurr, pkMlDsakey->publicKeyHash, SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE );
    pbCurr += SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE;

    {
        // Inverse NTT and encode s1
        PSYMCRYPT_MLDSA_VECTOR pvs1InvNTT = pTemps->pvColVectors[0];
        SymCryptMlDsaVectorCopy( pkMlDsakey->pvs1, pvs1InvNTT );
        SymCryptMlDsaVectorINTT( pvs1InvNTT );

        SymCryptMlDsaVectorEncode(
            pvs1InvNTT,
            pParams->encodedCoefficientBitLength,
            pParams->privateKeyRange,
            pbCurr );
        pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvs1InvNTT, pParams->encodedCoefficientBitLength );
    }

    {
        // Inverse NTT and encode s2
        PSYMCRYPT_MLDSA_VECTOR pvs2InvNTT = pTemps->pvRowVectors[0];
        SymCryptMlDsaVectorCopy( pkMlDsakey->pvs2, pvs2InvNTT );
        SymCryptMlDsaVectorINTT( pvs2InvNTT );

        SymCryptMlDsaVectorEncode(
            pvs2InvNTT,
            pParams->encodedCoefficientBitLength,
            pParams->privateKeyRange,
            pbCurr );
        pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvs2InvNTT, pParams->encodedCoefficientBitLength );
    }

    {
        // Inverse NTT and encode t0
        // Can re-use the previous temporary row vector as it's no longer needed
        PSYMCRYPT_MLDSA_VECTOR pvt0InvNTT = pTemps->pvRowVectors[0];
        SymCryptMlDsaVectorCopy( pkMlDsakey->pvt0, pvt0InvNTT );
        SymCryptMlDsaVectorINTT( pvt0InvNTT );

        SymCryptMlDsaVectorEncode(
            pvt0InvNTT,
            SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS,
            1 << (SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS - 1),
            pbCurr );
        pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvt0InvNTT, SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS );
    }

    SYMCRYPT_ASSERT( pbCurr == pbDst + cbDst );

cleanup:
    if( pTemps != NULL)
    {
        SymCryptMlDsaTemporariesFree( pTemps );
    }

    return scError;
}

//
// Imports an encoded private key (public seed, private signing seed, public key hash, s1, s2,
// t0) into pkMlDsakey and validates it.
//
// t0, t1 and the public key hash are recomputed from the imported seed, s1 and s2; the import
// fails with SYMCRYPT_INVALID_BLOB if the recomputed t0 or public key hash differs from the
// imported one. hasPrivateKey is only set once all checks have passed. On any error the
// private signing seed, s1, s2 and t0 in the key object are wiped.
//
// flags is not used.
//
// Returns:
//  SYMCRYPT_WRONG_KEY_SIZE             - cbSrc != pParams->cbEncodedPrivateKey
//  SYMCRYPT_MEMORY_ALLOCATION_FAILURE  - the temporaries could not be allocated
//  SYMCRYPT_INVALID_BLOB               - the imported t0 or public key hash is inconsistent
//  any error from SymCryptMlDsaVectorDecode or SymCryptMlDsaPkEncode
//  SYMCRYPT_NO_ERROR                   - otherwise
//
_Use_decl_annotations_
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMlDsaSkDecode(
    PCBYTE pbSrc,
    SIZE_T cbSrc,
    UINT32 flags,
    PSYMCRYPT_MLDSAKEY pkMlDsakey )
{
    UNREFERENCED_PARAMETER( flags );

    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams;
    PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL;
    PCBYTE pbCurr = pbSrc;
    BYTE pubKeyHashTmp[SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE];

    if( cbSrc != pkMlDsakey->pParams->cbEncodedPrivateKey )
    {
        scError = SYMCRYPT_WRONG_KEY_SIZE;
        goto cleanup;
    }

    pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize(
        pParams,
        2, // row vectors - t, t0
        0, // column vectors
        1, // poly elements - temporary space
        pkMlDsakey->pParams->cbEncodedPublicKey ); // scratch space
    if( pTemps == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    // We use temporaries to recalculate t, t0, t1 and the public key hash from the import values
    // for A (derived from the public seed), s1 and s2. This is not strictly necessary for callers
    // who just want to import a private key and use it for signing, but it allows the private
    // key to also be used for verification, and more importantly, provides extra robustness by
    // ensuring that the derived values are consistent with the encoded values. If the perf of
    // importing a key becomes a concern, we can move this recalculation to the first time the key
    // is used for verification.
    PSYMCRYPT_MLDSA_VECTOR pvtTmp = pTemps->pvRowVectors[0];
    PSYMCRYPT_MLDSA_VECTOR pvt0Tmp = pTemps->pvRowVectors[1];
    PSYMCRYPT_MLDSA_POLYELEMENT peTmp = pTemps->pePolyElements[0];
    PBYTE pbEncodedPubKeyTmp = pTemps->pbScratch;

    // Reset the key state in case this key is being reused
    pkMlDsakey->hasRootSeed = FALSE;
    pkMlDsakey->hasPrivateKey = FALSE;
    SymCryptWipeKnownSize( pkMlDsakey->rootSeed, SYMCRYPT_MLDSA_ROOT_SEED_SIZE );

    memcpy( pkMlDsakey->publicSeed, pbCurr, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE );
    pbCurr += SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE;

    memcpy( pkMlDsakey->privateSigningSeed, pbCurr, SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE );
    pbCurr += SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE;

    // The imported hash is only kept in a local; the key object receives the recomputed hash
    memcpy( pubKeyHashTmp, pbCurr, SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE );
    pbCurr += SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE;

    // Expand A matrix
    SymCryptMlDsaExpandA( pkMlDsakey->publicSeed, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE, pkMlDsakey->pmA );

    scError = SymCryptMlDsaVectorDecode(
        pbCurr,
        pParams->encodedCoefficientBitLength,
        pParams->privateKeyRange,
        pkMlDsakey->pvs1 );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }
    pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pkMlDsakey->pvs1, pParams->encodedCoefficientBitLength );


    scError = SymCryptMlDsaVectorDecode(
        pbCurr,
        pParams->encodedCoefficientBitLength,
        pParams->privateKeyRange,
        pkMlDsakey->pvs2 );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }
    pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pkMlDsakey->pvs2, pParams->encodedCoefficientBitLength );

    // The imported t0 is decoded into a temporary; it is only used for the consistency check below
    scError = SymCryptMlDsaVectorDecode(
        pbCurr,
        SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS,
        1 << (SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS - 1),
        pvt0Tmp );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }
    pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pkMlDsakey->pvt0, SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS );

    SYMCRYPT_ASSERT( pbCurr == pbSrc + cbSrc );

    // Convert s1 and s2 to NTT form
    SymCryptMlDsaVectorNTT( pkMlDsakey->pvs1 );
    SymCryptMlDsaVectorNTT( pkMlDsakey->pvs2 );

    SymCryptMlDsakeyComputeT(
        pkMlDsakey->pmA,
        pkMlDsakey->pvs1,
        pkMlDsakey->pvs2,
        pkMlDsakey->pvt0,
        pkMlDsakey->pvt1,
        pvtTmp,
        peTmp );

    // If the recalculated t0 doesn't match, the imported key is invalid.
    // Note: SymCryptMlDsakeyComputeT sets t0 and t1 in NTT form.
    // NOTE(review): the note above looks inconsistent with this function - t0 is compared here
    // against the freshly decoded pvt0Tmp (which has not been through the NTT), and both t0 and
    // t1 are only converted with SymCryptMlDsaVectorNTT below. Presumably ComputeT leaves them
    // in standard (non-NTT) form; confirm against SymCryptMlDsakeyComputeT.
    if( memcmp( pkMlDsakey->pvt0, pvt0Tmp, pvt0Tmp->cbTotalSize ) != 0 )
    {
        scError = SYMCRYPT_INVALID_BLOB;
        goto cleanup;
    }

    // Convert t0 and t1 to NTT form
    SymCryptMlDsaVectorNTT( pkMlDsakey->pvt0 );
    SymCryptMlDsaVectorNTT( pkMlDsakey->pvt1 );

    scError = SymCryptMlDsaPkEncode( pkMlDsakey, pbEncodedPubKeyTmp, pParams->cbEncodedPublicKey );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

    // Recalculate the public key hash and compare it to the imported value. If they don't match,
    // the imported key is invalid.
+ SymCryptShake256( + pbEncodedPubKeyTmp, + pParams->cbEncodedPublicKey, + pkMlDsakey->publicKeyHash, + SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE ); + + if( memcmp( pkMlDsakey->publicKeyHash, pubKeyHashTmp, SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + pkMlDsakey->hasPrivateKey = TRUE; + +cleanup: + if( pTemps != NULL ) + { + SymCryptMlDsaTemporariesFree( pTemps ); + } + + // Wipe private state on error as defense-in-depth + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCryptWipeKnownSize( pkMlDsakey->privateSigningSeed, SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE ); + SymCryptMlDsaVectorSetZero( pkMlDsakey->pvs1 ); + SymCryptMlDsaVectorSetZero( pkMlDsakey->pvs2 ); + SymCryptMlDsaVectorSetZero( pkMlDsakey->pvt0 ); + } + + return scError; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaSigEncode( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + PCSYMCRYPT_MLDSA_VECTOR pvResponse, + PCSYMCRYPT_MLDSA_VECTOR pvHint, + PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ASSERT( cbDst == pParams->cbEncodedSignature ); + UNREFERENCED_PARAMETER( cbDst ); + + const SIZE_T cbEncodedHint = pParams->nHintNonZeroCoeffs + pvHint->nElems; + + PBYTE pbCurr = pbDst; + + memcpy( pbCurr, pbCommitmentHash, cbCommitmentHash ); + pbCurr += cbCommitmentHash; + + SymCryptMlDsaVectorEncode( + pvResponse, + pParams->maskCoefficientRangeLog2 + 1, + 1 << pParams->maskCoefficientRangeLog2, + pbCurr ); + + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvResponse, pParams->maskCoefficientRangeLog2 + 1 ); + + SymCryptMlDsaHintBitPack( pParams, pvHint, pbCurr ); + pbCurr += cbEncodedHint; + + SYMCRYPT_ASSERT( pbCurr == pbDst + cbDst ); +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSigDecode( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCBYTE pbSig, + SIZE_T cbSig, + PBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + PSYMCRYPT_MLDSA_VECTOR 
pvResponse, + PSYMCRYPT_MLDSA_VECTOR pvHint ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if( cbSig != pParams->cbEncodedSignature ) + { + scError = SYMCRYPT_WRONG_DATA_SIZE; + goto cleanup; + } + + SYMCRYPT_ASSERT( cbCommitmentHash == pParams->cbCommitmentHash ); + + PCBYTE pbCurr = pbSig; + + memcpy( pbCommitmentHash, pbSig, cbCommitmentHash ); + pbCurr += cbCommitmentHash; + + SymCryptMlDsaVectorDecode( + pbCurr, + pParams->maskCoefficientRangeLog2 + 1, + 1 << pParams->maskCoefficientRangeLog2, + pvResponse ); + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvResponse, pParams->maskCoefficientRangeLog2 + 1 ); + + SYMCRYPT_ASSERT( cbSig - (pbCurr - pbSig) == (SIZE_T) pParams->nHintNonZeroCoeffs + pvHint->nElems ); + + scError = SymCryptMlDsaHintBitUnpack( pParams, pbCurr, pvHint ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + return scError; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaHintBitPack( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PBYTE pbDst ) +{ + SymCryptWipe( pbDst, pParams->nHintNonZeroCoeffs ); + + UINT32 index = 0; + for( UINT32 i = 0; i < pvSrc->nElems; ++i ) + { + PSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ); + for( UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; ++j ) + { + // Side channel safety: the hint is public (part of the signature) so it's okay to + // leak information here + if( peElement->coeffs[j] != 0 ) + { + // Each byte in the hint is the index of a non-zero coefficient + pbDst[index] = (BYTE) j; + index++; + } + } + + // The number of non-zero coefficients in polynomials 0..i is stored in the + // (nHintNonZeroCoeffs + i)th byte. This allows us to determine which indices correspond + // to which polynomials during decoding while still only using one byte per index. 
SYMCRYPT_ASSERT( index <= pParams->nHintNonZeroCoeffs );
        pbDst[pParams->nHintNonZeroCoeffs + i] = (BYTE) index;
    }
}

//
// Unpacks an encoded hint (see SymCryptMlDsaHintBitPack) from pbSrc into pvDst, rejecting
// malformed encodings. Every element of pvDst that is reached is zeroed before its hint bits
// are set.
//
// The input is rejected with SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE when:
//  - a per-polynomial running count decreases or exceeds nHintNonZeroCoeffs,
//  - the coefficient indices within one polynomial are not strictly increasing, or
//  - any unused index byte after the last used one is non-zero.
//
// On failure pvDst may be left partially written.
//
_Use_decl_annotations_
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMlDsaHintBitUnpack(
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams,
    PCBYTE pbSrc,
    PSYMCRYPT_MLDSA_VECTOR pvDst )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    UINT32 index = 0;       // next index byte to consume
    UINT32 maxIndex = 0;    // running count for the current polynomial (one past its last index byte)
    UINT32 first = 0;       // first index byte belonging to the current polynomial

    for( UINT32 i = 0; i < pvDst->nElems; ++i )
    {
        PSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst );

        // Ensure pvDst is zeroed out before unpacking
        SymCryptMlDsaPolyElementSetZero( peElement );

        maxIndex = pbSrc[pParams->nHintNonZeroCoeffs + i];
        if( ( maxIndex < index) ||
            ( maxIndex > pParams->nHintNonZeroCoeffs) )
        {
            // Invalid input
            scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
            goto cleanup;
        }

        first = index;
        while( index < maxIndex )
        {
            // Indices within one polynomial must be strictly increasing
            if( index > first && pbSrc[index - 1] >= pbSrc[index])
            {
                // Invalid input
                scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
                goto cleanup;
            }

            peElement->coeffs[pbSrc[index]] = 1;
            index++;
        }
    }

    // All unused index bytes must be zero
    for(UINT32 leftover = index; leftover < pParams->nHintNonZeroCoeffs; ++leftover)
    {
        if( pbSrc[leftover] != 0 )
        {
            // Invalid input
            scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
            goto cleanup;
        }
    }

cleanup:
    return scError;
}

//
// Looks up hashAlg in g_hashOidMap, validates cbHash for that algorithm and parameter set, and
// returns the matching OID through *ppOid.
//
// Returns SYMCRYPT_INVALID_ARGUMENT (leaving *ppOid untouched) when the hash algorithm is not in
// the table or the hash length is not acceptable; SYMCRYPT_NO_ERROR otherwise.
//
_Use_decl_annotations_
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptHashMlDsaValidateHashAlgAndGetOid(
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams,
    SYMCRYPT_PQDSA_HASH_ID hashAlg,
    SIZE_T cbHash,
    PCSYMCRYPT_OID* ppOid )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    PCSYMCRYPT_OID pHashOid = NULL;
    BOOLEAN fFound = FALSE;
    BOOLEAN fIsXof = FALSE;
    SIZE_T cbHashExpected = 0;

    for( UINT32 i = 0; i < SYMCRYPT_ARRAY_SIZE(g_hashOidMap); ++i )
    {
        if( g_hashOidMap[i].hashId == hashAlg )
        {
            fFound = TRUE;
            pHashOid = g_hashOidMap[i].pOid;
            fIsXof = g_hashOidMap[i].fIsXof;
cbHashExpected = g_hashOidMap[i].pHashAlgorithm->resultSize; + break; + } + } + + if( !fFound ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SYMCRYPT_ASSERT( pHashOid->cbOID == SYMCRYPT_MLDSA_SUPPORTED_HASH_OID_SIZE ); + + // For traditional hash algorithms (non-XOFs), the hash length must exactly match the expected + // value. For XOFs, the output length is arbitrary, and any length is acceptable as long as it + // meets the minimum collision strength specified by the parameter set (cbCommitmentHash) + if( (!fIsXof && cbHash != cbHashExpected ) || + ( cbHash < pParams->cbCommitmentHash ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + *ppOid = pHashOid; + +cleanup: + return scError; +} + +FORCEINLINE +INT32 +SYMCRYPT_CALL +SymCryptMlDsaModPlusMinus( UINT32 r, UINT32 modulus ) +{ + SYMCRYPT_ASSERT( r < modulus ); + + // In most cases this function is used with even moduli, e.g. with Power2Round. + // However, it's okay if the modulus is odd. FIPS 204 specifies that the output is in the range + // (-ceil(modulus/2), floor(modulus/2) ] + // = ( -((modulus + 1) // 2), modulus // 2 ] + // = [ -modulus // 2, modulus // 2 ] + const INT32 halfModulus = modulus >> 1; + + // Mask for conditional subtraction: 0 if r <= (modulus/2), 0xFFFFFFFF otherwise + UINT32 subtractionMask = SYMCRYPT_MASK32_LT( halfModulus, r ); + + INT32 r0 = (INT32) r - (modulus & subtractionMask); + SYMCRYPT_ASSERT( r0 > -halfModulus && r0 <= halfModulus); + + return r0; +} + +_Use_decl_annotations_ +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaPolyElementInfinityNorm( PCSYMCRYPT_MLDSA_POLYELEMENT peSrc ) +{ + UINT32 norm = 0; + UINT32 mask; + INT32 curr; + for( UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + // Convert the coefficient to signed form + curr = SymCryptMlDsaModPlusMinus( peSrc->coeffs[i], SYMCRYPT_MLDSA_Q ); + + // If the coefficient is less than 0, negate it + mask = SYMCRYPT_MASK32_LT( curr, 0 ); + curr = (curr & 
~mask) | ((curr * -1) & mask); + + // If the coefficient is greater than the current norm, update the norm + mask = SYMCRYPT_MASK32_LT( norm, curr ); + norm = (norm & ~mask) | (curr & mask); + } + + return norm; +} + +_Use_decl_annotations_ +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaVectorInfinityNorm( PCSYMCRYPT_MLDSA_VECTOR pvSrc ) +{ + UINT32 norm = 0; + UINT32 curr; + INT32 mask; + for( UINT32 i = 0; i < pvSrc->nElems; i++ ) + { + curr = SymCryptMlDsaPolyElementInfinityNorm( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT(i, pvSrc) ); + mask = SYMCRYPT_MASK32_LT( norm, curr ); + norm = (norm & ~mask) | (curr & mask); + } + + return norm; +} + +_Use_decl_annotations_ +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlDsaDecompose( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + UINT32 r, + UINT32 *puR1, + UINT32 *puR0 ) +{ + SYMCRYPT_ASSERT( r < SYMCRYPT_MLDSA_Q ); + SYMCRYPT_ASSERT( puR1 != NULL || puR0 != NULL ); + + UINT32 r1 = 0; + UINT32 mask = 0; + INT32 r0 = 0; + + // Some tricks for calculating this are borrowed from the reference implementation + // https://github.com/pq-crystals/dilithium/blob/master/ref/rounding.c + // + // The multiplication constants for calculating r1 are in the PCSYMCRYPT_MLDSA_INTERNAL_PARAMS + // structure. They are calculated as follows. + // + // To keep intermediate values in the 32-bit range, instead of using r directly, we calculate + // ceil( r/128 ). We likewise divide the commitment rounding range by 128. 
+ // + // For ML-DSA 44: + // 2*commmitmentRoundingRange = 2 * 95,232 = 190,464 + // 190464 // 128 = 1488 + // 1 / 1488 ~= floor(2^24 // 1488) * 2^24 = 11,275 // 2^24 + // For ML-DSA 65 and 87: + // 2*commmitmentRoundingRange = 2*261888 = 523776 + // 523776 // 128 = 4092 + // 1 / 4092 ~= floor(2^22 // 4092) * 2^22 = 1025 // 2^22 = 4100 // 2^24 + // + + UINT32 rdiv128 = (r + 127) >> 7; + r1 = ( rdiv128 * pParams->decomposeR1Factor + (1 << 23)) >> 24; + + // Handle corner case: if r1 is outside of the expected range, set it to 0 + mask = SYMCRYPT_MASK32_LT( r1, pParams->commitmentModulus ); + r1 &= mask; + + r0 = r - ( r1 * 2 * pParams->commitmentRoundingRange ); + + // Handle corner case for r0 + r0 -= ((((SYMCRYPT_MLDSA_Q - 1) >> 1) - r0) >> 31) & SYMCRYPT_MLDSA_Q; + r0 = SymCryptMlDsaSignedCoefficientModQ( r0 ); + + if( puR1 != NULL ) + { + *puR1 = r1; + } + + if( puR0 != NULL ) + { + *puR0 = r0; + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorHighBits( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst->nElems ); + + for(UINT32 i = 0; i < pvSrc->nElems; i++) + { + for(UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; j++) + { + SymCryptMlDsaDecompose( + pParams, + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc )->coeffs[j], + &(SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst )->coeffs[j]), + NULL ); + } + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorLowBits( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst->nElems ); + + for(UINT32 i = 0; i < pvSrc->nElems; i++) + { + for(UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; j++) + { + SymCryptMlDsaDecompose( + pParams, + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc )->coeffs[j], + NULL, + &(SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, 
pvDst )->coeffs[j]) ); + } + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPower2Round( + UINT32 r, + UINT32* puR1, + UINT32* puR0 ) +{ + SYMCRYPT_ASSERT( r < SYMCRYPT_MLDSA_Q ); + + UINT32 rPrime = r & ( (1 << SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS) - 1 ); // r mod 2^d + INT32 r0 = SymCryptMlDsaModPlusMinus( rPrime, 1 << SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS ); + + UINT32 r1 = (r - r0) >> SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS; + + *puR1 = r1; + *puR0 = SymCryptMlDsaSignedCoefficientModQ( r0 ); //, 4096 ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementPower2Round( + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc, + PSYMCRYPT_MLDSA_POLYELEMENT peDst1, + PSYMCRYPT_MLDSA_POLYELEMENT peDst0 ) +{ + UINT32 r1, r0; + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + SymCryptMlDsaPower2Round( peSrc->coeffs[i], &r1, &r0 ); + peDst1->coeffs[i] = r1; + peDst0->coeffs[i] = r0; + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorPower2Round( + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PSYMCRYPT_MLDSA_VECTOR pvDst1, + PSYMCRYPT_MLDSA_VECTOR pvDst0 ) +{ + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst1->nElems ); + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst0->nElems ); + + for(UINT32 i = 0; i < pvSrc->nElems; i++) + { + SymCryptMlDsaPolyElementPower2Round( + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ), + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst1 ), + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst0 ) ); + } +} + +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaSignedCoefficientModQ( INT32 coefficient ) +{ + SYMCRYPT_ASSERT(coefficient > -1 * SYMCRYPT_MLDSA_Q && coefficient < SYMCRYPT_MLDSA_Q); + + UINT32 result; + UINT32 negativeMask = SYMCRYPT_MASK32_LT( coefficient, 0 ); + + result = coefficient + (SYMCRYPT_MLDSA_Q & negativeMask); + SYMCRYPT_ASSERT( result < SYMCRYPT_MLDSA_Q ); + + return result; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementEncode( + 
PCSYMCRYPT_MLDSA_POLYELEMENT peSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + PBYTE pbDst ) +{ + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 20 ); // Maximum number of bits per coefficient across all encodings + + INT32 coefficient; + UINT32 nBitsInCoefficient; + UINT32 bitsToEncode; + UINT32 nBitsToEncode; + UINT32 cbDstWritten = 0; + UINT32 accumulator = 0; + UINT32 nBitsInAccumulator = 0; + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + nBitsInCoefficient = nBitsPerCoefficient; + coefficient = peSrc->coeffs[i]; + + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLDSA_Q ); + + // If the coefficient is greater than the signedCoefficientBound, that means it is + // a negative value modulo Q, so we need to subtract Q to get the original value. + coefficient -= ( SYMCRYPT_MLDSA_Q & SYMCRYPT_MASK32_LT( signedCoefficientBound, coefficient ) ); + + // The coefficient is now in the range [-signedCoefficientBound, signedCoefficientBound], + // we need to map it to the range [0, 2*signedCoefficientBound] for encoding. + coefficient = SYMCRYPT_INTERNAL_MLDSA_SHORT_COEFFICIENT_ENCODE_DECODE( coefficient, signedCoefficientBound ); + + // Some coefficients are always positive and so do not need any special encoding. In this + // case, we revert to the original value from the source polynomial. 
+ coefficient = ( peSrc->coeffs[i] & SYMCRYPT_MASK32_ZERO( signedCoefficientBound ) ) | + ( coefficient & SYMCRYPT_MASK32_NONZERO( signedCoefficientBound ) ); + + SYMCRYPT_ASSERT( coefficient >= 0 ); + SYMCRYPT_ASSERT( signedCoefficientBound == 0 || (UINT32) coefficient <= signedCoefficientBound * 2 ); + SYMCRYPT_ASSERT( (UINT32) coefficient < (1ul << nBitsPerCoefficient) ); + + // encode the coefficient + // simple loop to add bits to accumulator and write accumulator to output + do + { + nBitsToEncode = SYMCRYPT_MIN( nBitsInCoefficient, 32 - nBitsInAccumulator ); + + bitsToEncode = coefficient & ( ( 1UL << nBitsToEncode ) - 1 ); + coefficient >>= nBitsToEncode; + nBitsInCoefficient -= nBitsToEncode; + + accumulator |= ( bitsToEncode << nBitsInAccumulator ); + nBitsInAccumulator += nBitsToEncode; + if(nBitsInAccumulator == 32) + { + SYMCRYPT_STORE_LSBFIRST32( pbDst + cbDstWritten, accumulator ); + cbDstWritten += 4; + accumulator = 0; + nBitsInAccumulator = 0; + } + } while( nBitsInCoefficient > 0 ); + } + + SYMCRYPT_ASSERT(nBitsInAccumulator == 0); + SYMCRYPT_ASSERT(cbDstWritten == (nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8))); +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaPolyElementDecode( + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 20 ); // Maximum number of bits per coefficient across all encodings + + INT32 coefficient; + UINT32 nBitsInCoefficient; + UINT32 bitsToDecode; + UINT32 nBitsToDecode; + UINT32 cbSrcRead = 0; + UINT32 accumulator = 0; + UINT32 nBitsInAccumulator = 0; + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + coefficient = 0; + nBitsInCoefficient = 0; + + // first gather and decode bits from pbSrc + do + { + if(nBitsInAccumulator == 0) + { + accumulator = SYMCRYPT_LOAD_LSBFIRST32( pbSrc+cbSrcRead ); + 
cbSrcRead += 4; + nBitsInAccumulator = 32; + } + + nBitsToDecode = SYMCRYPT_MIN(nBitsPerCoefficient-nBitsInCoefficient, nBitsInAccumulator); + SYMCRYPT_ASSERT(nBitsToDecode <= nBitsInAccumulator); + + bitsToDecode = accumulator & ((1UL<<nBitsToDecode)-1); + accumulator >>= nBitsToDecode; + nBitsInAccumulator -= nBitsToDecode; + + coefficient |= (bitsToDecode << nBitsInCoefficient); + nBitsInCoefficient += nBitsToDecode; + } while( nBitsPerCoefficient > nBitsInCoefficient ); + SYMCRYPT_ASSERT( nBitsInCoefficient == nBitsPerCoefficient ); + + // Coefficient should always be positive at this point since it's encoded in <= 20 bits + SYMCRYPT_ASSERT( coefficient >= 0 ); + + if( ( signedCoefficientBound != 0 ) && ( (UINT32) coefficient > 2 * signedCoefficientBound ) ) + { + // Most of the encoded components of ML-DSA keys and signatures cannot be outside the + // valid range, because the number of bits in their encodings do not permit invalid + // values. However, the private key components s1 and s2 have encodings that do allow + // for invalid values (because their valid ranges are [-2, 2] or [-4, 4]). If any + // coefficient is outside this range, the key is invalid and we return an error. + // We do not need to do this check in constant time because we treat the validity of an + // imported key as public information. + return SYMCRYPT_INVALID_BLOB; + } + + // If this coefficient is intended to be signed, we need to decode it into its original + // signed form and then map it modulo Q. 
+ // Side-channel safety: signedCoefficientBound just indicates which component we are + // decoding, so it's public information + if( signedCoefficientBound != 0 ) + { + coefficient = SYMCRYPT_INTERNAL_MLDSA_SHORT_COEFFICIENT_ENCODE_DECODE( coefficient, signedCoefficientBound ); + coefficient = SymCryptMlDsaSignedCoefficientModQ( coefficient ); + } + + peDst->coeffs[i] = coefficient; + } + + SYMCRYPT_ASSERT(nBitsInAccumulator == 0); + SYMCRYPT_ASSERT(cbSrcRead == (nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8))); + + return SYMCRYPT_NO_ERROR; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorEncode( + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + PBYTE pbDst ) +{ + PBYTE pbCurr = pbDst; + const SIZE_T cbEncodedPoly = nBitsPerCoefficient * ( SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8 ); + + for( UINT32 i = 0; i < pvSrc->nElems; ++i ) + { + PCSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ); + + SymCryptMlDsaPolyElementEncode( + peElement, + nBitsPerCoefficient, + signedCoefficientBound, + pbCurr ); + + pbCurr += cbEncodedPoly; + } + + SYMCRYPT_ASSERT( pbCurr == pbDst + ( pvSrc->nElems * cbEncodedPoly ) ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaVectorDecode( + _In_reads_bytes_( pvDst->nElems * nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCBYTE pbCurr = pbSrc; + const SIZE_T cbEncodedPoly = nBitsPerCoefficient * ( SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8 ); + + for( UINT32 i = 0; i < pvDst->nElems; ++i ) + { + PSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + scError = SymCryptMlDsaPolyElementDecode( + pbCurr, + nBitsPerCoefficient, + signedCoefficientBound, + peElement ); + if( scError != 
SYMCRYPT_NO_ERROR ) + { + // Side-channel safety: the validity of an imported key is public information. + // See comment in SymCryptMlDsaPolyElementDecode. + goto cleanup; + } + + pbCurr += cbEncodedPoly; + } + + SYMCRYPT_ASSERT( pbCurr == pbSrc + ( pvDst->nElems * cbEncodedPoly ) ); + +cleanup: + return scError; +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES +SYMCRYPT_CALL +SymCryptMlDsaTemporariesAllocateAndInitialize( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + UINT32 nRowVectors, + UINT32 nColVectors, + UINT32 nPolyElements, + UINT32 cbScratch ) +{ + // Round scratch space to nearest multiple of 8 for alignment + cbScratch = (cbScratch + 7) & ~7; + + UINT32 cbTotalSize = sizeof( SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES) + + ( nRowVectors * sizeof( PSYMCRYPT_MLDSA_VECTOR ) ) + // Row vector pointers + ( nColVectors * sizeof( PSYMCRYPT_MLDSA_VECTOR ) ) + // Col vector pointers + ( nPolyElements * sizeof( PSYMCRYPT_MLDSA_POLYELEMENT ) ) + // Poly element pointers + ( nRowVectors * pParams->cbRowVector ) + // Row vector buffer + ( nColVectors * pParams->cbColVector ) + // Col vector buffer + ( nPolyElements * pParams->cbPolyElement ) + // Poly element buffer + ( cbScratch ); // Scratch buffer + + PBYTE pbBuffer = SymCryptCallbackAlloc( cbTotalSize ); + if( pbBuffer == NULL ) + { + return NULL; + } + + SymCryptWipe( pbBuffer, cbTotalSize ); + + PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemporaries = + (PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES) pbBuffer; + + pTemporaries->cbTotalSize = cbTotalSize; + pTemporaries->nRowVectors = nRowVectors; + pTemporaries->nColVectors = nColVectors; + pTemporaries->nPolyElements = nPolyElements; + pTemporaries->cbScratch = cbScratch; + + PBYTE pbCurrent = pbBuffer + sizeof( SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES ); + + if( nRowVectors > 0 ) + { + pTemporaries->pvRowVectors = (PSYMCRYPT_MLDSA_VECTOR *) pbCurrent; + pbCurrent += nRowVectors * sizeof( 
PSYMCRYPT_MLDSA_VECTOR ); + } + + if( nColVectors > 0 ) + { + pTemporaries->pvColVectors = (PSYMCRYPT_MLDSA_VECTOR *) pbCurrent; + pbCurrent += nColVectors * sizeof( PSYMCRYPT_MLDSA_VECTOR ); + } + + if( nPolyElements > 0 ) + { + pTemporaries->pePolyElements = (PSYMCRYPT_MLDSA_POLYELEMENT *) pbCurrent; + pbCurrent += nPolyElements * sizeof( PSYMCRYPT_MLDSA_POLYELEMENT ); + } + + for(UINT32 i = 0; i < nRowVectors; i++) + { + pTemporaries->pvRowVectors[i] = SymCryptMlDsaVectorCreate( pbCurrent, pParams->cbRowVector, pParams->nRows ); + pbCurrent += pParams->cbRowVector; + } + + for(UINT32 i = 0; i < nColVectors; i++) + { + pTemporaries->pvColVectors[i] = SymCryptMlDsaVectorCreate( pbCurrent, pParams->cbColVector, pParams->nCols ); + pbCurrent += pParams->cbColVector; + } + + for(UINT32 i = 0; i < nPolyElements; i++) + { + pTemporaries->pePolyElements[i] = SymCryptMlDsaPolyElementCreate( pbCurrent, pParams->cbPolyElement ); + pbCurrent += pParams->cbPolyElement; + } + + if( cbScratch > 0 ) + { + pTemporaries->pbScratch = pbCurrent; + pbCurrent += cbScratch; + } + + SYMCRYPT_ASSERT( pbCurrent == pbBuffer + cbTotalSize ); + + SYMCRYPT_SET_MAGIC( pTemporaries ); + + return pTemporaries; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaTemporariesFree( + PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemporaries ) +{ + SYMCRYPT_CHECK_MAGIC( pTemporaries ); + + SymCryptWipe( pTemporaries, pTemporaries->cbTotalSize ); + SymCryptCallbackFree( pTemporaries ); +} diff --git a/libs/symcrypt/lib/mlkem.c b/libs/symcrypt/lib/mlkem.c new file mode 100644 index 00000000000..975775587ce --- /dev/null +++ b/libs/symcrypt/lib/mlkem.c @@ -0,0 +1,1164 @@ +// +// mlkem.c ML-KEM related functionality +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +#define NROWS_MLKEM512 (2) +#define NROWS_MLKEM768 (3) +#define NROWS_MLKEM1024 (4) + +const SYMCRYPT_MLKEM_INTERNAL_PARAMS SymCryptMlKemInternalParamsMlKem512 = +{ + .params = SYMCRYPT_MLKEM_PARAMS_MLKEM512, + .cbPolyElement = SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT, + .nRows = NROWS_MLKEM512, + .cbVector = sizeof(SYMCRYPT_MLKEM_VECTOR) + (NROWS_MLKEM512 * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .cbMatrix = sizeof(SYMCRYPT_MLKEM_MATRIX) + (NROWS_MLKEM512 * + NROWS_MLKEM512 * + SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .nEta1 = 3, + .nEta2 = 2, + .nBitsOfU = 10, + .nBitsOfV = 4, +}; + +const SYMCRYPT_MLKEM_INTERNAL_PARAMS SymCryptMlKemInternalParamsMlKem768 = +{ + .params = SYMCRYPT_MLKEM_PARAMS_MLKEM768, + .cbPolyElement = SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT, + .nRows = NROWS_MLKEM768, + .cbVector = sizeof(SYMCRYPT_MLKEM_VECTOR) + (NROWS_MLKEM768 * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .cbMatrix = sizeof(SYMCRYPT_MLKEM_MATRIX) + (NROWS_MLKEM768 * + NROWS_MLKEM768 * + SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .nEta1 = 2, + .nEta2 = 2, + .nBitsOfU = 10, + .nBitsOfV = 4, +}; + +const SYMCRYPT_MLKEM_INTERNAL_PARAMS SymCryptMlKemInternalParamsMlKem1024 = +{ + .params = SYMCRYPT_MLKEM_PARAMS_MLKEM1024, + .cbPolyElement = SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT, + .nRows = NROWS_MLKEM1024, + .cbVector = sizeof(SYMCRYPT_MLKEM_VECTOR) + (NROWS_MLKEM1024 * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .cbMatrix = sizeof(SYMCRYPT_MLKEM_MATRIX) + (NROWS_MLKEM1024 * + NROWS_MLKEM1024 * + SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .nEta1 = 2, + .nEta2 = 2, + .nBitsOfU = 11, + .nBitsOfV = 5, +}; + +static +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGetInternalParamsFromParams( + SYMCRYPT_MLKEM_PARAMS params, + _Out_ PSYMCRYPT_MLKEM_INTERNAL_PARAMS pInternalParams ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + switch( params ) + { + case 
SYMCRYPT_MLKEM_PARAMS_MLKEM512: + *pInternalParams = SymCryptMlKemInternalParamsMlKem512; + break; + case SYMCRYPT_MLKEM_PARAMS_MLKEM768: + *pInternalParams = SymCryptMlKemInternalParamsMlKem768; + break; + case SYMCRYPT_MLKEM_PARAMS_MLKEM1024: + *pInternalParams = SymCryptMlKemInternalParamsMlKem1024; + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + +cleanup: + return scError; +} + +static +PSYMCRYPT_MLKEMKEY +SYMCRYPT_CALL +SymCryptMlKemkeyInitialize( + _In_ PCSYMCRYPT_MLKEM_INTERNAL_PARAMS pInternalParams, + _Out_writes_bytes_(cbKey) PBYTE pbKey, + UINT32 cbKey ) +{ + PSYMCRYPT_MLKEMKEY pRes = NULL; + PSYMCRYPT_MLKEMKEY pKey = (PSYMCRYPT_MLKEMKEY)pbKey; + PBYTE pbCurr = pbKey + sizeof(SYMCRYPT_MLKEMKEY); + + SymCryptWipeKnownSize( pbKey, cbKey ); + + pKey->fAlgorithmInfo = 0; + pKey->params = *pInternalParams; + pKey->cbTotalSize = cbKey; + pKey->hasPrivateSeed = FALSE; + pKey->hasPrivateKey = FALSE; + + pKey->pmAtranspose = SymCryptMlKemMatrixCreate( pbCurr, pInternalParams->cbMatrix, pInternalParams->nRows ); + if( pKey->pmAtranspose == NULL ) + { + goto cleanup; + } + pbCurr += pInternalParams->cbMatrix; + + pKey->pvt = SymCryptMlKemVectorCreate( pbCurr, pInternalParams->cbVector, pInternalParams->nRows ); + if( pKey->pvt == NULL ) + { + goto cleanup; + } + pbCurr += pInternalParams->cbVector; + + pKey->pvs = SymCryptMlKemVectorCreate( pbCurr, pInternalParams->cbVector, pInternalParams->nRows ); + if( pKey->pvs == NULL ) + { + goto cleanup; + } + pbCurr += pInternalParams->cbVector; + + SYMCRYPT_ASSERT( pbCurr == (pbKey + cbKey) ); + + SYMCRYPT_SET_MAGIC( pKey ); + + pRes = pKey; + +cleanup: + return pRes; +} + +PSYMCRYPT_MLKEMKEY +SYMCRYPT_CALL +SymCryptMlKemkeyAllocate( + SYMCRYPT_MLKEM_PARAMS params ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbKey = NULL; + UINT32 cbKey; + SYMCRYPT_MLKEM_INTERNAL_PARAMS internalParams; + + PSYMCRYPT_MLKEMKEY pKey = NULL; + + scError = 
SymCryptMlKemkeyGetInternalParamsFromParams(params, &internalParams); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + cbKey = sizeof(SYMCRYPT_MLKEMKEY) + internalParams.cbMatrix + (2*internalParams.cbVector); + + pbKey = SymCryptCallbackAlloc( cbKey ); + if ( pbKey == NULL ) + { + goto cleanup; + } + + pKey = SymCryptMlKemkeyInitialize( &internalParams, pbKey, cbKey ); + if ( pKey == NULL ) + { + goto cleanup; + } + + pbKey = NULL; + +cleanup: + if ( pbKey != NULL ) + { + SymCryptCallbackFree( pbKey ); + } + + return pKey; +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyFree( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ) +{ + SYMCRYPT_CHECK_MAGIC( pkMlKemkey ); + + SymCryptWipe( (PBYTE) pkMlKemkey, pkMlKemkey->cbTotalSize ); + + SymCryptCallbackFree( pkMlKemkey ); +} + +#define SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR(_nRows) (384UL * _nRows) + +// s and t are encoded uncompressed vectors +// public seed, H(encapsulation key) and z are each 32 bytes +#define SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(_nRows) ((2*SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR(_nRows)) + (3*32)) +// t is encoded uncompressed vector +// public seed is 32 bytes +#define SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(_nRows) (SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR(_nRows) + 32) + +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(NROWS_MLKEM512) == SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM512 ); +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(NROWS_MLKEM768) == SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM768 ); +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(NROWS_MLKEM1024) == SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM1024 ); + +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(NROWS_MLKEM512) == SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM512 ); +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(NROWS_MLKEM768) == SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM768 ); +C_ASSERT( 
SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(NROWS_MLKEM1024) == SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM1024 ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemSizeofKeyFormatFromParams( + SYMCRYPT_MLKEM_PARAMS params, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + _Out_ SIZE_T* pcbKeyFormat ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_MLKEM_INTERNAL_PARAMS internalParams; + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_NULL ) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + scError = SymCryptMlKemkeyGetInternalParamsFromParams(params, &internalParams); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + switch( mlKemkeyFormat ) + { + case SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED: + *pcbKeyFormat = SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE; + break; + + case SYMCRYPT_MLKEMKEY_FORMAT_DECAPSULATION_KEY: + *pcbKeyFormat = SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(internalParams.nRows); + break; + + case SYMCRYPT_MLKEMKEY_FORMAT_ENCAPSULATION_KEY: + *pcbKeyFormat = SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(internalParams.nRows); + break; + + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemSizeofCiphertextFromParams( + SYMCRYPT_MLKEM_PARAMS params, + _Out_ SIZE_T* pcbCiphertext ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_MLKEM_INTERNAL_PARAMS internalParams; + SIZE_T cbU, cbV; + + scError = SymCryptMlKemkeyGetInternalParamsFromParams(params, &internalParams); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // u vector encoded with nBitsOfU * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits per polynomial + cbU = ((SIZE_T)internalParams.nRows) * internalParams.nBitsOfU * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + // v polynomial encoded with nBitsOfV * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits + cbV = ((SIZE_T)internalParams.nBitsOfV) * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + 
*pcbCiphertext = cbU + cbV; + + SYMCRYPT_ASSERT( (internalParams.params != SYMCRYPT_MLKEM_PARAMS_MLKEM512) || ((cbU + cbV) == SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM512) ); + SYMCRYPT_ASSERT( (internalParams.params != SYMCRYPT_MLKEM_PARAMS_MLKEM768) || ((cbU + cbV) == SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM768) ); + SYMCRYPT_ASSERT( (internalParams.params != SYMCRYPT_MLKEM_PARAMS_MLKEM1024) || ((cbU + cbV) == SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM1024) ); + +cleanup: + return scError; +} + +static +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyExpandPublicMatrixFromPublicSeed( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps ) +{ + UINT32 i, j; + BYTE coordinates[2]; + + PSYMCRYPT_SHAKE128_STATE pShakeStateBase = &pCompTemps->hashState0.shake128State; + PSYMCRYPT_SHAKE128_STATE pShakeStateWork = &pCompTemps->hashState1.shake128State; + const UINT32 nRows = pkMlKemkey->params.nRows; + + SymCryptShake128Init( pShakeStateBase ); + SymCryptShake128Append( pShakeStateBase, pkMlKemkey->publicSeed, sizeof(pkMlKemkey->publicSeed) ); + + for( i=0; i<nRows; i++ ) + { + coordinates[1] = (BYTE)i; + for( j=0; j<nRows; j++ ) + { + coordinates[0] = (BYTE)j; + SymCryptShake128StateCopy( pShakeStateBase, pShakeStateWork ); + SymCryptShake128Append( pShakeStateWork, coordinates, sizeof(coordinates) ); + + SymCryptMlKemPolyElementSampleNTTFromShake128( pShakeStateWork, pkMlKemkey->pmAtranspose->apPolyElements[(i*nRows)+j] ); + } + } + + // no need to wipe; everything computed here is always public +} + +static +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyComputeEncapsulationKeyHash( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps, + SIZE_T cbEncodedVector ) +{ + PSYMCRYPT_SHA3_256_STATE pState = &pCompTemps->hashState0.sha3_256State; + + SymCryptSha3_256Init( pState ); + SymCryptSha3_256Append( pState, pkMlKemkey->encodedT, cbEncodedVector ); + SymCryptSha3_256Append( pState, 
pkMlKemkey->publicSeed, sizeof(pkMlKemkey->publicSeed) ); + SymCryptSha3_256Result( pState, pkMlKemkey->encapsKeyHash ); +} + +static +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyExpandFromPrivateSeed( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps ) +{ + BYTE privateSeedHash[SYMCRYPT_SHA3_512_RESULT_SIZE]; + BYTE CBDSampleBuffer[3*64 + 1]; + PSYMCRYPT_MLKEM_VECTOR pvTmp; + PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp; + PSYMCRYPT_SHAKE256_STATE pShakeStateBase = &pCompTemps->hashState0.shake256State; + PSYMCRYPT_SHAKE256_STATE pShakeStateWork = &pCompTemps->hashState1.shake256State; + UINT32 i; + const UINT32 nRows = pkMlKemkey->params.nRows; + const UINT32 nEta1 = pkMlKemkey->params.nEta1; + const SIZE_T cbEncodedVector = SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR(nRows); + const UINT32 cbPolyElement = pkMlKemkey->params.cbPolyElement; + const UINT32 cbVector = pkMlKemkey->params.cbVector; + + SYMCRYPT_ASSERT( pkMlKemkey->hasPrivateSeed ); + SYMCRYPT_ASSERT( (nEta1 == 2) || (nEta1 == 3) ); + SYMCRYPT_ASSERT( cbEncodedVector <= sizeof(pkMlKemkey->encodedT) ); + + pvTmp = SymCryptMlKemVectorCreate( pCompTemps->abVectorBuffer0, cbVector, nRows ); + SYMCRYPT_ASSERT( pvTmp != NULL ); + paTmp = SymCryptMlKemPolyElementAccumulatorCreate( pCompTemps->abPolyElementAccumulatorBuffer, 2*cbPolyElement ); + SYMCRYPT_ASSERT( paTmp != NULL ); + + // (rho || sigma) = G(d || k) + // use CBDSampleBuffer to concatenate the private seed and encoding of nRows + memcpy( CBDSampleBuffer, pkMlKemkey->privateSeed, sizeof(pkMlKemkey->privateSeed) ); + CBDSampleBuffer[sizeof(pkMlKemkey->privateSeed)] = (BYTE) nRows; + SymCryptSha3_512( CBDSampleBuffer, sizeof(pkMlKemkey->privateSeed)+1, privateSeedHash ); + + // copy public seed + memcpy( pkMlKemkey->publicSeed, privateSeedHash, sizeof(pkMlKemkey->publicSeed) ); + + // generate A from public seed + SymCryptMlKemkeyExpandPublicMatrixFromPublicSeed( pkMlKemkey, pCompTemps ); + 
+ // Initialize pShakeStateBase with sigma + SymCryptShake256Init( pShakeStateBase ); + SymCryptShake256Append( pShakeStateBase, privateSeedHash+sizeof(pkMlKemkey->publicSeed), 32 ); + + // Expand s in place + for( i=0; i<nRows; i++ ) + { + CBDSampleBuffer[0] = (BYTE) i; + SymCryptShake256StateCopy( pShakeStateBase, pShakeStateWork ); + SymCryptShake256Append( pShakeStateWork, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeStateWork, CBDSampleBuffer, 64ul*nEta1, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta1, SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pkMlKemkey->pvs) ); + } + // Expand e in t, ready for multiply-add + for( i=0; i<nRows; i++ ) + { + CBDSampleBuffer[0] = (BYTE) (nRows+i); + SymCryptShake256StateCopy( pShakeStateBase, pShakeStateWork ); + SymCryptShake256Append( pShakeStateWork, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeStateWork, CBDSampleBuffer, 64ul*nEta1, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta1, SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pkMlKemkey->pvt) ); + } + + // Perform NTT on s and e + SymCryptMlKemVectorNTT( pkMlKemkey->pvs ); + SymCryptMlKemVectorNTT( pkMlKemkey->pvt ); + + // pvTmp = s .* R + SymCryptMlKemVectorMulR( pkMlKemkey->pvs, pvTmp ); + + // t = ((A o (s .* R)) ./ R) + e = A o s + e + SymCryptMlKemMatrixVectorMontMulAndAdd( pkMlKemkey->pmAtranspose, pvTmp, pkMlKemkey->pvt, paTmp ); + + // transpose A + SymCryptMlKemMatrixTranspose( pkMlKemkey->pmAtranspose ); + + // precompute byte-encoding of public vector t + SymCryptMlKemVectorCompressAndEncode( pkMlKemkey->pvt, 12, pkMlKemkey->encodedT, cbEncodedVector ); + + // precompute hash of encapsulation key blob + SymCryptMlKemkeyComputeEncapsulationKeyHash( pkMlKemkey, pCompTemps, cbEncodedVector ); + + // Cleanup! 
+ SymCryptWipeKnownSize( privateSeedHash, sizeof(privateSeedHash) ); + SymCryptWipeKnownSize( CBDSampleBuffer, sizeof(CBDSampleBuffer) ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeySetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCBYTE pbCurr = pbSrc; + PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps = NULL; + const UINT32 nRows = pkMlKemkey->params.nRows; + const SIZE_T cbEncodedVector = SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR( nRows ); + + // Ensure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION; + + if ( ( flags & ~allowedFlags ) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check that minimal validation flag only specified with no fips + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) != 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_NULL ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // Ensure ML-KEM algorithm selftest is run before first use of ML-KEM algorithms; + // notably _before_ first full KeyGen + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptMlKemSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_MLKEM); + } + + pCompTemps = SymCryptCallbackAlloc( sizeof(SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES) ); + if( pCompTemps == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED ) + { + if( cbSrc != SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + pkMlKemkey->hasPrivateSeed = TRUE; + memcpy( pkMlKemkey->privateSeed, pbCurr, 
sizeof(pkMlKemkey->privateSeed) ); + pbCurr += sizeof(pkMlKemkey->privateSeed); + + pkMlKemkey->hasPrivateKey = TRUE; + memcpy( pkMlKemkey->privateRandom, pbCurr, sizeof(pkMlKemkey->privateRandom) ); + pbCurr += sizeof(pkMlKemkey->privateRandom); + + SymCryptMlKemkeyExpandFromPrivateSeed( pkMlKemkey, pCompTemps ); + } + else if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_DECAPSULATION_KEY ) + { + if( cbSrc != SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY( nRows ) ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + // decode s + scError = SymCryptMlKemVectorDecodeAndDecompress( pbCurr, cbEncodedVector, 12, pkMlKemkey->pvs ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbCurr += cbEncodedVector; + + // copy t and decode t + memcpy( pkMlKemkey->encodedT, pbCurr, cbEncodedVector ); + pbCurr += cbEncodedVector; + scError = SymCryptMlKemVectorDecodeAndDecompress( pkMlKemkey->encodedT, cbEncodedVector, 12, pkMlKemkey->pvt ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // copy public seed and expand public matrix + memcpy( pkMlKemkey->publicSeed, pbCurr, sizeof(pkMlKemkey->publicSeed) ); + pbCurr += sizeof(pkMlKemkey->publicSeed); + SymCryptMlKemkeyExpandPublicMatrixFromPublicSeed( pkMlKemkey, pCompTemps ); + + // transpose A + SymCryptMlKemMatrixTranspose( pkMlKemkey->pmAtranspose ); + + // compute hash of encapsulation key blob + SymCryptMlKemkeyComputeEncapsulationKeyHash( pkMlKemkey, pCompTemps, cbEncodedVector ); + + // check hash of encapsulation key matches hash in the provided blob + if( !SymCryptEqual( pbCurr, pkMlKemkey->encapsKeyHash, sizeof(pkMlKemkey->encapsKeyHash) ) ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + pbCurr += sizeof(pkMlKemkey->encapsKeyHash); + + // copy private random + memcpy( pkMlKemkey->privateRandom, pbCurr, sizeof(pkMlKemkey->privateRandom) ); + pbCurr += sizeof(pkMlKemkey->privateRandom); + + pkMlKemkey->hasPrivateSeed = FALSE; + pkMlKemkey->hasPrivateKey = TRUE; + } 
+ else if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_ENCAPSULATION_KEY ) + { + if( cbSrc != SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY( nRows ) ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + // copy t and decode t + memcpy( pkMlKemkey->encodedT, pbCurr, cbEncodedVector ); + pbCurr += cbEncodedVector; + scError = SymCryptMlKemVectorDecodeAndDecompress( pkMlKemkey->encodedT, cbEncodedVector, 12, pkMlKemkey->pvt ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // copy public seed and expand public matrix + memcpy( pkMlKemkey->publicSeed, pbCurr, sizeof(pkMlKemkey->publicSeed) ); + pbCurr += sizeof(pkMlKemkey->publicSeed); + SymCryptMlKemkeyExpandPublicMatrixFromPublicSeed( pkMlKemkey, pCompTemps ); + + // transpose A + SymCryptMlKemMatrixTranspose( pkMlKemkey->pmAtranspose ); + + // precompute hash of encapsulation key blob + SymCryptMlKemkeyComputeEncapsulationKeyHash( pkMlKemkey, pCompTemps, cbEncodedVector ); + + pkMlKemkey->hasPrivateSeed = FALSE; + pkMlKemkey->hasPrivateKey = FALSE; + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + SYMCRYPT_ASSERT( pbCurr == pbSrc + cbSrc ); + +cleanup: + if( pCompTemps != NULL ) + { + SymCryptWipe( pCompTemps, sizeof(*pCompTemps) ); + SymCryptCallbackFree( pCompTemps ); + } + + return scError; +} + +/* Serializes pkMlKemkey into pbDst in the requested mlKemkeyFormat (private seed, decapsulation key or encapsulation key). cbDst must equal the exact encoded size of that format; flags is unused. Returns SYMCRYPT_INCOMPATIBLE_FORMAT when the key lacks the private seed / private key the format needs, and SYMCRYPT_NOT_IMPLEMENTED for any other format value. */ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGetValue( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbCurr = pbDst; + const UINT32 nRows = pkMlKemkey->params.nRows; + const SIZE_T cbEncodedVector = SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR( nRows ); + + UNREFERENCED_PARAMETER( flags ); + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_NULL ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED ) + { + if( cbDst 
!= SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + if( !pkMlKemkey->hasPrivateSeed ) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + memcpy( pbCurr, pkMlKemkey->privateSeed, sizeof(pkMlKemkey->privateSeed) ); + pbCurr += sizeof(pkMlKemkey->privateSeed); + + memcpy( pbCurr, pkMlKemkey->privateRandom, sizeof(pkMlKemkey->privateRandom) ); + pbCurr += sizeof(pkMlKemkey->privateRandom); + } + else if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_DECAPSULATION_KEY ) + { + if( cbDst != SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY( nRows ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; /* NOTE(review): a size mismatch is reported as SYMCRYPT_INVALID_ARGUMENT here and for the encapsulation-key format, but as SYMCRYPT_WRONG_KEY_SIZE for the private-seed format above - confirm this difference is intended */ + goto cleanup; + } + + if( !pkMlKemkey->hasPrivateKey ) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + // We don't precompute byte-encoding of private key as exporting decapsulation key is not a critical path operation + // All other fields are kept in memory + SymCryptMlKemVectorCompressAndEncode( pkMlKemkey->pvs, 12, pbCurr, cbEncodedVector ); + pbCurr += cbEncodedVector; + + memcpy( pbCurr, pkMlKemkey->encodedT, cbEncodedVector ); + pbCurr += cbEncodedVector; + + memcpy( pbCurr, pkMlKemkey->publicSeed, sizeof(pkMlKemkey->publicSeed) ); + pbCurr += sizeof(pkMlKemkey->publicSeed); + + memcpy( pbCurr, pkMlKemkey->encapsKeyHash, sizeof(pkMlKemkey->encapsKeyHash) ); + pbCurr += sizeof(pkMlKemkey->encapsKeyHash); + + memcpy( pbCurr, pkMlKemkey->privateRandom, sizeof(pkMlKemkey->privateRandom) ); + pbCurr += sizeof(pkMlKemkey->privateRandom); + } + else if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_ENCAPSULATION_KEY ) + { + if( cbDst != SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY( nRows ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + memcpy( pbCurr, pkMlKemkey->encodedT, cbEncodedVector ); + pbCurr += cbEncodedVector; + + memcpy( pbCurr, pkMlKemkey->publicSeed, sizeof(pkMlKemkey->publicSeed) ); + pbCurr += sizeof(pkMlKemkey->publicSeed); + } + else + { + scError = 
SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + SYMCRYPT_ASSERT( pbCurr == pbDst + cbDst ); + +cleanup: + return scError; +} + +/* Generates a new key in pkMlKemkey from a random private seed obtained through SymCryptCallbackRandom. Only SYMCRYPT_FLAG_KEY_NO_FIPS is accepted in flags; unless it is set, a pairwise consistency test (encapsulate, decapsulate, compare the two secrets) is run and any failure of it is reported as SYMCRYPT_FIPS_FAILURE. The seed buffer and the test ciphertext are wiped before returning. */ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGenerate( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE privateSeed[SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE]; + PBYTE pbPctCipherText = NULL; + SIZE_T cbPctCipherText = 0; + + // Ensure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS; + + if ( ( flags & ~allowedFlags ) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = SymCryptCallbackRandom( privateSeed, sizeof(privateSeed) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptMlKemkeySetValue( privateSeed, sizeof(privateSeed), SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED, flags, pkMlKemkey ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // SymCryptMlKemkeySetValue ensures the self-test is run before + // first operational use of MlKem + + if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // PCT on key generation, encaps/decaps and check that both parties get the same shared secret with the generated key + SIZE_T cbU, cbV; + const UINT32 nRows = pkMlKemkey->params.nRows; + const UINT32 nBitsOfU = pkMlKemkey->params.nBitsOfU; + const UINT32 nBitsOfV = pkMlKemkey->params.nBitsOfV; + + // u vector encoded with nBitsOfU * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits per polynomial + cbU = nRows * nBitsOfU * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + // v polynomial encoded with nBitsOfV * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits + cbV = nBitsOfV * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + cbPctCipherText = cbU + cbV; + + pbPctCipherText = SymCryptCallbackAlloc( cbPctCipherText ); + if( pbPctCipherText == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + C_ASSERT( SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE >= 
2*SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + + // reuse bytes 0..31 of privateSeed buffer for encapsulation shared secret + scError = SymCryptMlKemEncapsulate( + pkMlKemkey, + &privateSeed[0], SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET, + pbPctCipherText, cbPctCipherText ); + if( scError != SYMCRYPT_NO_ERROR ) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // reuse bytes 32..63 of privateSeed buffer for decapsulation shared secret + scError = SymCryptMlKemDecapsulate( + pkMlKemkey, + pbPctCipherText, cbPctCipherText, + &privateSeed[SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET], SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + if( scError != SYMCRYPT_NO_ERROR ) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + if( !SymCryptEqual( &privateSeed[0], &privateSeed[SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET], SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ) ) + { + // Do not fatal on PCT failure here, as it is expected with very low probability that + // with correct keygen and encaps/decaps, the agreed secrets do not match + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // could track having run the PCT with a flag in pkMlKemkey->fAlgorithmInfo, + // but currently no need to do that given we don't ever defer the PCT + } + +cleanup: + if( pbPctCipherText != NULL ) + { + // Wiping is not required for security, but has low relative cost + // and better to be on the safe side for FIPS + SymCryptWipe( pbPctCipherText, cbPctCipherText ); + SymCryptCallbackFree( pbPctCipherText ); + } + + SymCryptWipeKnownSize( privateSeed, sizeof(privateSeed) ); + + return scError; +} +/* Encapsulation core shared by the encapsulate and decapsulate paths: derives the agreed secret and the ciphertext for pkMlKemkey from the caller-supplied pbRandom, using pCompTemps as scratch space. Returns SYMCRYPT_INVALID_ARGUMENT when cbAgreedSecret or cbCiphertext does not have the expected size. */ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulateInternal( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) + PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) + PBYTE pbCiphertext, + SIZE_T cbCiphertext, + _In_reads_bytes_( SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM ) + PCBYTE pbRandom, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES 
pCompTemps ) +{ + BYTE CBDSampleBuffer[3*64 + 1]; /* scratch: first receives the SHA3-512 output (K || rOuter), then is reused for the 1-byte SHAKE256 index and the 64*nEta1 / 64*nEta2 bytes of CBD sample input */ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_MLKEM_VECTOR pvrInner; + PSYMCRYPT_MLKEM_VECTOR pvTmp; + PSYMCRYPT_MLKEM_POLYELEMENT peTmp0, peTmp1; + PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp; + PSYMCRYPT_SHA3_512_STATE pHashState = &pCompTemps->hashState0.sha3_512State; + PSYMCRYPT_SHAKE256_STATE pShakeBaseState = &pCompTemps->hashState0.shake256State; /* lives in the same hashState0 as pHashState; the SHA3-512 result is taken before this state is initialized */ + PSYMCRYPT_SHAKE256_STATE pShakeWorkState = &pCompTemps->hashState1.shake256State; + SIZE_T cbU, cbV; + UINT32 i; + const UINT32 nRows = pkMlKemkey->params.nRows; + const UINT32 nBitsOfU = pkMlKemkey->params.nBitsOfU; + const UINT32 nBitsOfV = pkMlKemkey->params.nBitsOfV; + const UINT32 nEta1 = pkMlKemkey->params.nEta1; + const UINT32 nEta2 = pkMlKemkey->params.nEta2; + const UINT32 cbPolyElement = pkMlKemkey->params.cbPolyElement; + const UINT32 cbVector = pkMlKemkey->params.cbVector; + + // u vector encoded with nBitsOfU * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits per polynomial + cbU = nRows * nBitsOfU * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + // v polynomial encoded with nBitsOfV * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits + cbV = nBitsOfV * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + + if( (cbAgreedSecret != SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET) || + (cbCiphertext != cbU + cbV) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pvrInner = SymCryptMlKemVectorCreate( pCompTemps->abVectorBuffer0, cbVector, nRows ); + SYMCRYPT_ASSERT( pvrInner != NULL ); + pvTmp = SymCryptMlKemVectorCreate( pCompTemps->abVectorBuffer1, cbVector, nRows ); + SYMCRYPT_ASSERT( pvTmp != NULL ); + peTmp0 = SymCryptMlKemPolyElementCreate( pCompTemps->abPolyElementBuffer0, cbPolyElement ); + SYMCRYPT_ASSERT( peTmp0 != NULL ); + peTmp1 = SymCryptMlKemPolyElementCreate( pCompTemps->abPolyElementBuffer1, cbPolyElement ); + SYMCRYPT_ASSERT( peTmp1 != NULL ); + paTmp = SymCryptMlKemPolyElementAccumulatorCreate( 
pCompTemps->abPolyElementAccumulatorBuffer, 2*cbPolyElement ); + SYMCRYPT_ASSERT( paTmp != NULL ); + + // CBDSampleBuffer = (K || rOuter) = SHA3-512(pbRandom || encapsKeyHash) + SymCryptSha3_512Init( pHashState ); + SymCryptSha3_512Append( pHashState, pbRandom, SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM ); + SymCryptSha3_512Append( pHashState, pkMlKemkey->encapsKeyHash, sizeof(pkMlKemkey->encapsKeyHash) ); + SymCryptSha3_512Result( pHashState, CBDSampleBuffer ); + + // Write K to pbAgreedSecret + memcpy( pbAgreedSecret, CBDSampleBuffer, SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + + // Initialize pShakeBaseState with rOuter + SymCryptShake256Init( pShakeBaseState ); + SymCryptShake256Append( pShakeBaseState, CBDSampleBuffer+cbAgreedSecret, 32 ); + + // Expand rInner vector + for( i=0; i<nRows; i++ ) + { + CBDSampleBuffer[0] = (BYTE) i; + SymCryptShake256StateCopy( pShakeBaseState, pShakeWorkState ); + SymCryptShake256Append( pShakeWorkState, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeWorkState, CBDSampleBuffer, 64ul*nEta1, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta1, SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pvrInner) ); + } + + // Perform NTT on rInner + SymCryptMlKemVectorNTT( pvrInner ); + + // Set pvTmp to 0 + SymCryptMlKemVectorSetZero( pvTmp ); + + // pvTmp = (Atranspose o rInner) ./ R + SymCryptMlKemMatrixVectorMontMulAndAdd( pkMlKemkey->pmAtranspose, pvrInner, pvTmp, paTmp ); + + // pvTmp = INTT(Atranspose o rInner) + SymCryptMlKemVectorINTTAndMulR( pvTmp ); + + // Expand e1 and add it to pvTmp - do addition PolyElement-wise to reduce memory usage + for( i=0; i<nRows; i++ ) + { + CBDSampleBuffer[0] = (BYTE) (nRows+i); + SymCryptShake256StateCopy( pShakeBaseState, pShakeWorkState ); + SymCryptShake256Append( pShakeWorkState, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeWorkState, CBDSampleBuffer, 64ul*nEta2, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta2, peTmp0 ); + + 
SymCryptMlKemPolyElementAdd( SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pvTmp), peTmp0, SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pvTmp) ); + } + + // pvTmp = u = INTT(Atranspose o rInner) + e1 + // Compress and encode u into prefix of ciphertext + SymCryptMlKemVectorCompressAndEncode( pvTmp, nBitsOfU, pbCiphertext, cbU ); + + // peTmp0 = (t o r) ./ R + SymCryptMlKemVectorMontDotProduct( pkMlKemkey->pvt, pvrInner, peTmp0, paTmp ); + + // peTmp0 = INTT(t o r) + SymCryptMlKemPolyElementINTTAndMulR( peTmp0 ); + + // Expand e2 polynomial in peTmp1 + CBDSampleBuffer[0] = (BYTE) (2*nRows); + SymCryptShake256StateCopy( pShakeBaseState, pShakeWorkState ); + SymCryptShake256Append( pShakeWorkState, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeWorkState, CBDSampleBuffer, 64ul*nEta2, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta2, peTmp1 ); + + // peTmp0 = INTT(t o r) + e2 + SymCryptMlKemPolyElementAdd( peTmp0, peTmp1, peTmp0 ); + + // peTmp1 = mu + SymCryptMlKemPolyElementDecodeAndDecompress( pbRandom, 1, peTmp1 ); + + // peTmp0 = v = INTT(t o r) + e2 + mu + SymCryptMlKemPolyElementAdd( peTmp0, peTmp1, peTmp0 ); + + // Compress and encode v into remainder of ciphertext + SymCryptMlKemPolyElementCompressAndEncode( peTmp0, nBitsOfV, pbCiphertext+cbU ); + +cleanup: + SymCryptWipeKnownSize( CBDSampleBuffer, sizeof(CBDSampleBuffer) ); + + return scError; +} + +/* Encapsulates to pkMlKemkey using caller-supplied randomness: cbRandom must equal SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM, otherwise SYMCRYPT_INVALID_ARGUMENT is returned. Allocates the computation temporaries, delegates to SymCryptMlKemEncapsulateInternal, then wipes and frees the temporaries. */ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulateEx( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _In_reads_bytes_( cbRandom ) PCBYTE pbRandom, + SIZE_T cbRandom, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps = NULL; + + if( cbRandom != SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pCompTemps 
= SymCryptCallbackAlloc( sizeof(SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES) ); + if( pCompTemps == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptMlKemEncapsulateInternal( + pkMlKemkey, + pbAgreedSecret, cbAgreedSecret, + pbCiphertext, cbCiphertext, + pbRandom, + pCompTemps ); + +cleanup: + if( pCompTemps != NULL ) + { + SymCryptWipe( pCompTemps, sizeof(*pCompTemps) ); + SymCryptCallbackFree( pCompTemps ); + } + + return scError; +} +/* Encapsulates to pkMlKemkey with fresh randomness drawn from SymCryptCallbackRandom and passed to SymCryptMlKemEncapsulateEx; the random bytes are wiped before returning, on success and on failure. */ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulate( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE pbm[SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM]; + + scError = SymCryptCallbackRandom( pbm, sizeof(pbm) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptMlKemEncapsulateEx( + pkMlKemkey, + pbm, sizeof(pbm), + pbAgreedSecret, cbAgreedSecret, + pbCiphertext, cbCiphertext ); + +cleanup: + SymCryptWipeKnownSize( pbm, sizeof(pbm) ); + + return scError; +} +/* Decapsulates pbCiphertext with the private key held in pkMlKemkey and writes the agreed secret to pbAgreedSecret. Returns SYMCRYPT_INVALID_ARGUMENT for wrong buffer sizes or a key without private key material. When re-encapsulating the decrypted value does not reproduce the ciphertext, the implicit-rejection secret is written instead and the mismatch itself is not signalled as an error. */ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemDecapsulate( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _In_reads_bytes_( cbCiphertext ) PCBYTE pbCiphertext, + SIZE_T cbCiphertext, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ) +{ + PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps = NULL; + BYTE pbDecryptedRandom[SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM]; + BYTE pbDecapsulatedSecret[SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET]; + BYTE pbImplicitRejectionSecret[SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET]; + PBYTE pbReadCiphertext, pbReencapsulatedCiphertext; + BOOLEAN successfulReencrypt; + + PBYTE pbCurr; + PBYTE pbAlloc = NULL; + const SIZE_T cbAlloc = sizeof(SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES) + (2*cbCiphertext); /* computed from the not-yet-validated cbCiphertext; it is only used after the size check has passed and the allocation has succeeded */ + + SYMCRYPT_ERROR scError = 
SYMCRYPT_NO_ERROR; + SIZE_T cbU, cbV, cbCopy; + PSYMCRYPT_MLKEM_VECTOR pvu; + PSYMCRYPT_MLKEM_POLYELEMENT peTmp0, peTmp1; + PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp; + PSYMCRYPT_SHAKE256_STATE pShakeState; + const UINT32 nRows = pkMlKemkey->params.nRows; + const UINT32 nBitsOfU = pkMlKemkey->params.nBitsOfU; + const UINT32 nBitsOfV = pkMlKemkey->params.nBitsOfV; + const UINT32 cbPolyElement = pkMlKemkey->params.cbPolyElement; + const UINT32 cbVector = pkMlKemkey->params.cbVector; + + // u vector encoded with nBitsOfU * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits per polynomial + cbU = nRows * nBitsOfU * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + // v polynomial encoded with nBitsOfV * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits + cbV = nBitsOfV * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + + if( (cbAgreedSecret != SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET) || + (cbCiphertext != cbU + cbV) || + !pkMlKemkey->hasPrivateKey ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pbAlloc = SymCryptCallbackAlloc( cbAlloc ); + if( pbAlloc == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + pbCurr = pbAlloc; + + pCompTemps = (PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES) pbCurr; + pbCurr += sizeof(SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES); + + pbReadCiphertext = pbCurr; + pbCurr += cbCiphertext; + + pbReencapsulatedCiphertext = pbCurr; + pbCurr += cbCiphertext; + + SYMCRYPT_ASSERT( pbCurr == (pbAlloc + cbAlloc) ); + + // Read the input ciphertext once to local pbReadCiphertext to ensure our view of the ciphertext is consistent + memcpy( pbReadCiphertext, pbCiphertext, cbCiphertext ); + + pvu = SymCryptMlKemVectorCreate( pCompTemps->abVectorBuffer0, cbVector, nRows ); + SYMCRYPT_ASSERT( pvu != NULL ); + peTmp0 = SymCryptMlKemPolyElementCreate( pCompTemps->abPolyElementBuffer0, cbPolyElement ); + SYMCRYPT_ASSERT( peTmp0 != NULL ); + peTmp1 = SymCryptMlKemPolyElementCreate( pCompTemps->abPolyElementBuffer1, cbPolyElement ); 
+ SYMCRYPT_ASSERT( peTmp1 != NULL ); + paTmp = SymCryptMlKemPolyElementAccumulatorCreate( pCompTemps->abPolyElementAccumulatorBuffer, 2*cbPolyElement ); + SYMCRYPT_ASSERT( paTmp != NULL ); + + // Decode and decompress u + scError = SymCryptMlKemVectorDecodeAndDecompress( pbReadCiphertext, cbU, nBitsOfU, pvu ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + + // Perform NTT on u + SymCryptMlKemVectorNTT( pvu ); + + // peTmp0 = (s o NTT(u)) ./ R + SymCryptMlKemVectorMontDotProduct( pkMlKemkey->pvs, pvu, peTmp0, paTmp ); + + // peTmp0 = INTT(s o NTT(u)) + SymCryptMlKemPolyElementINTTAndMulR( peTmp0 ); + + // Decode and decompress v + scError = SymCryptMlKemPolyElementDecodeAndDecompress( pbReadCiphertext+cbU, nBitsOfV, peTmp1 ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + + // peTmp0 = w = v - INTT(s o NTT(u)) + SymCryptMlKemPolyElementSub( peTmp1, peTmp0, peTmp0 ); + + // pbDecryptedRandom = m' = Encoding of w + SymCryptMlKemPolyElementCompressAndEncode( peTmp0, 1, pbDecryptedRandom ); + + // Compute: + // pbDecapsulatedSecret = K' = Decapsulated secret (without implicit rejection) + // pbReencapsulatedCiphertext = c' = Ciphertext from re-encapsulating decrypted random value + scError = SymCryptMlKemEncapsulateInternal( + pkMlKemkey, + pbDecapsulatedSecret, sizeof(pbDecapsulatedSecret), + pbReencapsulatedCiphertext, cbCiphertext, + pbDecryptedRandom, + pCompTemps ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + + // Compute the secret we will return if using implicit rejection + // pbImplicitRejectionSecret = K_bar = SHAKE256( z || c ) + pShakeState = &pCompTemps->hashState0.shake256State; + SymCryptShake256Init( pShakeState ); + SymCryptShake256Append( pShakeState, pkMlKemkey->privateRandom, sizeof(pkMlKemkey->privateRandom) ); + SymCryptShake256Append( pShakeState, pbReadCiphertext, cbCiphertext ); + SymCryptShake256Extract( pShakeState, pbImplicitRejectionSecret, sizeof(pbImplicitRejectionSecret), FALSE ); + + // Constant time test if 
re-encryption successful + successfulReencrypt = SymCryptEqual( pbReencapsulatedCiphertext, pbReadCiphertext, cbCiphertext ); + + // If not successful, perform side-channel-safe copy of Implicit Rejection secret over Decapsulated secret (cbCopy is 0 when successfulReencrypt is 1 and the full secret size when it is 0 - assumes SymCryptEqual yields exactly 0 or 1, TODO confirm) + cbCopy = (((SIZE_T)successfulReencrypt)-1) & SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET; + SymCryptScsCopy( pbImplicitRejectionSecret, cbCopy, pbDecapsulatedSecret, SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + + // Write agreed secret (with implicit rejection) to pbAgreedSecret + memcpy( pbAgreedSecret, pbDecapsulatedSecret, SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + +cleanup: + if( pbAlloc != NULL ) + { + SymCryptWipe( pbAlloc, cbAlloc ); + SymCryptCallbackFree( pbAlloc ); + } + + SymCryptWipeKnownSize( pbDecryptedRandom, sizeof(pbDecryptedRandom) ); + SymCryptWipeKnownSize( pbDecapsulatedSecret, sizeof(pbDecapsulatedSecret) ); + SymCryptWipeKnownSize( pbImplicitRejectionSecret, sizeof(pbImplicitRejectionSecret) ); + + return scError; +} diff --git a/libs/symcrypt/lib/mlkem_primitives.c b/libs/symcrypt/lib/mlkem_primitives.c new file mode 100644 index 00000000000..ab8a6e86c82 --- /dev/null +++ b/libs/symcrypt/lib/mlkem_primitives.c @@ -0,0 +1,1442 @@ +// +// mlkem_primitives.c ML-KEM related functionality +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// Current approach is to represent polynomial ring elements as a 512-byte buffer (256 UINT16s). +// + +// Coefficients are added and subtracted when polynomials are in the NTT domain and in the lattice domain. +// +// Coefficients are only multiplied in the NTT/INTT operations, and in MulAdd which only operates on +// polynomials in NTT form. +// We choose to perform modular multiplication exclusively using Montgomery multiplication, that is, we choose +// a Montgomery divisor R, and modular multiplication always divides by R, as this makes reduction logic easy +// and quick. +// i.e. 
MontMul(a,b) -> ((a*b) / R) mod Q +// +// For powers of Zeta used as multiplication twiddle factors in NTT/INTT and base polynomial multiplication, +// we pre-multiply the constants by R s.t. +// MontMul(x, twiddleForZetaToTheK) -> x*(Zeta^K) mod Q. +// +// Most other modular multiplication can be done with a fixup deferred until the INTT. The one exception is in key +// generation, where A o s + e = t, we need to pre-multiply s' + +// R = 2^16 +const UINT32 SYMCRYPT_MLKEM_Rlog2 = 16; +const UINT32 SYMCRYPT_MLKEM_Rmask = 0xffff; + +// NegQInvModR = -Q^(-1) mod R +const UINT32 SYMCRYPT_MLKEM_NegQInvModR = 3327; + +// Rsqr = R^2 = (1<<32) mod Q +const UINT32 SYMCRYPT_MLKEM_Rsqr = 1353; +// RsqrTimesNegQInvModR = (Rsqr * NegQInvModR) mod R = (((1<<32) mod Q) * -Q^(-1)) mod R +const UINT32 SYMCRYPT_MLKEM_RsqrTimesNegQInvModR = 44983; + +// +// Zeta tables. +// Zeta = 17, which is a primitive 256-th root of unity modulo Q +// +// In ML-KEM we use powers of zeta to convert to and from NTT form +// and to perform multiplication between polynomials in NTT form +// + +// This table is a lookup for (Zeta^(BitRev(index)) * R) mod Q +// Used in NTT and INTT +// i.e. 
element 1 is Zeta^(BitRev(1)) * (2^16) mod Q == (17^64)*(2^16) mod 3329 == 2571 +// +// MlKemZetaBitRevTimesR = [ (pow(17, bitRev(i), 3329) << 16) % 3329 for i in range(128) ] +const UINT16 MlKemZetaBitRevTimesR[128] = +{ + 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, + 3158, 622, 1577, 182, 962, 2127, 1855, 1468, + 573, 2004, 264, 383, 2500, 1458, 1727, 3199, + 2648, 1017, 732, 608, 1787, 411, 3124, 1758, + 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, + 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, + 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, + 2931, 961, 1821, 2604, 448, 2264, 677, 2054, + 2226, 430, 555, 843, 2078, 871, 1550, 105, + 422, 587, 177, 3094, 3038, 2869, 1574, 1653, + 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, + 1739, 644, 2457, 349, 418, 329, 3173, 3254, + 817, 1097, 603, 610, 1322, 2044, 1864, 384, + 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, + 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, + 3221, 3021, 996, 991, 958, 1869, 1522, 1628, +}; + +// This table is a lookup for ((Zeta^(BitRev(index)) * R) mod Q) * -Q^(-1) mod R +// Used in NTT and INTT +// +// MlKemZetaBitRevTimesRTimesNegQInvModR = [ (((pow(17, bitRev(i), Q) << 16) % Q) * 3327) & 0xffff for i in range(128) ] +const UINT16 MlKemZetaBitRevTimesRTimesNegQInvModR[128] = +{ + 19, 34037, 50790, 64748, 52011, 12402, 37345, 16694, + 20906, 37778, 3799, 15690, 54846, 64177, 11201, 34372, + 5827, 48172, 26360, 29057, 59964, 1102, 44097, 26241, + 28072, 41223, 10532, 56736, 47109, 56677, 38860, 16162, + 5689, 6516, 64039, 34569, 23564, 45357, 44825, 40455, + 12796, 38919, 49471, 12441, 56401, 649, 25986, 37699, + 45652, 28249, 15886, 8898, 28309, 56460, 30198, 47286, + 52109, 51519, 29155, 12756, 48704, 61224, 24155, 17914, + 334, 54354, 11477, 52149, 32226, 14233, 45042, 21655, + 27738, 52405, 64591, 4586, 14882, 42443, 59354, 60043, + 33525, 32502, 54905, 35218, 36360, 18741, 28761, 52897, + 18485, 45436, 47975, 47011, 14430, 46007, 5275, 12618, + 31183, 45239, 40101, 63390, 7382, 
50180, 41144, 32384, + 20926, 6279, 54590, 14902, 41321, 11044, 48546, 51066, + 55200, 21497, 7933, 20198, 22501, 42325, 54629, 17442, + 33899, 23859, 36892, 20257, 41538, 57779, 17422, 42404, +}; + +// This table is a lookup for ((Zeta^(2*BitRev(index) + 1) * R) mod Q) +// Used in multiplication of 2 NTT-form polynomials +// +// zetaTwoTimesBitRevPlus1TimesR = [ (pow(17, 2*bitRev(i)+1, 3329) << 16) % 3329 for i in range(128) ] +const UINT16 zetaTwoTimesBitRevPlus1TimesR[128] = +{ + 2226, 1103, 430, 2899, 555, 2774, 843, 2486, + 2078, 1251, 871, 2458, 1550, 1779, 105, 3224, + 422, 2907, 587, 2742, 177, 3152, 3094, 235, + 3038, 291, 2869, 460, 1574, 1755, 1653, 1676, + 3083, 246, 778, 2551, 1159, 2170, 3182, 147, + 2552, 777, 1483, 1846, 2727, 602, 1119, 2210, + 1739, 1590, 644, 2685, 2457, 872, 349, 2980, + 418, 2911, 329, 3000, 3173, 156, 3254, 75, + 817, 2512, 1097, 2232, 603, 2726, 610, 2719, + 1322, 2007, 2044, 1285, 1864, 1465, 384, 2945, + 2114, 1215, 3193, 136, 1218, 2111, 1994, 1335, + 2455, 874, 220, 3109, 2142, 1187, 1670, 1659, + 2144, 1185, 1799, 1530, 2051, 1278, 794, 2535, + 1819, 1510, 2475, 854, 2459, 870, 478, 2851, + 3221, 108, 3021, 308, 996, 2333, 991, 2338, + 958, 2371, 1869, 1460, 1522, 1807, 1628, 1701, +}; +/* Returns pbBuffer viewed as a poly element without touching its contents; the alignment of pbBuffer and the exact value of cbBuffer are checked only through the SYMCRYPT_ASSERT macros. */ +PSYMCRYPT_MLKEM_POLYELEMENT +SYMCRYPT_CALL +SymCryptMlKemPolyElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ) +{ + PSYMCRYPT_MLKEM_POLYELEMENT pDst = (PSYMCRYPT_MLKEM_POLYELEMENT) pbBuffer; + + UNREFERENCED_PARAMETER( cbBuffer ); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); + + return pDst; +} +/* Returns pbBuffer viewed as a poly element accumulator without touching its contents; alignment and the exact value of cbBuffer are checked only through the SYMCRYPT_ASSERT macros. */ +PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR +SYMCRYPT_CALL +SymCryptMlKemPolyElementAccumulatorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ) +{ + PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR pDst = (PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR) pbBuffer; + + UNREFERENCED_PARAMETER( cbBuffer ); + + 
SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR ); + + return pDst; +} +/* Writes a vector header (nRows, cbTotalSize) at the start of pbBuffer and returns it. The loop stores nothing: it only walks the nRows element slots that follow the header so that the closing assertion can check they end exactly at pbBuffer + cbBuffer. Returns NULL only if an element could not be created. */ +PSYMCRYPT_MLKEM_VECTOR +SYMCRYPT_CALL +SymCryptMlKemVectorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT32 nRows ) +{ + PSYMCRYPT_MLKEM_VECTOR pDst = NULL; + PSYMCRYPT_MLKEM_VECTOR pVector = (PSYMCRYPT_MLKEM_VECTOR)pbBuffer; + PSYMCRYPT_MLKEM_POLYELEMENT peTmp = NULL; + UINT32 i; + PBYTE pbTmp = pbBuffer + sizeof(SYMCRYPT_MLKEM_VECTOR); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + pVector->nRows = nRows; + pVector->cbTotalSize = cbBuffer; + + for( i=0; i<nRows; i++ ) + { + peTmp = SymCryptMlKemPolyElementCreate( pbTmp, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); + if( peTmp == NULL ) + { + goto cleanup; + } + + pbTmp += SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT; + } + + SYMCRYPT_ASSERT( pbTmp == (pbBuffer + cbBuffer) ); + + pDst = pVector; + +cleanup: + return pDst; +} +/* Writes a matrix header (nRows, cbTotalSize) at the start of pbBuffer and points each of the nRows*nRows apPolyElements entries at consecutive element slots following the header; the closing assertion checks the slots end exactly at pbBuffer + cbBuffer. Returns NULL only if an element could not be created. */ +PSYMCRYPT_MLKEM_MATRIX +SYMCRYPT_CALL +SymCryptMlKemMatrixCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT32 nRows ) +{ + PSYMCRYPT_MLKEM_MATRIX pDst = NULL; + PSYMCRYPT_MLKEM_MATRIX pMatrix = (PSYMCRYPT_MLKEM_MATRIX)pbBuffer; + UINT32 i; + PBYTE pbTmp = pbBuffer + sizeof(SYMCRYPT_MLKEM_MATRIX); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + pMatrix->nRows = nRows; + pMatrix->cbTotalSize = cbBuffer; + + for( i=0; i<(nRows*nRows); i++ ) + { + pMatrix->apPolyElements[i] = SymCryptMlKemPolyElementCreate( pbTmp, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); + if( pMatrix->apPolyElements[i] == NULL ) + { + goto cleanup; + } + + pbTmp += SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT; + } + + SYMCRYPT_ASSERT( pbTmp == (pbBuffer + cbBuffer) ); + + pDst = pMatrix; + 
+cleanup: + return pDst; +} + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("sse2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("sse2") +#endif + +#define VEC128_TYPE_UINT16 __m128i + +#define VEC128_LOAD_UINT16( addr ) _mm_loadu_si128( (__m128i*) (addr) ) +#define VEC64_LOAD_UINT16( addr ) _mm_loadu_si64( (PBYTE) (addr) ) +#define VEC32_LOAD_UINT16( addr ) _mm_cvtsi32_si128( SYMCRYPT_LOAD_LSBFIRST32( addr ) ) + +#define VEC128_STORE_UINT16( addr, vec ) _mm_storeu_si128( (__m128i*) (addr), (vec) ) +#define VEC64_STORE_UINT16( addr, vec ) _mm_storeu_si64( (PBYTE) (addr), (vec) ) +#define VEC32_STORE_UINT16( addr, vec ) SYMCRYPT_STORE_LSBFIRST32( (addr), _mm_cvtsi128_si32( vec ) ) + +#define VEC128_SET_UINT16( value ) _mm_set1_epi16( (value) ) + +#define VEC128_MOD_SUB_UINT16( res, a, b, Q, zero, tmp1 ) \ + /* res = a - b */ \ + res = _mm_sub_epi16( a, b ); \ + /* tmp1 = (a - b) < 0 ? -1 : 0 */ \ + tmp1 = _mm_cmpgt_epi16( zero, res ); \ + /* tmp1 = (a - b) < 0 ? Q : 0 */ \ + tmp1 = _mm_and_si128( tmp1, Q ); \ + /* res = (a - b) mod Q */ \ + res = _mm_add_epi16( res, tmp1 ); + +#define VEC128_MOD_ADD_UINT16( res, a, b, Q, tmp1 ) \ + /* res = a + b */ \ + res = _mm_add_epi16( a, b ); \ + /* tmp1 = (a + b) < Q ? -1 : 0 */ \ + tmp1 = _mm_cmpgt_epi16( Q, res ); \ + /* tmp1 = (a + b) < Q ? 0 : Q */ \ + tmp1 = _mm_andnot_si128( tmp1, Q ); \ + /* res = (a + b) mod Q */ \ + res = _mm_sub_epi16( res, tmp1 ); + +#define VEC128_MONTGOMERY_MUL_UINT16( res, a, b, bTimesNegQInvModR, Q, zero, one, tmp1, tmp2 ) \ + /* tmp1 = a *low bTimesNegQInvModR */ \ + tmp1 = _mm_mullo_epi16( a, bTimesNegQInvModR ); \ + /* res = a *high b */ \ + res = _mm_mulhi_epu16( a, b ); \ + /* tmp2 = (tmp1 == 0) ? 
-1 : 0 */ \ + tmp2 = _mm_cmpeq_epi16( tmp1, zero ); \ + /* tmp1 = (a *low bTimesNegQInvModR) *high Q */ \ + tmp1 = _mm_mulhi_epu16( tmp1, Q ); \ + /* res = a *high b + 1 */ \ + res = _mm_add_epi16( res, one ); \ + /* res = a *high b (+ 1 carry if (a *low bTimesNegQInvModR) != 0) */ \ + res = _mm_add_epi16( res, tmp2 ); \ + /* res = a *high b + inv*Q (+ 1 carry if (a *low bTimesNegQInvModR) != 0) */ \ + res = _mm_add_epi16( res, tmp1 ); \ + /* res = (a*b + inv*Q >> 16) mod Q */ \ + VEC128_MOD_SUB_UINT16( res, res, Q, Q, zero, tmp1 ); + +#elif SYMCRYPT_CPU_ARM64 + +#define VEC128_TYPE_UINT16 uint16x8_t + +#define VEC128_LOAD_UINT16( addr ) vld1q_u16( addr ) +#define VEC64_LOAD_UINT16( addr ) vld1q_dup_u64( addr ) +#define VEC32_LOAD_UINT16( addr ) vld1q_dup_u32( addr ) + +#define VEC128_STORE_UINT16( addr, vec ) vst1q_u16( (addr), (vec) ) +#define VEC64_STORE_UINT16( addr, vec ) vst1_u16( (uint16_t*) (addr), vget_low_u16(vec) ) +#define VEC32_STORE_UINT16( addr, vec ) vst1_lane_u32( (PBYTE) (addr), vget_low_u32(vec), 0 ) + +#define VEC128_SET_UINT16( value ) vdupq_n_u16( (value) ) + +#define VEC128_MOD_SUB_UINT16( res, a, b, Q, zero, tmp1 ) \ + /* res = a - b */ \ + res = vsubq_u16( a, b ); \ + /* tmp1 = (a - b) < 0 ? -1 : 0 */ \ + tmp1 = vcltzq_s16( res ); \ + /* tmp1 = (a - b) < 0 ? Q : 0 */ \ + tmp1 = vandq_u16( tmp1, Q ); \ + /* res = (a - b) mod Q */ \ + res = vaddq_u16( res, tmp1 ); + +#define VEC128_MOD_ADD_UINT16( res, a, b, Q, tmp1 ) \ + /* res = a + b */ \ + res = vaddq_u16( a, b ); \ + /* tmp1 = (a + b) >= Q ? -1 : 0 */ \ + tmp1 = vcgeq_u16( res, Q ); \ + /* tmp1 = (a + b) >= Q ? 
Q : 0 */ \ + tmp1 = vandq_u16( tmp1, Q ); \ + /* res = (a + b) mod Q */ \ + res = vsubq_u16( res, tmp1 ); + +#define VEC128_MONTGOMERY_MUL_UINT16( res, a, b, bTimesNegQInvModR, Q, zero, one, tmp1, tmp2 ) \ + /* tmp1 = a *low bTimesNegQInvModR */ \ + tmp1 = vmulq_u16( a, bTimesNegQInvModR ); \ + /* tmp2 = a*b [0-3]*/ \ + tmp2 = vmull_u16( vget_low_u16(a), vget_low_u16(b) ); \ + /* res = a*b [4-7]*/ \ + res = vmull_high_u16( a, b ); \ + /* tmp2 = a*b + inv*Q [0-3]*/ \ + tmp2 = vmlal_u16( tmp2, vget_low_u16(tmp1), vget_low_u16(Q) ); \ + /* res = a*b + inv*Q [4-7]*/ \ + res = vmlal_high_u16( res, tmp1, Q ); \ + /* res = a*b + inv*Q >> 16 */ \ + res = vuzp2q_u16( tmp2, res ); \ + /* res = (a*b + inv*Q >> 16) mod Q */ \ + VEC128_MOD_SUB_UINT16( res, res, Q, Q, zero, tmp1 ); + +#endif + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTTLayerVec128( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ + UINT32 start, j; + VEC128_TYPE_UINT16 vc0, vc1, vTmp0, vTmp1, vc1Twiddle, vTwiddleFactor, vTwiddleFactorMont, vQ, vZero, vOne; + + SYMCRYPT_ASSERT( len >= 2 ); + + vQ = VEC128_SET_UINT16( SYMCRYPT_MLKEM_Q ); + vZero = VEC128_SET_UINT16( 0 ); + vOne = VEC128_SET_UINT16( 1 ); + + for( start=0; start<256; start+=(2*len) ) + { + vTwiddleFactor = VEC128_SET_UINT16( MlKemZetaBitRevTimesR[k] ); + vTwiddleFactorMont = VEC128_SET_UINT16( MlKemZetaBitRevTimesRTimesNegQInvModR[k] ); + k++; + for( j=0; j<len; j+=8 ) + { + if( len >= 8 ) + { + vc0 = VEC128_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC128_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + else if ( len == 4 ) + { + vc0 = VEC64_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC64_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + else /*if ( len == 2 )*/ + { + vc0 = VEC32_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC32_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + + // c1TimesTwiddle = twiddleFactor * c1 mod Q; + VEC128_MONTGOMERY_MUL_UINT16( vc1Twiddle, vc1, 
vTwiddleFactor, vTwiddleFactorMont, vQ, vZero, vOne, vTmp0, vTmp1 ); + // c1 = c0 - c1TimesTwiddle mod Q + VEC128_MOD_SUB_UINT16( vc1, vc0, vc1Twiddle, vQ, vZero, vTmp0 ); + // c0 = c0 + c1TimesTwiddle mod Q + VEC128_MOD_ADD_UINT16( vc0, vc0, vc1Twiddle, vQ, vTmp1 ); + + if( len >= 8 ) + { + VEC128_STORE_UINT16( &(peSrc->coeffs[start+j] ), vc0 ); + VEC128_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + else if ( len == 4 ) + { + VEC64_STORE_UINT16( &(peSrc->coeffs[start+j] ), vc0 ); + VEC64_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + else /*if ( len == 2 )*/ + { + VEC32_STORE_UINT16( &(peSrc->coeffs[start+j] ), vc0 ); + VEC32_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + } + } +} + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTLayerVec128( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ + UINT32 start, j; + VEC128_TYPE_UINT16 vc0, vc1, vTmp0, vTmp1, vTmp2, vTwiddleFactor, vTwiddleFactorMont, vQ, vZero, vOne; + + SYMCRYPT_ASSERT( len >= 2 ); + + vQ = VEC128_SET_UINT16( SYMCRYPT_MLKEM_Q ); + vZero = VEC128_SET_UINT16( 0 ); + vOne = VEC128_SET_UINT16( 1 ); + + for( start=0; start<256; start+=(2*len) ) + { + vTwiddleFactor = VEC128_SET_UINT16( MlKemZetaBitRevTimesR[k] ); + vTwiddleFactorMont = VEC128_SET_UINT16( MlKemZetaBitRevTimesRTimesNegQInvModR[k] ); + k--; + for( j=0; j<len; j+=8 ) + { + if( len >= 8 ) + { + vc0 = VEC128_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC128_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + else if ( len == 4 ) + { + vc0 = VEC64_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC64_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + else /*if ( len == 2 )*/ + { + vc0 = VEC32_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC32_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + + // tmp = c0 + c1 mod Q + VEC128_MOD_ADD_UINT16( vTmp2, vc0, vc1, vQ, vTmp0 ); + // c1 = c1 - c0 mod Q + VEC128_MOD_SUB_UINT16( vc1, vc1, vc0, vQ, vZero, vTmp1 ); 
+ // c1 = twiddleFactor * c1; + VEC128_MONTGOMERY_MUL_UINT16( vc1, vc1, vTwiddleFactor, vTwiddleFactorMont, vQ, vZero, vOne, vTmp0, vTmp1 ); + + if( len >= 8 ) + { + VEC128_STORE_UINT16( &(peSrc->coeffs[start+j] ), vTmp2 ); + VEC128_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + else if ( len == 4 ) + { + VEC64_STORE_UINT16( &(peSrc->coeffs[start+j] ), vTmp2 ); + VEC64_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + else /*if ( len == 2 )*/ + { + VEC32_STORE_UINT16( &(peSrc->coeffs[start+j] ), vTmp2 ); + VEC32_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + } + } +} + +#endif + +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlKemModAdd( + UINT32 a, + UINT32 b ) +{ + UINT32 res; + + SYMCRYPT_ASSERT( a < SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLKEM_Q ); + + res = a + b - SYMCRYPT_MLKEM_Q; + SYMCRYPT_ASSERT( ((res >> 16) == 0) || ((res >> 16) == 0xffff) ); + res = res + (SYMCRYPT_MLKEM_Q & (res >> 16)); + SYMCRYPT_ASSERT( res < SYMCRYPT_MLKEM_Q ); + + return res; +} + +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlKemModSub( + UINT32 a, + UINT32 b ) +{ + UINT32 res; + + SYMCRYPT_ASSERT( a < 2*SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( b <= SYMCRYPT_MLKEM_Q ); + + res = a - b; + SYMCRYPT_ASSERT( ((res >> 16) == 0) || ((res >> 16) == 0xffff) ); + res = res + (SYMCRYPT_MLKEM_Q & (res >> 16)); + SYMCRYPT_ASSERT( res < SYMCRYPT_MLKEM_Q ); + + return res; +} + +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlKemMontMul( + UINT32 a, + UINT32 b, + UINT32 bMont ) +{ + UINT32 res, inv; + + SYMCRYPT_ASSERT( a < SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( bMont <= SYMCRYPT_MLKEM_Rmask ); + SYMCRYPT_ASSERT( bMont == ((b * SYMCRYPT_MLKEM_NegQInvModR) & SYMCRYPT_MLKEM_Rmask) ); + + res = a * b; + inv = (a * bMont) & SYMCRYPT_MLKEM_Rmask; + res += inv * SYMCRYPT_MLKEM_Q; + SYMCRYPT_ASSERT( (res & SYMCRYPT_MLKEM_Rmask) == 0 ); + res = res >> SYMCRYPT_MLKEM_Rlog2; + + return SymCryptMlKemModSub( res, 
SYMCRYPT_MLKEM_Q ); +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTTLayerC( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ + UINT32 start, j; + UINT32 twiddleFactor, twiddleFactorMont, c0, c1, c1TimesTwiddle; + + for( start=0; start<256; start+=(2*len) ) + { + twiddleFactor = MlKemZetaBitRevTimesR[k]; + twiddleFactorMont = MlKemZetaBitRevTimesRTimesNegQInvModR[k]; + k++; + for( j=0; j<len; j++ ) + { + c0 = peSrc->coeffs[start+j]; + SYMCRYPT_ASSERT( c0 < SYMCRYPT_MLKEM_Q ); + c1 = peSrc->coeffs[start+j+len]; + SYMCRYPT_ASSERT( c1 < SYMCRYPT_MLKEM_Q ); + + c1TimesTwiddle = SymCryptMlKemMontMul( c1, twiddleFactor, twiddleFactorMont ); + c1 = SymCryptMlKemModSub( c0, c1TimesTwiddle ); + c0 = SymCryptMlKemModAdd( c0, c1TimesTwiddle ); + + peSrc->coeffs[start+j] = (UINT16) c0; + peSrc->coeffs[start+j+len] = (UINT16) c1; + } + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTLayerC( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ + UINT32 start, j; + UINT32 twiddleFactor, twiddleFactorMont, c0, c1, tmp; + + for( start=0; start<256; start+=(2*len) ) + { + twiddleFactor = MlKemZetaBitRevTimesR[k]; + twiddleFactorMont = MlKemZetaBitRevTimesRTimesNegQInvModR[k]; + k--; + for( j=0; j<len; j++ ) + { + c0 = peSrc->coeffs[start+j]; + SYMCRYPT_ASSERT( c0 < SYMCRYPT_MLKEM_Q ); + c1 = peSrc->coeffs[start+j+len]; + SYMCRYPT_ASSERT( c1 < SYMCRYPT_MLKEM_Q ); + + tmp = SymCryptMlKemModAdd( c0, c1 ); + c1 = SymCryptMlKemModSub( c1, c0 ); + c1 = SymCryptMlKemMontMul( c1, twiddleFactor, twiddleFactorMont ); + + peSrc->coeffs[start+j] = (UINT16) tmp; + peSrc->coeffs[start+j+len] = (UINT16) c1; + } + } +} + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTTLayer( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ +#if SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && SymCryptSaveXmm( &SaveData ) == 
SYMCRYPT_NO_ERROR ) + { + SymCryptMlKemPolyElementNTTLayerVec128( peSrc, k, len ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptMlKemPolyElementNTTLayerC( peSrc, k, len ); + } +#elif SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) ) + { + SymCryptMlKemPolyElementNTTLayerVec128( peSrc, k, len ); + } else { + SymCryptMlKemPolyElementNTTLayerC( peSrc, k, len ); + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + SymCryptMlKemPolyElementNTTLayerVec128( peSrc, k, len ); + } else { + SymCryptMlKemPolyElementNTTLayerC( peSrc, k, len ); + } +#else + SymCryptMlKemPolyElementNTTLayerC( peSrc, k, len ); +#endif +} + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTLayer( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ +#if SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptMlKemPolyElementINTTLayerVec128( peSrc, k, len ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptMlKemPolyElementINTTLayerC( peSrc, k, len ); + } +#elif SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) ) + { + SymCryptMlKemPolyElementINTTLayerVec128( peSrc, k, len ); + } else { + SymCryptMlKemPolyElementINTTLayerC( peSrc, k, len ); + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + SymCryptMlKemPolyElementINTTLayerVec128( peSrc, k, len ); + } else { + SymCryptMlKemPolyElementINTTLayerC( peSrc, k, len ); + } +#else + SymCryptMlKemPolyElementINTTLayerC( peSrc, k, len ); +#endif +} + +#define SYMCRYPT_MLKEM_MaxCoeff (SYMCRYPT_MLKEM_Q - 1) +#define SYMCRYPT_MLKEM_MaxCoeffProduct (SYMCRYPT_MLKEM_MaxCoeff*SYMCRYPT_MLKEM_MaxCoeff) + +// max([ ((i*j) + ((((i*j)*NegQInvModR) & Rmask)*Q)) >> Rlog2 for i in range(Q) for j in range(Q) ]) +#define 
SYMCRYPT_MLKEM_MaxFirstStepReduction (3494) +// max([ ( pow(17, (2*i)+1, Q) << Rlog2 ) % Q for i in range(128) ]) +#define SYMCRYPT_MLKEM_MaxZetaTwoTimesPlus1TimesR (3254) +#define SYMCRYPT_MLKEM_MaxA1B1ZetaPow (SYMCRYPT_MLKEM_MaxFirstStepReduction*SYMCRYPT_MLKEM_MaxZetaTwoTimesPlus1TimesR) + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementMulAndAccumulate( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paDst ) +{ + UINT32 i; + UINT32 a0, a1, b0, b1, c0, c1; + UINT32 a0b0, a1b1, a0b1, a1b0, a1b1zetapow, inv; + + for( i=0; i<(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 2); i++ ) + { + a0 = peSrc1->coeffs[(2*i) ]; + SYMCRYPT_ASSERT( a0 < SYMCRYPT_MLKEM_Q ); + a1 = peSrc1->coeffs[(2*i)+1]; + SYMCRYPT_ASSERT( a1 < SYMCRYPT_MLKEM_Q ); + + b0 = peSrc2->coeffs[(2*i) ]; + SYMCRYPT_ASSERT( b0 < SYMCRYPT_MLKEM_Q ); + b1 = peSrc2->coeffs[(2*i)+1]; + SYMCRYPT_ASSERT( b1 < SYMCRYPT_MLKEM_Q ); + + c0 = paDst->coeffs[(2*i) ]; + SYMCRYPT_ASSERT( c0 <= 3*(SYMCRYPT_MLKEM_MaxCoeffProduct + SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + c1 = paDst->coeffs[(2*i)+1]; + SYMCRYPT_ASSERT( c1 <= 3*(SYMCRYPT_MLKEM_MaxCoeffProduct + SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + + // multiplication results in range [0, MaxCoeffProduct = 3328*3328] + a0b0 = a0 * b0; + a1b1 = a1 * b1; + a0b1 = a0 * b1; + a1b0 = a1 * b0; + + // we need a1*b1*zetaTwoTimesBitRevPlus1TimesR[i] + // eagerly reduce a1*b1 with montgomery reduction + // a1b1 = red(a1*b1) -> range [0, MaxFirstStepReduction = 3494] + // (3494 is maximum result of first step of montgomery reduction of x*y for x,y in [0, 3328]) + // we do not need to do final reduction yet + inv = (a1b1 * SYMCRYPT_MLKEM_NegQInvModR) & SYMCRYPT_MLKEM_Rmask; + a1b1 = (a1b1 + (inv * SYMCRYPT_MLKEM_Q)) >> SYMCRYPT_MLKEM_Rlog2; // in range [0, MaxFirstStepReduction] + SYMCRYPT_ASSERT( a1b1 <= SYMCRYPT_MLKEM_MaxFirstStepReduction ); + + // now multiply a1b1 by power of zeta + a1b1zetapow = a1b1 * 
zetaTwoTimesBitRevPlus1TimesR[i]; + // MaxZetaTwoTimesPlus1TimesR = 3254 + // MaxA1B1ZetaPow = MaxFirstStepReduction*MaxZetaTwoTimesPlus1TimesR = 3494*3254 + SYMCRYPT_ASSERT( a1b1zetapow <= SYMCRYPT_MLKEM_MaxA1B1ZetaPow ); + + // sum pairs of products + a0b0 += a1b1zetapow; // a0*b0 + red(a1*b1)*zetapower in range [0, MaxCoeffProduct + MaxA1B1ZetaPow] + SYMCRYPT_ASSERT( a0b0 <= SYMCRYPT_MLKEM_MaxCoeffProduct + SYMCRYPT_MLKEM_MaxA1B1ZetaPow ); + a0b1 += a1b0; // a0*b1 + a1*b0 in range [0, 2*MaxCoeffProduct] + SYMCRYPT_ASSERT( a0b1 <= 2*SYMCRYPT_MLKEM_MaxCoeffProduct ); + + // We sum at most 4 pairs of products into an accumulator in ML-KEM + C_ASSERT( SYMCRYPT_MLKEM_MATRIX_MAX_NROWS <= 4 ); + c0 += a0b0; // in range [0,4*MaxCoeffProduct + 4*MaxA1B1ZetaPow] + SYMCRYPT_ASSERT( c0 < (4*SYMCRYPT_MLKEM_MaxCoeffProduct) + (4*SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + c1 += a0b1; // in range [0,5*MaxCoeffProduct + 3*MaxA1B1ZetaPow] + SYMCRYPT_ASSERT( c1 < (5*SYMCRYPT_MLKEM_MaxCoeffProduct) + (3*SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + + paDst->coeffs[(2*i) ] = c0; + paDst->coeffs[(2*i)+1] = c1; + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemMontgomeryReduceAndAddPolyElementAccumulatorToPolyElement( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paSrc, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + UINT32 a, c, inv; + + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + a = paSrc->coeffs[i]; + SYMCRYPT_ASSERT( a <= 4*(SYMCRYPT_MLKEM_MaxCoeffProduct + SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + paSrc->coeffs[i] = 0; + + c = peDst->coeffs[i]; + SYMCRYPT_ASSERT( c < SYMCRYPT_MLKEM_Q ); + + // montgomery reduce sum of products + inv = (a * SYMCRYPT_MLKEM_NegQInvModR) & SYMCRYPT_MLKEM_Rmask; + a = (a + (inv * SYMCRYPT_MLKEM_Q)) >> SYMCRYPT_MLKEM_Rlog2; // in range [0, 4711] + SYMCRYPT_ASSERT( a <= 4711 ); + + // add destination + c += a; + SYMCRYPT_ASSERT( c <= 8039 ); + + // subtraction and conditional additions for constant time range reduction + c -= 
2*SYMCRYPT_MLKEM_Q; // in range [-2Q, 1381] + SYMCRYPT_ASSERT( (c >= ((UINT32)(-2*SYMCRYPT_MLKEM_Q))) || (c < 1381) ); + c += SYMCRYPT_MLKEM_Q & (c >> 16); // in range [-Q, Q-1] + SYMCRYPT_ASSERT( (c >= ((UINT32)-SYMCRYPT_MLKEM_Q)) || (c < SYMCRYPT_MLKEM_Q) ); + c += SYMCRYPT_MLKEM_Q & (c >> 16); // in range [0, Q-1] + SYMCRYPT_ASSERT( c < SYMCRYPT_MLKEM_Q ); + + peDst->coeffs[i] = (UINT16) c; + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementMulR( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + peDst->coeffs[i] = (UINT16) SymCryptMlKemMontMul( + peSrc->coeffs[i], SYMCRYPT_MLKEM_Rsqr, SYMCRYPT_MLKEM_RsqrTimesNegQInvModR ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementAdd( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + peDst->coeffs[i] = (UINT16) SymCryptMlKemModAdd( peSrc1->coeffs[i], peSrc2->coeffs[i] ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSub( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + peDst->coeffs[i] = (UINT16) SymCryptMlKemModSub( peSrc1->coeffs[i], peSrc2->coeffs[i] ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTT( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc ) +{ + SymCryptMlKemPolyElementNTTLayer( peSrc, 1, 128 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 2, 64 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 4, 32 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 8, 16 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 16, 8 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 32, 4 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 64, 2 ); +} + +// INTTFixupTimesRsqr = R^2 * 3303 = 
(3303<<32) mod Q +// 3303 constant is fixup from FIPS 203 +// Multiplied by R^2 to additionally multiply coefficients by R after montgomery reduction +const UINT32 SYMCRYPT_MLKEM_INTTFixupTimesRsqr = 1441; +const UINT32 SYMCRYPT_MLKEM_INTTFixupTimesRsqrTimesNegQInvModR = 10079; + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTAndMulR( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc ) +{ + UINT32 i; + + SymCryptMlKemPolyElementINTTLayer( peSrc, 127, 2 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 63, 4 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 31, 8 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 15, 16 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 7, 32 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 3, 64 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 1, 128 ); + + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peSrc->coeffs[i] = (UINT16) SymCryptMlKemMontMul( + peSrc->coeffs[i], SYMCRYPT_MLKEM_INTTFixupTimesRsqr, SYMCRYPT_MLKEM_INTTFixupTimesRsqrTimesNegQInvModR ); + } +} + +// ((1<<33) / SYMCRYPT_MLKEM_Q) rounded to nearest integer +// +// 1<<33 is the smallest power of 2 s.t. the constant has sufficient precision to round +// all inputs correctly in compression for all nBitsPerCoefficient < 12. 
A smaller +// constant could be used for smaller nBitsPerCoefficient for a small performance gain +// +const UINT32 SYMCRYPT_MLKEM_COMPRESS_MULCONSTANT = 0x275f6f; +const UINT32 SYMCRYPT_MLKEM_COMPRESS_SHIFTCONSTANT = 33; + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementCompressAndEncode( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 nBitsPerCoefficient, + _Out_writes_bytes_(nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8)) + PBYTE pbDst ) +{ + UINT32 i; + UINT64 multiplication; + UINT32 coefficient; + UINT32 nBitsInCoefficient; + UINT32 bitsToEncode; + UINT32 nBitsToEncode; + UINT32 cbDstWritten = 0; + UINT32 accumulator = 0; + UINT32 nBitsInAccumulator = 0; + + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 12 ); + + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + nBitsInCoefficient = nBitsPerCoefficient; + coefficient = peSrc->coeffs[i]; // in range [0, Q-1] + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLKEM_Q ); + + // first compress the coefficient + // when nBitsPerCoefficient < 12 we compress per Compress_d in FIPS 203; + if(nBitsPerCoefficient < 12) + { + // Multiply by 2^(nBitsPerCoefficient+1) / Q by multiplying by constant and shifting right + multiplication = SYMCRYPT_MUL32x32TO64(coefficient, SYMCRYPT_MLKEM_COMPRESS_MULCONSTANT); + coefficient = (UINT32) (multiplication >> (SYMCRYPT_MLKEM_COMPRESS_SHIFTCONSTANT-(nBitsPerCoefficient+1))); + + // add "half" to round to nearest integer + coefficient++; + + // final divide by two to get multiplication by 2^nBitsPerCoefficient / Q + coefficient >>= 1; // in range [0, 2^nBitsPerCoefficient] + SYMCRYPT_ASSERT(coefficient <= (1UL<<nBitsPerCoefficient)); + + // modular reduction by masking + coefficient &= (1UL<<nBitsPerCoefficient)-1; // in range [0, 2^nBitsPerCoefficient - 1] + SYMCRYPT_ASSERT(coefficient < (1UL<<nBitsPerCoefficient)); + } + + // encode the coefficient + // simple loop to add bits to accumulator and write accumulator to 
output + do + { + nBitsToEncode = SYMCRYPT_MIN(nBitsInCoefficient, 32-nBitsInAccumulator); + + bitsToEncode = coefficient & ((1UL<<nBitsToEncode)-1); + coefficient >>= nBitsToEncode; + nBitsInCoefficient -= nBitsToEncode; + + accumulator |= (bitsToEncode << nBitsInAccumulator); + nBitsInAccumulator += nBitsToEncode; + if(nBitsInAccumulator == 32) + { + SYMCRYPT_STORE_LSBFIRST32( pbDst+cbDstWritten, accumulator ); + cbDstWritten += 4; + accumulator = 0; + nBitsInAccumulator = 0; + } + } while( nBitsInCoefficient > 0 ); + } + + SYMCRYPT_ASSERT(nBitsInAccumulator == 0); + SYMCRYPT_ASSERT(cbDstWritten == (nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8))); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemPolyElementDecodeAndDecompress( + _In_reads_bytes_(nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8)) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + UINT32 coefficient; + UINT32 nBitsInCoefficient; + UINT32 bitsToDecode; + UINT32 nBitsToDecode; + UINT32 cbSrcRead = 0; + UINT32 accumulator = 0; + UINT32 nBitsInAccumulator = 0; + + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 12 ); + + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + coefficient = 0; + nBitsInCoefficient = 0; + + // first gather and decode bits from pbSrc + do + { + if(nBitsInAccumulator == 0) + { + accumulator = SYMCRYPT_LOAD_LSBFIRST32( pbSrc+cbSrcRead ); + cbSrcRead += 4; + nBitsInAccumulator = 32; + } + + nBitsToDecode = SYMCRYPT_MIN(nBitsPerCoefficient-nBitsInCoefficient, nBitsInAccumulator); + SYMCRYPT_ASSERT(nBitsToDecode <= nBitsInAccumulator); + + bitsToDecode = accumulator & ((1UL<<nBitsToDecode)-1); + accumulator >>= nBitsToDecode; + nBitsInAccumulator -= nBitsToDecode; + + coefficient |= (bitsToDecode << nBitsInCoefficient); + nBitsInCoefficient += nBitsToDecode; + } while( nBitsPerCoefficient > nBitsInCoefficient ); + 
SYMCRYPT_ASSERT(nBitsInCoefficient == nBitsPerCoefficient); + + // decompress the coefficient + // when nBitsPerCoefficient < 12 we decompress per Decompress_d in FIPS 203 + // otherwise we perform input validation per 203 6.2 Input validation 2 (Modulus check) + if(nBitsPerCoefficient < 12) + { + // Multiply by Q / 2^(nBitsPerCoefficient-1) by multiplying by constant and shifting right + coefficient *= SYMCRYPT_MLKEM_Q; + coefficient >>= (nBitsPerCoefficient-1); + + // add "half" to round to nearest integer + coefficient++; + + // final divide by two to get multiplication by Q / 2^nBitsPerCoefficient + coefficient >>= 1; // in range [0, Q] + + // modular reduction by conditional subtraction + coefficient = SymCryptMlKemModSub( coefficient, SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLKEM_Q ); + } + else if( coefficient >= SYMCRYPT_MLKEM_Q ) + { + // input validation failure - this can happen with a malformed or corrupt encapsulation + // or decapsulation key; we do not need to be constant time because we treat the + // validity of an imported key as public information. 
+ return SYMCRYPT_INVALID_BLOB; + } + + peDst->coeffs[i] = (UINT16) coefficient; + } + + SYMCRYPT_ASSERT(nBitsInAccumulator == 0); + SYMCRYPT_ASSERT(cbSrcRead == (nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8))); + + return SYMCRYPT_NO_ERROR; +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSampleNTTFromShake128( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i=0; + BYTE shakeOutputBuf[3*8]; // Keccak likes extracting multiples of 8-bytes + UINT32 currBufIndex = sizeof(shakeOutputBuf); + UINT16 sample0, sample1; + + while( i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS ) + { + SYMCRYPT_ASSERT(currBufIndex <= sizeof(shakeOutputBuf)); + if( currBufIndex == sizeof(shakeOutputBuf) ) + { + SymCryptShake128Extract(pState, shakeOutputBuf, sizeof(shakeOutputBuf), FALSE); + currBufIndex = 0; + } + + sample0 = SYMCRYPT_LOAD_LSBFIRST16( shakeOutputBuf+currBufIndex ) & 0xfff; + sample1 = SYMCRYPT_LOAD_LSBFIRST16( shakeOutputBuf+currBufIndex+1 ) >> 4; + currBufIndex += 3; + + peDst->coeffs[i] = sample0; + i += sample0 < SYMCRYPT_MLKEM_Q; + + if( i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS ) + { + peDst->coeffs[i] = sample1; + i += sample1 < SYMCRYPT_MLKEM_Q; + } + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSampleCBDFromBytes( + _In_reads_bytes_(eta*2*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) + 1) + PCBYTE pbSrc, + _In_range_(2,3) UINT32 eta, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i, j; + UINT32 sampleBits; + UINT32 coefficient; + + SYMCRYPT_ASSERT((eta == 2) || (eta == 3)); + if( eta == 3 ) + { + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i+=4 ) + { + // unconditionally load 4 bytes into sampleBits, but only treat the load + // as being 3 bytes (24-bits -> 4 coefficients) for eta==3 to align to + // byte boundaries. 
Source buffer must be 1 byte larger than shake output + sampleBits = SYMCRYPT_LOAD_LSBFIRST32( pbSrc ); + pbSrc += 3; + + // sum bit samples - each consecutive slice of eta bits is summed together + sampleBits = (sampleBits&0x249249) + ((sampleBits>>1)&0x249249) + ((sampleBits>>2)&0x249249); + + for( j=0; j<4; j++ ) + { + // each coefficient is formed by taking the difference of two consecutive slices of eta bits + // the first eta bits are positive, the second eta bits are negative + coefficient = sampleBits & 0x3f; + sampleBits >>= 6; + coefficient = (coefficient&3) - (coefficient>>3); + SYMCRYPT_ASSERT((coefficient >= ((UINT32)-3)) || (coefficient <= 3)); + + coefficient = coefficient + (SYMCRYPT_MLKEM_Q & (coefficient >> 16)); // in range [0, Q-1] + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLKEM_Q ); + + peDst->coeffs[i+j] = (UINT16) coefficient; + } + } + } + else + { + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i+=8 ) + { + // unconditionally load 4 bytes (32-bits -> 8 coefficients) into sampleBits + sampleBits = SYMCRYPT_LOAD_LSBFIRST32( pbSrc ); + pbSrc += 4; + + // sum bit samples - each consecutive slice of eta bits is summed together + sampleBits = (sampleBits&0x55555555) + ((sampleBits>>1)&0x55555555); + + for( j=0; j<8; j++ ) + { + // each coefficient is formed by taking the difference of two consecutive slices of eta bits + // the first eta bits are positive, the second eta bits are negative + coefficient = sampleBits & 0xf; + sampleBits >>= 4; + coefficient = (coefficient&3) - (coefficient>>2); + SYMCRYPT_ASSERT((coefficient >= ((UINT32)-2)) || (coefficient <= 2)); + + coefficient = coefficient + (SYMCRYPT_MLKEM_Q & (coefficient >> 16)); // in range [0, Q-1] + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLKEM_Q ); + + peDst->coeffs[i+j] = (UINT16) coefficient; + } + } + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemMatrixTranspose( + _Inout_ PSYMCRYPT_MLKEM_MATRIX pmSrc ) +{ + UINT32 i, j; + PSYMCRYPT_MLKEM_POLYELEMENT swap; + const UINT32 nRows = 
pmSrc->nRows; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + for( i=0; i<nRows; i++ ) + { + for( j=i+1; j<nRows; j++ ) + { + swap = pmSrc->apPolyElements[(i*nRows) + j]; + pmSrc->apPolyElements[(i*nRows) + j] = pmSrc->apPolyElements[(j*nRows) + i]; + pmSrc->apPolyElements[(j*nRows) + i] = swap; + } + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemMatrixVectorMontMulAndAdd( + _In_ PCSYMCRYPT_MLKEM_MATRIX pmSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvDst, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp ) +{ + UINT32 i, j; + const UINT32 nRows = pmSrc1->nRows; + PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLKEM_POLYELEMENT peDst; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( pvSrc2->nRows == nRows ); + SYMCRYPT_ASSERT( pvDst->nRows == nRows ); + + // Zero paTmp + SymCryptWipeKnownSize( paTmp, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR ); + + for( i=0; i<nRows; i++ ) + { + for( j=0; j<nRows; j++ ) + { + peSrc1 = pmSrc1->apPolyElements[(i*nRows) + j]; + peSrc2 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( j, pvSrc2 ); + SymCryptMlKemPolyElementMulAndAccumulate( peSrc1, peSrc2, paTmp ); + } + + // write accumulator to dest and zero accumulator + peDst = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst ); + SymCryptMlKemMontgomeryReduceAndAddPolyElementAccumulatorToPolyElement( paTmp, peDst ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorMontDotProduct( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peDst, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp ) +{ + UINT32 i; + const UINT32 nRows = pvSrc1->nRows; + PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, peSrc2; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( pvSrc2->nRows == nRows ); + + // Zero 
paTmp and peDst + SymCryptWipeKnownSize( paTmp, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR ); + SymCryptWipeKnownSize( peDst, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); + + for( i=0; i<nRows; i++ ) + { + peSrc1 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc2 ); + SymCryptMlKemPolyElementMulAndAccumulate( peSrc1, peSrc2, paTmp ); + } + + // write accumulator to dest and zero accumulator + SymCryptMlKemMontgomeryReduceAndAddPolyElementAccumulatorToPolyElement( paTmp, peDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorSetZero( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ) +{ + const UINT32 nRows = pvSrc->nRows; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + SymCryptWipe( (PBYTE) SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( 0, pvSrc ), nRows*SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorMulR( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ) +{ + UINT32 i; + const UINT32 nRows = pvSrc->nRows; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( pvDst->nRows == nRows ); + + for( i=0; i<nRows; i++ ) + { + SymCryptMlKemPolyElementMulR( + SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc ), + SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst ) ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorAdd( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ) +{ + UINT32 i; + const UINT32 nRows = pvSrc1->nRows; + PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLKEM_POLYELEMENT peDst; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( pvSrc2->nRows == nRows ); + SYMCRYPT_ASSERT( pvDst->nRows == nRows ); + + for( i=0; i<nRows; i++ ) + { + peSrc1 = 
SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc2 ); + peDst = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst ); + SymCryptMlKemPolyElementAdd( peSrc1, peSrc2, peDst ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorSub( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ) +{ + UINT32 i; + const UINT32 nRows = pvSrc1->nRows; + PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLKEM_POLYELEMENT peDst; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( pvSrc2->nRows == nRows ); + SYMCRYPT_ASSERT( pvDst->nRows == nRows ); + + for( i=0; i<nRows; i++ ) + { + peSrc1 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc2 ); + peDst = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst ); + SymCryptMlKemPolyElementSub( peSrc1, peSrc2, peDst ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorNTT( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ) +{ + UINT32 i; + const UINT32 nRows = pvSrc->nRows; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + for( i=0; i<nRows; i++ ) + { + SymCryptMlKemPolyElementNTT( SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc ) ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorINTTAndMulR( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ) +{ + UINT32 i; + const UINT32 nRows = pvSrc->nRows; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + for( i=0; i<nRows; i++ ) + { + SymCryptMlKemPolyElementINTTAndMulR( SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc ) ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorCompressAndEncode( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc, + UINT32 nBitsPerCoefficient, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst ) +{ + UINT32 i; + const UINT32 nRows = pvSrc->nRows; + 
PCSYMCRYPT_MLKEM_POLYELEMENT peSrc; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 12 ); + SYMCRYPT_ASSERT( cbDst == nRows*nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ); + + UNREFERENCED_PARAMETER( cbDst ); + + for( i=0; i<nRows; i++ ) + { + peSrc = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc ); + SymCryptMlKemPolyElementCompressAndEncode( peSrc, nBitsPerCoefficient, pbDst ); + pbDst += nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + } +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemVectorDecodeAndDecompress( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 nBitsPerCoefficient, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 i; + const UINT32 nRows = pvDst->nRows; + PSYMCRYPT_MLKEM_POLYELEMENT peDst; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 12 ); + SYMCRYPT_ASSERT( cbSrc == nRows*nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ); + + UNREFERENCED_PARAMETER( cbSrc ); + + for( i=0; i<nRows; i++ ) + { + peDst = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst ); + scError = SymCryptMlKemPolyElementDecodeAndDecompress( pbSrc, nBitsPerCoefficient, peDst ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbSrc += nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + } + +cleanup: + return scError; +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyWipePrivateState( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ) +{ + SymCryptMlKemVectorSetZero( pkMlKemkey->pvs ); + SymCryptWipeKnownSize( pkMlKemkey->privateRandom, sizeof(pkMlKemkey->privateRandom) ); + SymCryptWipeKnownSize( pkMlKemkey->privateSeed, sizeof(pkMlKemkey->privateSeed) ); + 
pkMlKemkey->hasPrivateKey = FALSE; + pkMlKemkey->hasPrivateSeed = FALSE; +} + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif +#endif diff --git a/libs/symcrypt/lib/modexp.c b/libs/symcrypt/lib/modexp.c new file mode 100644 index 00000000000..674200cd640 --- /dev/null +++ b/libs/symcrypt/lib/modexp.c @@ -0,0 +1,510 @@ +// +// modexp.c Modular exponentiation functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// The windowed modular exponentiation algorithm works by generating a +// side-channel table of all the powers of the base from 0 up to 2^W - 1 +// where W is the window size: +// scsPrecomp = { 1, base, base^2, ..., base^(2^W-1) } +// +// TODO: To mitigate power analysis attacks when multiplying by 1 (which might +// contain a lot of zeros in non-Montgomery moduli), future work is to +// get rid of the 1 in the table. The leak is limited since now we always +// have Montgomery moduli. +// +// Then it slices the exponent into chunks of W bits and goes through +// each chunk of the exponent starting from the most significant +// chunk. For each chunk c_i it squares a temporary modelement +// W times and then multiplies it by scsPrecomp[c_i]. The starting +// value of the temporary modelement is scsPrecomp[c_0] i.e. the one +// corresponding to the most significant chunk. +// +// Denote by M and SQ the multiplications and squarings and by B = nBitsExp +// number of bits of the exponent. Then the algorithm does +// (2^W - 2)*M + (B-1)/W*(W*SQ + M) = +// (2^W + (B-1)/W -2) multiplications and (B-1) squarings +// +// It is beneficial to change the window size from W to W+1 when +// 2^(W+1) + (B-1)/(W+1) < 2^W + (B-1)/W => +// B > 2^W*W(W+1)+1 +// A simple table that calculates the optimal values for the window size +// is shown below. 
+// +// The minimum value of W is W=4 as 2^W should be a multiple +// of the groupsize of the scsTable, which is 4 by default. + +#define MIN_WINDOW_SIZE (4) + +static const UINT32 cutoffs[] = +{ + // 5, // W should be 2 for 5 < B <= 25 + // 25, // W should be 3 for 25 < B <= 97 + // 97, // W should be 4 for 97 < B <= 321 + 321, // W should be 5 for 321 < B <= 961 + // 961, // W should be 6 for 961 < B +}; + +static const UINT32 nCuttoffs = sizeof(cutoffs) / sizeof(cutoffs[0]); + +VOID +SYMCRYPT_CALL +SymCryptModExpWindowed( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peBase, + _In_ PCSYMCRYPT_INT piExp, + UINT32 nBitsExp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 W = 0; + UINT32 nTableElements = 0; + + SYMCRYPT_SCSTABLE scsPrecomp = { 0 }; + UINT32 cbScsPrecomp = 0; + + UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pmMod ); + + PSYMCRYPT_MODELEMENT peT1 = NULL; + PSYMCRYPT_MODELEMENT peT2 = NULL; + + UINT32 nIterations = 0; + UINT32 iBit = 0; + UINT32 nBits = 0; + UINT32 index = 0; + + // Truncate the nBitsExp if above the object size + nBitsExp = SYMCRYPT_MIN( nBitsExp, SymCryptIntBitsizeOfObject(piExp) ); + + // Calculate the window size + W = MIN_WINDOW_SIZE; + while ((W-MIN_WINDOW_SIZE < nCuttoffs) && (cutoffs[W-MIN_WINDOW_SIZE]<nBitsExp)) + { + W++; + } + nTableElements = (1<<W); + + // Initialize the table of temporary modelements + cbScsPrecomp = SymCryptScsTableInit( &scsPrecomp, nTableElements, cbModElement ); + + SYMCRYPT_ASSERT( cbScratch >= cbScsPrecomp + 2*cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pmMod->nDigits ) ); + + SymCryptScsTableSetBuffer( &scsPrecomp, pbScratch, cbScsPrecomp ); + pbScratch += cbScsPrecomp; + cbScratch -= cbScsPrecomp; + + // Create the temporary modelement + peT1 = SymCryptModElementCreate( pbScratch, cbModElement, pmMod ); + SYMCRYPT_ASSERT( peT1 != NULL ); + pbScratch += cbModElement; + 
cbScratch -= cbModElement; + peT2 = SymCryptModElementCreate( pbScratch, cbModElement, pmMod ); + SYMCRYPT_ASSERT( peT2 != NULL ); + pbScratch += cbModElement; + cbScratch -= cbModElement; + + // Fill the first element with 1 (**note: this will cause 0^0 = 1) + // and the second with peBase + SYMCRYPT_ASSERT( nTableElements >= 2 ); + + SymCryptModElementSetValueUint32( 1, pmMod, peT1, pbScratch, cbScratch ); + SymCryptScsTableStore( &scsPrecomp, 0, (PBYTE)peT1, cbModElement ); + + SymCryptModElementCopy( pmMod, peBase, peT1 ); + SymCryptScsTableStore( &scsPrecomp, 1, (PBYTE)peT1, cbModElement ); + + // Fill the table with the powers of peBase + for (UINT32 i=2; i<nTableElements; i++) + { + // TODO: Future improvement, use squarings for this table. + SymCryptModMul( pmMod, peT1, peBase, peT1, pbScratch, cbScratch ); + SymCryptScsTableStore( &scsPrecomp, i, (PBYTE)peT1, cbModElement ); + } + + // Find the number of iterations (minus one) and the starting position bit + SYMCRYPT_ASSERT( nBitsExp != 0 ); + nIterations = (nBitsExp - 1) / W; + iBit = nIterations * W; + + // Do the first chunk (it might be smaller than W bits) + nBits = nBitsExp - iBit; + index = SymCryptIntGetBits( piExp, iBit, nBits ); + SymCryptScsTableLoad( &scsPrecomp, index, (PBYTE)peT1, cbModElement ); + + // Work in batches of W bits in the exponent + for (UINT32 i=0; i<nIterations; i++) + { + // Square W times + for (UINT32 j=0; j<W; j++) + { + SymCryptModSquare( pmMod, peT1, peT1, pbScratch, cbScratch ); + } + + iBit -= W; + index = SymCryptIntGetBits( piExp, iBit, W ); + SymCryptScsTableLoad( &scsPrecomp, index, (PBYTE)peT2, cbModElement ); + + SymCryptModMul( pmMod, peT1, peT2, peT1, pbScratch, cbScratch ); + } + + SYMCRYPT_ASSERT( iBit == 0 ); + + SymCryptModElementCopy( pmMod, peT1, peDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptModExpSquareAndMultiply32( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peBase, + _In_ PCSYMCRYPT_INT piExp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + 
_Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pmMod ); + + PSYMCRYPT_MODELEMENT peT1 = NULL; + PSYMCRYPT_MODELEMENT peT2 = NULL; + + // The bits of the exponent when this function is called are + // always less than 32. + UINT32 exp = SymCryptIntGetValueLsbits32( piExp ); + + SYMCRYPT_ASSERT( cbScratch >= 2*cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pmMod->nDigits ) ); + + // Create the temporary modelements + peT1 = SymCryptModElementCreate( pbScratch, cbModElement, pmMod ); + SYMCRYPT_ASSERT( peT1 != NULL ); + pbScratch += cbModElement; + cbScratch -= cbModElement; + peT2 = SymCryptModElementCreate( pbScratch, cbModElement, pmMod ); + SYMCRYPT_ASSERT( peT2 != NULL ); + pbScratch += cbModElement; + cbScratch -= cbModElement; + + if (exp == 0) + { + SymCryptModElementSetValueUint32( 1, pmMod, peDst, pbScratch, cbScratch ); + } + else + { + SymCryptModElementSetValueUint32( 1, pmMod, peT1, pbScratch, cbScratch ); + SymCryptModElementCopy( pmMod, peBase, peT2 ); + + while (exp>1) + { + if (exp%2 == 1) + { + SymCryptModMul( pmMod, peT1, peT2, peT1, pbScratch, cbScratch ); + } + + SymCryptModSquare( pmMod, peT2, peT2, pbScratch, cbScratch ); + exp /= 2; + } + + SymCryptModMul( pmMod, peT1, peT2, peDst, pbScratch, cbScratch ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptModExpGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peBase, + _In_ PCSYMCRYPT_INT piExp, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + if ( ((flags & SYMCRYPT_FLAG_DATA_PUBLIC)!=0) && (nBitsExp <= sizeof(UINT32)*8) ) + { + SymCryptModExpSquareAndMultiply32( pmMod, peBase, piExp, peDst, pbScratch, cbScratch ); + } + else + { + SymCryptModExpWindowed( pmMod, peBase, piExp, nBitsExp, peDst, pbScratch, cbScratch ); // This is the default + } +} + +// +// MultiExponentiation +// + 
+// SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP: The maximum number of precomputed powers of the +// base point allowed for the multi-exponentiation operation. +// It should be equal to 2^(SYMCRYPT_FDEF_MAX_WINDOW_MODEXP-1) +#define SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP (1<<(SYMCRYPT_FDEF_MAX_WINDOW_MODEXP-1)) + +// SYMCRYPT_MODMULTIEXP_WINDOW_SIZE: Fixed window size for the WnafWithInterleaving +// implementation. It is found to give the faster running times for sizes +// 512 - 2048 bits. +#define SYMCRYPT_MODMULTIEXP_WINDOW_SIZE (5) + +C_ASSERT( (1 << (SYMCRYPT_MODMULTIEXP_WINDOW_SIZE-1)) <= SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP ); + +// +// The following function fills the table with odd powers +// of the base point B. +// +// The first value must be filled by the caller. +VOID +SYMCRYPT_CALL +SymCryptModExpPrecomputation( + _In_ PCSYMCRYPT_MODULUS pmP, + UINT32 nPrecomputedPowers, + _In_reads_( SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP ) + PSYMCRYPT_MODELEMENT * pePIs, + PSYMCRYPT_MODELEMENT peTemp, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch +) +{ + SYMCRYPT_ASSERT(nPrecomputedPowers>=2); + + // Calculate B^2 + SymCryptModSquare( pmP, pePIs[0], peTemp, pbScratch, cbScratch ); + + for (UINT32 i=1; i<nPrecomputedPowers; i++) + { + // B[i] = B^2*B[i-1] + SymCryptModMul( pmP, peTemp, pePIs[i-1], pePIs[i], pbScratch, cbScratch ); + } +} + +// +// The following is a similar algorithm to SymCryptEcpointMultiScalarMulWnafWithInterleaving. +// It is a NON SIDE-CHANNEL SAFE algorithm. 
+// +VOID +SYMCRYPT_CALL +SymCryptModMultiExpWnafWithInterleaving( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_( nBases ) PCSYMCRYPT_MODELEMENT * peBaseArray, + _In_reads_( nBases ) PCSYMCRYPT_INT * piExpArray, + UINT32 nBases, + UINT32 nBitsExp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 i, j; + + UINT32 w = 0; + UINT32 nPrecompPoints = 0; + UINT32 nRecodedDigits = 0; + + // Masks + UINT32 fOne[SYMCRYPT_MODMULTIEXP_MAX_NBASES] = { 0 }; + UINT32 fOneTot = 0xffffffff; // Final result 1 + + UINT32 fZeroExp = 0; // Zero exponent + UINT32 fZeroTot = 0; // Final result 0 + + UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pmMod ); + + // ==================================================== + // Temporaries + PSYMCRYPT_MODELEMENT pePIs[SYMCRYPT_MODMULTIEXP_MAX_NBASES*SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP] = { 0 }; + PSYMCRYPT_MODELEMENT peTemp = NULL; + PSYMCRYPT_MODELEMENT peOne = NULL; + + PUINT32 absofKIs = NULL; + // =================================================== + + // Calculate the window size + w = SYMCRYPT_MODMULTIEXP_WINDOW_SIZE; + nPrecompPoints = (1 << (w-1)); // We only store odd powers of the base point + + // Number of recoded digits + nRecodedDigits = nBitsExp; + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - nBases, nRecodedDigits, and nPrecompPoints are bounded by SYMCRYPT_MODMULTIEXP_MAX_NBASES, + // SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP, and SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP, respectively. + // Thus the following calculation does not overflow cbScratch.
+ // + SYMCRYPT_ASSERT( SYMCRYPT_MODMULTIEXP_MAX_NBASES >= nBases ); + SYMCRYPT_ASSERT( SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP >= nPrecompPoints ); + + // Creating temporary precomputed modelements + for (i=0; i<nBases*nPrecompPoints; i++) + { + SYMCRYPT_ASSERT( cbScratch >= cbModElement ); + pePIs[i] = SymCryptModElementCreate( pbScratch, cbModElement, pmMod ); + SYMCRYPT_ASSERT( pePIs[i] != NULL ); + pbScratch += cbModElement; + cbScratch -= cbModElement; + } + + SYMCRYPT_ASSERT( cbScratch >= + 2*cbModElement + + ((nBases*nRecodedDigits*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE)*SYMCRYPT_ASYM_ALIGN_VALUE + + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( SymCryptModulusDigitsizeOfObject( pmMod ) ) ); + + // Creating temporary points + peTemp = SymCryptModElementCreate( pbScratch, cbModElement, pmMod ); + SYMCRYPT_ASSERT( peTemp != NULL ); + pbScratch += cbModElement; + cbScratch -= cbModElement; + + peOne = SymCryptModElementCreate( pbScratch, cbModElement, pmMod ); + SYMCRYPT_ASSERT( peOne != NULL ); + pbScratch += cbModElement; + cbScratch -= cbModElement; + + // Fixing pointers to recoded digits (be careful that the remaining space is SYMCRYPT_ASYM_ALIGNed) + absofKIs = (PUINT32) pbScratch; + pbScratch += nBases * nRecodedDigits * sizeof(UINT32); + cbScratch -= nBases * nRecodedDigits * sizeof(UINT32); + + // Update cbScratch first using pbScratch, as the amount of scratch skipped for alignment depends upon the alignment of pbScratch + cbScratch -= ( ((SIZE_T)pbScratch + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1) ) - (SIZE_T)pbScratch; + pbScratch = (PBYTE) ( ((SIZE_T)pbScratch + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1) ); + + + // + // Main algorithm + // + + // Set peOne to 1 + SymCryptModElementSetValueUint32( 1, pmMod, peOne, pbScratch, cbScratch ); + + // Zero-out all recoded digits + SymCryptWipe( (PBYTE)absofKIs, nBases*nRecodedDigits*sizeof(UINT32) ); + + for (j = 0; 
j<nBases; j++) + { + // Check if the exponent is zero + fZeroExp = SymCryptIntIsEqualUint32( piExpArray[j], 0 ); + + // Check if the result is 0 (i.e. 0^e with e!=0) + if( !fZeroExp && SymCryptModElementIsZero(pmMod, peBaseArray[j]) ) + { + fZeroTot = 0xffffffff; + break; + } + + // Check if the exponent is 0 or if the base point is 1 + fOne[j] = ( fZeroExp | SymCryptModElementIsEqual( pmMod, peBaseArray[j], peOne ) ); + fOneTot &= fOne[j]; + + // Skip the recoding stage (and all remaining steps) if this point will give result 1 + if (!fOne[j]) + { + // Recoding stage + SymCryptPositiveWidthNafRecoding( w, piExpArray[j], nBitsExp, &absofKIs[j*nRecodedDigits], nRecodedDigits ); + + // Copy the base in the start of the pePIs array + SymCryptModElementCopy( pmMod, peBaseArray[j], pePIs[j*nPrecompPoints] ); + + // Precomputation stage + SymCryptModExpPrecomputation( pmMod, nPrecompPoints, &pePIs[j*nPrecompPoints], peTemp, pbScratch, cbScratch ); + } + } + + if (fZeroTot) + { + SymCryptModElementSetValueUint32( 0, pmMod, peDst, pbScratch, cbScratch ); + } + else + { + SymCryptModElementSetValueUint32( 1, pmMod, peTemp, pbScratch, cbScratch ); + + if (!fOneTot) + { + // Main loop + for (INT32 i = nRecodedDigits-1; i>-1; i--) + { + SymCryptModSquare( pmMod, peTemp, peTemp, pbScratch, cbScratch ); + + for (j = 0; j<nBases; j++) + { + if (absofKIs[j*nRecodedDigits + i] != 0) + { + SymCryptModMul( pmMod, peTemp, pePIs[j*nPrecompPoints + absofKIs[j*nRecodedDigits + i]/2], peTemp, pbScratch, cbScratch ); + } + } + } + } + + // Copy the result into the destination + SymCryptModElementCopy( pmMod, peTemp, peDst ); + } +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModMultiExpGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_( nBases ) PCSYMCRYPT_MODELEMENT * peBaseArray, + _In_reads_( nBases ) PCSYMCRYPT_INT * piExpArray, + UINT32 nBases, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch 
) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if ( (nBases > SYMCRYPT_MODMULTIEXP_MAX_NBASES) || + (nBitsExp > SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ((flags & SYMCRYPT_FLAG_DATA_PUBLIC)!=0) + { + SymCryptModMultiExpWnafWithInterleaving( pmMod, peBaseArray, piExpArray, nBases, nBitsExp, peDst, pbScratch, cbScratch ); + } + else + { + UINT32 cbModElement = 0; + PSYMCRYPT_MODELEMENT peTemp = NULL; + PSYMCRYPT_MODELEMENT peAcc = NULL; + + // Use two temporary modelements to store the results + // *** Make sure that the scratch space is enough i.e. the scratch space of ModMultiExp is + // at least 2 modelements bigger than the scratch space of ModExp + cbModElement = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(SymCryptModulusDigitsizeOfObject(pmMod)) + 2*cbModElement <= + SYMCRYPT_SCRATCH_BYTES_FOR_MODMULTIEXP( SymCryptModulusDigitsizeOfObject(pmMod), nBases, nBitsExp ) ); + SYMCRYPT_ASSERT( cbScratch >= 2*cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(SymCryptModulusDigitsizeOfObject(pmMod)) ); + + peTemp = SymCryptModElementCreate( pbScratch, cbModElement, pmMod ); + pbScratch += cbModElement; cbScratch -= cbModElement; + + peAcc = SymCryptModElementCreate( pbScratch, cbModElement, pmMod ); + pbScratch += cbModElement; cbScratch -= cbModElement; + + // Set peAcc to 1 + SymCryptModElementSetValueUint32( 1, pmMod, peAcc, pbScratch, cbScratch ); + + for (UINT32 i=0; i<nBases; i++) + { + SymCryptModExpWindowed( pmMod, peBaseArray[i], piExpArray[i], nBitsExp, peTemp, pbScratch, cbScratch ); + + SymCryptModMul( pmMod, peAcc, peTemp, peAcc, pbScratch, cbScratch ); + } + + // Copy the result into the destination + SymCryptModElementCopy( pmMod, peAcc, peDst ); + } + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/paddingPkcs7.c b/libs/symcrypt/lib/paddingPkcs7.c new file mode 100644 index 00000000000..5d3a466b253 --- /dev/null 
+++ b/libs/symcrypt/lib/paddingPkcs7.c @@ -0,0 +1,167 @@ +// +// paddingPkcs7.c Add/Remove PKCS7 padding +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + + +VOID +SYMCRYPT_CALL +SymCryptPaddingPkcs7Add( + SIZE_T cbBlockSize, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + SIZE_T* pcbResult) +{ + SIZE_T cbPadVal; // cbPadVal is the number of bytes to pad. + SIZE_T cbDataLastBlock; // cbDataLastBlock is the number of bytes of data in the final block. + SIZE_T cbResult = 0; // This variable must always have a valid value when we finish the function. + + SYMCRYPT_ASSERT(cbBlockSize < 256); // cbBlockSize must be < 256 + SYMCRYPT_ASSERT((cbBlockSize & (cbBlockSize - 1)) == 0); // cbBlockSize must be a power of 2 + + // + // Compute the padding parameters. + // + + cbDataLastBlock = (cbSrc & (cbBlockSize - 1)); + + cbResult = (cbSrc - cbDataLastBlock + cbBlockSize); + + SYMCRYPT_ASSERT(cbDst >= cbResult); // cbDst >= cbSrc - cbSrc % cbBlockSize + cbBlockSize + + if (cbResult > cbDst) + { + goto cleanup; + } + + cbPadVal = (cbBlockSize - cbDataLastBlock); + + // + // perform the padding + // + + // cbSrc must be greater than zero. memcpy(pbDst, NULL, 0) is not defined! + if (pbDst != pbSrc && cbSrc > 0) + { + memcpy(pbDst, pbSrc, cbSrc); + } + + memset(pbDst + cbSrc, (int)cbPadVal, cbPadVal); + +cleanup: + *pcbResult = cbResult; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPaddingPkcs7Remove( + SIZE_T cbBlockSize, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + SIZE_T* pcbResult) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + UINT32 mPaddingError = 0; // Indicates whether there is an error in padding or not. + UINT32 mBufferSizeError = 0; // Indicates whether pbDst is large enough to contain the entire message.
+ UINT32 mask = 0; // Mask for message bytes at the final block. + UINT32 cbPadVal; // cbPadVal is the number of padded bytes. + UINT32 cbSrc32; + UINT32 cbDst32; + UINT32 cbMsg32; + + SIZE_T cbBulk = 0; + SIZE_T cbResult; // This variable must always have a valid value when we finish the function. + + + SYMCRYPT_ASSERT(cbBlockSize < 256); // cbBlockSize must be < 256 + SYMCRYPT_ASSERT((cbBlockSize & (cbBlockSize - 1)) == 0); // cbBlockSize must be a power of 2 + SYMCRYPT_ASSERT((cbSrc & (cbBlockSize - 1)) == 0); // cbSrc is a multiple of cbBlockSize + SYMCRYPT_ASSERT(cbSrc > 0); // cbSrc is greater than zero + + cbPadVal = (UINT32)pbSrc[cbSrc - 1]; + + // check the Padding to make sure it is valid. + mPaddingError |= SymCryptMask32IsZeroU31(cbPadVal) | SymCryptMask32LtU31((UINT32)cbBlockSize, cbPadVal); + + // If cbPadVal is greater than cbSrc, SYMCRYPT_INVALID_ARGUMENT will be returned + // and cbResult will not be the right value. + cbResult = cbSrc - cbPadVal; + + // + // Bulk processing + // + + cbDst = SYMCRYPT_MIN(cbDst, cbSrc); + + cbBulk = cbSrc - cbBlockSize; + + // cbSrc, cbDst, and blockSize are not secrets. + // This condition can be checked in a non-side channel safe way. + if (cbDst < cbBulk) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + if (pbDst != pbSrc) + { + memcpy(pbDst, pbSrc, cbBulk); + } + + // Updating parameters + pbSrc += cbBulk; cbSrc -= cbBulk; + pbDst += cbBulk; cbDst -= cbBulk; + + cbSrc32 = (UINT32)cbSrc; + cbDst32 = (UINT32)cbDst; + + // + // Validating padding + // + // If cbPadVal is greater than cbBlockSize, + // we have to limit cbPadVal to be at most equal to cbBlockSize. + cbPadVal = 1 + ((cbPadVal - 1) & (cbBlockSize - 1)); + cbMsg32 = (UINT32)(cbBlockSize - cbPadVal); + + //check Dst buffer length to make sure it is possible to copy the whole message (not including the padding).
+ mBufferSizeError |= SymCryptMask32LtU31(cbDst32, cbMsg32); + + // + // Final Block processing + // + + // Updating only the bytes of the message and leaving the other bytes in pbDst unchanged. + // Validating the value of the padded bytes. + + for (UINT32 i = 0; i < cbBlockSize; ++i) // cbDst <= cbSrc == cbBlockSize + { + mask = SymCryptMask32LtU31(i, cbMsg32); + + mPaddingError |= (SymCryptMask32IsNonzeroU31((UINT32)pbSrc[i] ^ cbPadVal) & ~mask); + + if (i < cbDst) + { + pbDst[i] ^= (pbDst[i] ^ pbSrc[i]) & mask; + } + } + +cleanup: + + *pcbResult = cbResult; + + // Update scError with the two error masks. + // SYMCRYPT_INVALID_ARGUMENT gets precedence over SYMCRYPT_BUFFER_TOO_SMALL + scError ^= mBufferSizeError & (scError ^ SYMCRYPT_BUFFER_TOO_SMALL); + scError ^= mPaddingError & (scError ^ SYMCRYPT_INVALID_ARGUMENT); + + return scError; +} diff --git a/libs/symcrypt/lib/parhash.c b/libs/symcrypt/lib/parhash.c new file mode 100644 index 00000000000..ee933e1fd52 --- /dev/null +++ b/libs/symcrypt/lib/parhash.c @@ -0,0 +1,517 @@ +// +// ParHash.c +// Code shared with all the parallel hash implementations +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelHashProcess_serial( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_updates_bytes_( nStates * pParHash->pHash->stateSize ) PVOID pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T i; + PSYMCRYPT_PARALLEL_HASH_OPERATION op; + PCSYMCRYPT_HASH pHash; + + pHash = pParHash->pHash; + op = pOperations; + + // + // Wipe the scratch space to detect erroneous callers. + // We do this so that callers that test on a non-parallel platform will work on a platform that does support + // parallel operations. 
+ // + if( cbScratch < pParHash->parScratchFixed + nStates * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + SymCryptWipeKnownSize( pbScratch, pParHash->parScratchFixed + nStates * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH ); + + for( i=0; i<nOperations; i++ ) + { + if( op->iHash >= nStates ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + switch( op->hashOperation ) + { + case SYMCRYPT_HASH_OPERATION_APPEND: + (*pHash->appendFunc)( (PBYTE)pStates + pHash->stateSize * op->iHash, op->pbBuffer, op->cbBuffer ); + break; + + case SYMCRYPT_HASH_OPERATION_RESULT: + if( op->cbBuffer != pHash->resultSize ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + (*pHash->resultFunc)( (PBYTE)pStates + pHash->stateSize * op->iHash, op->pbBuffer ); + break; + + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + op++; + } + +cleanup: + return scError; +} + +// +// This function looks at a state and decides what to do. +// If it returns FALSE, then this state is done and no further processing is required. +// If it returns TRUE, the pbData/cbData have to be processed in parallel. +// This function is called again on the same state after the pbData/cbData have been processed. +// +// Internally, it keeps track of the next step to be taken for this state. +// the processingState keeps track of the next action to take. +// + +// +// An enum to keep track of the state of a request block +// +BOOLEAN +SYMCRYPT_CALL +SymCryptParallelHashSetNextWork( PCSYMCRYPT_PARALLEL_HASH pParHash, PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch ) +{ + PSYMCRYPT_COMMON_HASH_STATE pState; + PCSYMCRYPT_HASH pHash; + PCSYMRYPT_PARALLEL_HASH_OPERATION pOp; + SIZE_T bytesInBuffer; + SIZE_T todo; + BOOLEAN res; + + // Retrieve the state we will operate on. 
+ pState = (PSYMCRYPT_COMMON_HASH_STATE) pScratch->hashState; + pHash = pParHash->pHash; + + // + // This is a state machine where some states have to iterate + // The loop allows them to use 'continue' for that. + // +#pragma warning( suppress: 4127 ) // conditional expression is constant + while( TRUE ) + { + // + // At this point, the processing state, pbData/cbData, and next pointer define what needs to be done. + // STATE_NEXT: cbData == 0 and we have to process the remaining operations. + // STATE_DATA_START: We are working on the next operation; the first bytesAlreadyProcessed have been hashed, + // and the hash state has an empty buffer. + // STATE_DATA_END: We are working on the next operation (an append), and pbData/cbData have whatever partial block remains + // after all the whole blocks have been processed. + // STATE_RESULT2: We are working on the next operation (a result), and have processed the first half of a 2-block padding. + // STATE_RESULT_DONE: We are working on the next operation (a result), and have processed all the padding. + // + // The pState->dataLengthL/dataLengthH is updated whenever we copy bytes from the append into the state's buffer, or when + // we return TRUE and process bulk data. + // + pOp = pScratch->next; + switch( pScratch->processingState ) + { + case STATE_NEXT: + + if( pOp == NULL ) + { + return FALSE; + } + + bytesInBuffer = pState->bytesInBuffer; + + // SYMCRYPT_ASSERT( pOp->cbBuffer < ((SIZE_T)-1)/2 ); // used during testing + + if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND ) + { + pState->dataLengthL += pOp->cbBuffer; + if( pState->dataLengthL < pOp->cbBuffer ) { + pState->dataLengthH ++; // This is almost-unreachable code as it requires 2^64 bytes to be hashed.
+ } + + if( bytesInBuffer > 0 ) + { + SYMCRYPT_ASSERT( pHash->inputBlockSize > bytesInBuffer ); + + todo = SYMCRYPT_MIN( pHash->inputBlockSize - bytesInBuffer, pOp->cbBuffer ); + memcpy( &pState->buffer[bytesInBuffer], pOp->pbBuffer, todo ); + pState->bytesInBuffer += (UINT32) todo; + if( pState->bytesInBuffer == pHash->inputBlockSize ) + { + // + // We filled the buffer; set it for processing. + // Remember the # bytes we did and set the next state to process the rest of the request. + // + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = pHash->inputBlockSize; + if( todo == pOp->cbBuffer ) + { + // + // We finished the request after the pbData processing + // + pScratch->next = pOp->next; + // pScratch->processingState = STATE_NEXT // already has that value + } else { + pScratch->processingState = STATE_DATA_START; + SYMCRYPT_ASSERT( todo <= 0xff ); + pScratch->bytesAlreadyProcessed = (BYTE) todo; + } + + pState->bytesInBuffer = 0; // it will be after we process the block + return TRUE; + } else { + // + // We finished the operation; skip to the next one. + // + pScratch->next = pOp->next; + // pScratch->processingState = STATE_NEXT // already has that value + continue; + } + } else { + // + // Buffer is empty; process the bulk data + // + pScratch->pbData = pOp->pbBuffer; + pScratch->cbData = pOp->cbBuffer; + pScratch->processingState = STATE_DATA_END; + + // + // Return TRUE if there is real data to process, and just re-run the state + // machine if we should copy the partial block to the buffer. 
+ // + if( pScratch->cbData >= pHash->inputBlockSize ) + { + return TRUE; + } else { + continue; + } + } + } else { + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ); + + if( (*pParHash->parResult1Func)( pParHash, pState, pScratch, &res ) ) + { + return res; + } + } + break; + + case STATE_DATA_START: + // + // The next operation is an append, and the first few bytes of that operation have already been copied to + // the buffer and processed. We need to process the rest. + // Note that the # bytes remaining is never zero. + // + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND && pOp->cbBuffer >= pScratch->bytesAlreadyProcessed ); + + pScratch->pbData = pOp->pbBuffer + pScratch->bytesAlreadyProcessed; + pScratch->cbData = pOp->cbBuffer - pScratch->bytesAlreadyProcessed; + if( pScratch->cbData >= pHash->inputBlockSize ) + { + pScratch->processingState = STATE_DATA_END; + return TRUE; + } + + // + // We have less than one block left; this is exactly the same state as we have at the end of + // a normal append. Fall through to that code. + // + // FALLTHROUGH! + + case STATE_DATA_END: + // + // We finished processing the whole blocks of the pScratch->pbData, and have to process the rest. + // The current append is already popped off the work list. 
+ // + if( pScratch->cbData > 0 ) + { + SYMCRYPT_ASSERT( pScratch->cbData < pHash->inputBlockSize ); + memcpy( &pState->buffer[0], pScratch->pbData, pScratch->cbData ); + pState->bytesInBuffer = (UINT32) pScratch->cbData; + } + pScratch->next = pOp->next; + pScratch->processingState = STATE_NEXT; + continue; + + case STATE_RESULT2: + if( (*pParHash->parResult2Func)( pParHash, pState, pScratch, &res ) ) + { + return res; + } + continue; + + case STATE_RESULT_DONE: + + (*pParHash->parResultDoneFunc)( pParHash, pState, pOp ); + + pScratch->next = pOp->next; + pScratch->processingState = STATE_NEXT; + continue; + } + } + + return FALSE; +} + + +// +// Comparison function used to sort the work into largest-first order. +// +int SYMCRYPT_CDECL +compareRequestSize( PCVOID p1, PCVOID p2 ) +{ + PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pp1 = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *) p1; + PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pp2 = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *) p2; + + UINT64 c1 = (*pp1)->bytes; + UINT64 c2 = (*pp2)->bytes; + + // + // This is 'reverse' compare function as we want the largest item first. 
+ // + if( c1 < c2 ) + { + return 1; + } else if( c1 > c2 ) + { + return -1; + } else { + return 0; + } +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelHashProcess( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_updates_bytes_( nStates * pParHash->pHash->stateSize ) PVOID pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch, + UINT32 maxParallel ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratchState; + PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork; + SIZE_T nWork; + PSYMCRYPT_PARALLEL_HASH_OPERATION pOp; + PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pSc; + SIZE_T i; + UINT64 singleSize; + BOOLEAN sameSize; + SIZE_T nPar; + PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pNextWork; + SIZE_T todo; + SIZE_T nBytes; + PBYTE pbScratchEnd; + PBYTE pbFixedScratch; + SIZE_T cbFixedScratch; + PCSYMCRYPT_HASH pHash; + + if( nOperations == 0 ) + { + goto cleanup; + } + + pHash = pParHash->pHash; + + // + // The caller passes us a scratch buffer. 
We split that into the following pieces: + // + // <alignment space to SYMCRYPT_ALIGN_VALUE> + // SYMCRYPT_PARALLEL_HASH_SCRATCH pScratchState[ nStates ] + // PSYMCRYPT_PARALLEL_HASH_SCRATCH pWork[ nStates ] + // <alignment space to SYMCRYPT_SIMD_ELEMENT_SIZE> + // scratch space for parallel function + // + + pbScratchEnd = pbScratch + cbScratch; + pScratchState = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE) SYMCRYPT_ALIGN_UP( pbScratch ); + pWork = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *) (pScratchState + nStates); + pbFixedScratch = (PBYTE)((((SIZE_T)(pWork + nStates)) + SYMCRYPT_SIMD_ELEMENT_SIZE - 1) & ~(SYMCRYPT_SIMD_ELEMENT_SIZE - 1)); + cbFixedScratch = pParHash->parScratchFixed; + + if( pbFixedScratch + cbFixedScratch > pbScratchEnd ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + // + // Wipe the scratch state; this sets the pointers to NULL, and the byte counts to 0. + // + memset( pScratchState, 0, nStates * sizeof( *pScratchState )); + nWork = 0; + + // + // The general data structure is as follows. + // For each hash state, we keep our administration in the pScratchState[i]. This contains a pointer to the actual + // hash state, a pointer to a linked list of operations to be performed on this state, pointer/length of the + // current data to be processed, and a few more administrative items. + // We also keep the pWork array of pointers to our scratch states, which contains all the states that still need + // work to be done. + // + // We process over the operations in reverse order to make it easy to build a forward single-linked list + // + pOp = &pOperations[ nOperations ]; + while( pOp > pOperations ) + { + pOp--; + + if( pOp->iHash >= nStates ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pSc = &pScratchState[ pOp->iHash ]; + + if( pSc->hashState == NULL ) + { + // + // We found a new state that is being modified by this set of operations. + // Set the pointer to the hash state, and add it to the work list. 
+ // + SYMCRYPT_ASSERT( nWork < nStates ); + pSc->hashState = (PBYTE) pStates + pHash->stateSize * pOp->iHash; + pWork[nWork] = pSc; + nWork++; + } + + // + // We estimate how much work we have to do on each state, so that we can start on the largest ones + // and be more efficient. + // + if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND ) + { + pSc->bytes += pOp->cbBuffer; + } else if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ) + { + // + // The result could be a 1 or 2-block operation; but it is mostly a 1-block one so that is what we budget. + // + pSc->bytes += pHash->inputBlockSize; + } else { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // + // Add the operation to the list of operations for this state + // + pOp->next = pSc->next; + pSc->next = pOp; + } + + // + // We have built all the structures. + // Run the SetNextWork on each of them, and drop the ones that don't have work. + // Also detect whether they are all the same size so that we can avoid the sorting cost. + // + SYMCRYPT_ASSERT( nWork > 0 ); + singleSize = (*pWork)->bytes; + sameSize = TRUE; + i = 0; + while( i < nWork ) + { + if( !SymCryptParallelHashSetNextWork( pParHash, pWork[i] ) ) + { + pWork[i] = pWork[nWork-1]; + nWork--; + continue; + } + + if( pWork[i]->bytes != singleSize ) + { + sameSize = FALSE; + } + i++; + } + + if( !sameSize ) + { + qsort( pWork, nWork, sizeof( *pWork ), &compareRequestSize ); + } + + nPar = SYMCRYPT_MIN( nWork, maxParallel ); // # parallel states we currently work on + pNextWork = pWork + nPar; // next work pointer. 
+ + while( nWork > 0 ) + { + todo = pWork[0]->cbData; + for( i=1; i<nPar; i++ ) + { + todo = SYMCRYPT_MIN( todo, pWork[i]->cbData ); + } + + nBytes = todo & ~((SIZE_T)(pHash->inputBlockSize - 1)); + + (*pParHash->parAppendFunc)( pWork, nPar, nBytes, pbFixedScratch, cbFixedScratch ); + + for( i=0; i<nPar; i++ ) + { + if( pWork[i]->cbData < pHash->inputBlockSize ) + { + // + // Once we start a request we finish it; this is not optimal. + // It would be better to switch things around a bit, but that is much more complicated. + // Example: suppose we can do 4-parallel and have requests of size + // 9 8 7 6 6 6 + // Our code does + // Process first 4 of # blocks Resulting state + // 9 8 7 6 / 6 6 6 3 2 1 - / 6 6 + // 6 3 2 1 / 6 1 5 2 1 0 / 6 + // 6 more to finish for a total of 13 blocks. + // + // Better would be: + // Process first 4 of # blocks Resulting state + // 9 8 7 6 / 6 6 6 3 2 1 - / 6 6 + // 6 6 3 2 / 1 - 2 4 4 1 0 / 1 + // 4 more to finish for total of 12 blocks. + // + // Or even better: + // Process first 4 of # blocks Resulting state + // 9 8 7 6 / 6 6 5 4 3 2 1 / 6 6 + // 6 6 4 3 / 2 1 3 3 3 1 - / 2 1 + // 3 3 2 1 / 1 - 1 2 2 1 - / 1 - + // 2 more to finish for a total of 11 blocks. + // Note that this last one requires the interruption of a started hash computation. + // + + if( !SymCryptParallelHashSetNextWork( pParHash, pWork[i] )) + { + if( nWork > nPar ) + { + pWork[i] = *pNextWork++; + nWork--; + } else { + // + // Ugly: copy the last item here, and wind back the loop counter + // by one so that we will process the last item again. + // + pWork[i] = pWork[ --nPar ]; + i--; + nWork--; + } + } + } + } + } + SymCryptWipe( pbFixedScratch, cbFixedScratch ); + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/pbkdf2.c b/libs/symcrypt/lib/pbkdf2.c new file mode 100644 index 00000000000..3c1d06e4329 --- /dev/null +++ b/libs/symcrypt/lib/pbkdf2.c @@ -0,0 +1,126 @@ +// +// pbkdf2.c +// +// Copyright (c) Microsoft Corporation. 
Licensed under the MIT license. +// + +// +// This module contains the routines to implement the pbkdf2 function +// +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2Derive( + _In_ PCSYMCRYPT_PBKDF2_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT64 iterationCnt, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_MAC_STATE macState; + UINT32 iBlock; + SIZE_T bytes; + SIZE_T blockSize = pExpandedKey->macAlg->resultSize; + UINT64 iterations; + SYMCRYPT_ALIGN BYTE rbBlockResult[SYMCRYPT_MAC_MAX_RESULT_SIZE]; + SYMCRYPT_ALIGN BYTE rbWorkBuffer[SYMCRYPT_MAC_MAX_RESULT_SIZE]; + + SYMCRYPT_ASSERT( + blockSize <= SYMCRYPT_MAC_MAX_RESULT_SIZE && + cbResult > 0 ); + + if (iterationCnt == 0) + { + return SYMCRYPT_WRONG_ITERATION_COUNT; + } + + iBlock = 0; + while( cbResult > 0 ) + { + iBlock += 1; + SYMCRYPT_STORE_MSBFIRST32( &rbBlockResult[0], iBlock ); // use result buf as temp + + pExpandedKey->macAlg->initFunc ( &macState, &pExpandedKey->macKey); + pExpandedKey->macAlg->appendFunc( &macState, pbSalt, cbSalt); + pExpandedKey->macAlg->appendFunc( &macState, &rbBlockResult[0], 4 ); // block count encoded in 4 bytes + pExpandedKey->macAlg->resultFunc( &macState, rbWorkBuffer); + +#pragma warning(suppress: 22105) + memcpy( rbBlockResult, rbWorkBuffer, blockSize ); + for( iterations = 1; iterations < iterationCnt; iterations++ ) + { + pExpandedKey->macAlg->initFunc ( &macState, &pExpandedKey->macKey ); + pExpandedKey->macAlg->appendFunc( &macState, rbWorkBuffer, blockSize ); + pExpandedKey->macAlg->resultFunc( &macState, rbWorkBuffer ); + SymCryptXorBytes( &rbWorkBuffer[0], &rbBlockResult[0], &rbBlockResult[0], blockSize ); + } + + bytes = SYMCRYPT_MIN( cbResult, blockSize ); + memcpy( pbResult, rbBlockResult, bytes ); + pbResult += bytes; + cbResult -= bytes; + } + + SymCryptWipeKnownSize( &rbWorkBuffer[0], sizeof( rbWorkBuffer ) ); + SymCryptWipeKnownSize( &rbBlockResult[0], sizeof( 
rbBlockResult ) ); + return SYMCRYPT_NO_ERROR; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2ExpandKey( + _Out_ PSYMCRYPT_PBKDF2_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) ); + + pExpandedKey->macAlg = macAlgorithm; + return macAlgorithm->expandKeyFunc(&pExpandedKey->macKey, pbKey, cbKey ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2( + PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT64 iterationCnt, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_PBKDF2_EXPANDED_KEY key; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + scError = SymCryptPbkdf2ExpandKey( &key, macAlgorithm, pbKey, cbKey ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptPbkdf2Derive( &key, pbSalt, cbSalt, iterationCnt, pbResult, cbResult ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + SymCryptWipeKnownSize( &key, sizeof( key ) ); + + return scError; + +} + +// +// Self tests are in pbkdf_*.c files +// to avoid pulling in SHA-1 when only PBKDF-SHA256 is used and +// similar scenarios. +// diff --git a/libs/symcrypt/lib/pbkdf2_hmacsha1.c b/libs/symcrypt/lib/pbkdf2_hmacsha1.c new file mode 100644 index 00000000000..cf675a8b85e --- /dev/null +++ b/libs/symcrypt/lib/pbkdf2_hmacsha1.c @@ -0,0 +1,41 @@ +// +// pbkdf2_hmacsha1.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +// +// The PBKDF SHA-1 test +// This is in a separate module to avoid pulling in SHA-1 whenever we use PBKDF +// + +static const UINT64 pbkdf2_IterationCnt = 5; + +static const BYTE pbkdf2_sha1Answer[] = +{ + 0xef, 0xa9, 0xbf, 0xea, 0xa3, 0x4d, 0x70, 0x64, +}; + +VOID +SYMCRYPT_CALL +SymCryptPbkdf2_HmacSha1SelfTest(void) +{ + BYTE res[sizeof(pbkdf2_sha1Answer)]; + + SymCryptPbkdf2( + SymCryptHmacSha1Algorithm, + &SymCryptTestKey32[0], 8, + &SymCryptTestKey32[16], 16, + pbkdf2_IterationCnt, + res, + sizeof(res)); + + SymCryptInjectError( res, sizeof( res ) ); + + if (memcmp(res, pbkdf2_sha1Answer, sizeof(res)) !=0) + { + SymCryptFatal('Pbk2'); + } +} diff --git a/libs/symcrypt/lib/pbkdf2_hmacsha256.c b/libs/symcrypt/lib/pbkdf2_hmacsha256.c new file mode 100644 index 00000000000..ebef54bb81d --- /dev/null +++ b/libs/symcrypt/lib/pbkdf2_hmacsha256.c @@ -0,0 +1,41 @@ +// +// pbkdf2_hmacsha256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// The PBKDF SHA-256 test +// This is in a separate module to avoid pulling in SHA-256 whenever we use PBKDF +// + +static const UINT64 pbkdf2_IterationCnt = 5; + +static const BYTE pbkdf2_sha256Answer[] = +{ + 0x05, 0x98, 0x1e, 0x89, 0x48, 0xd2, 0x84, 0x61, +}; + +VOID +SYMCRYPT_CALL +SymCryptPbkdf2_HmacSha256SelfTest(void) +{ + BYTE res[sizeof(pbkdf2_sha256Answer)]; + + SymCryptPbkdf2( + SymCryptHmacSha256Algorithm, + &SymCryptTestKey32[0], 8, + &SymCryptTestKey32[16], 16, + pbkdf2_IterationCnt, + res, + sizeof(res)); + + SymCryptInjectError( res, sizeof( res ) ); + + if (memcmp(res, pbkdf2_sha256Answer, sizeof(res)) !=0) + { + SymCryptFatal('Pbk2'); + } +} diff --git a/libs/symcrypt/lib/poly1305.c b/libs/symcrypt/lib/poly1305.c new file mode 100644 index 00000000000..286f023d6b8 --- /dev/null +++ b/libs/symcrypt/lib/poly1305.c @@ -0,0 +1,468 @@ +// +// Poly1305.c +// +// Copyright (c) Microsoft Corporation. 
Licensed under the MIT license. +// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptPoly1305ProcessBlocks( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptPoly1305( + _In_reads_( SYMCRYPT_POLY1305_KEY_SIZE ) PCBYTE pbKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_POLY1305_STATE state; + + SymCryptPoly1305Init( &state, pbKey ); + SymCryptPoly1305Append( &state, pbData, cbData ); + SymCryptPoly1305Result( &state, pbResult ); +} + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Init( + _Out_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( SYMCRYPT_POLY1305_KEY_SIZE ) PCBYTE pbKey ) +{ + pState->r[0] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 0 ) & 0x0fffffff; + pState->r[1] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 4 ) & 0x0ffffffc; + pState->r[2] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 8 ) & 0x0ffffffc; + pState->r[3] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 12 ) & 0x0ffffffc; + + pState->s[0] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 16 ); + pState->s[1] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 20 ); + pState->s[2] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 24 ); + pState->s[3] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 28 ); + + // Set accumulator to zero + SymCryptWipeKnownSize( &pState->a[0], sizeof( pState->a ) ); + + pState->bytesInBuffer = 0; +} + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Append( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SIZE_T nBytes; + SIZE_T bytesInBuffer; + + bytesInBuffer = pState->bytesInBuffer; + if( bytesInBuffer != 0 ) + { + // We have a partial block in the buffer, keep filling the block + + SYMCRYPT_ASSERT( bytesInBuffer < 16 ); + nBytes = 16 - bytesInBuffer; + if( nBytes > cbData ) + { + nBytes = cbData; + } + + memcpy( &pState->buf[bytesInBuffer], pbData, nBytes ); + pbData += nBytes; + cbData -= nBytes; + bytesInBuffer += nBytes; + + if( 
//
// Finish a Poly1305 computation and write the authenticator.
//
// Any buffered partial block is padded and processed, the accumulator is fully
// reduced modulo P = 2^130 - 5, s is added, and the low four 32-bit words are
// stored LSB-first at pbResult + 0, 4, 8 and 12. The whole state is wiped
// before returning.
//
//  pState      state to finalize; wiped on return
//  pbResult    receives the result
//
VOID
SYMCRYPT_CALL
SymCryptPoly1305Result(
    _Inout_                                         PSYMCRYPT_POLY1305_STATE    pState,
    _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE )   PBYTE                       pbResult )
{
    SIZE_T bytesInBuffer;
    UINT64 t;
    UINT32 a4, a3, a2, a1, a0;
    UINT32 maskOld, maskNew;

    bytesInBuffer = pState->bytesInBuffer;
    if( bytesInBuffer > 0 )
    {
        // Add trailing '1' byte and pad with zeroes
        // Wipe function deals with 0-length wipes properly
        pState->buf[bytesInBuffer++] = 1;
        SymCryptWipe( &pState->buf[bytesInBuffer], 16 - bytesInBuffer );

        // Now we have to process the block, but the block function adds a trailing
        // 1 byte to each 16-byte block. We compensate for that by decrementing
        // the highest word of the accumulator first; the 1 byte added by the block
        // processing function has the effect of incrementing the highest accumulator
        // word so those two operations cancel each other out.
        pState->a[4] -= 1;
        SymCryptPoly1305ProcessBlocks( pState, pState->buf, 16 );
    }

    // We have to fully reduce the accumulator first
    // We have a[4]<6 at this point
    a0 = pState->a[0];
    a1 = pState->a[1];
    a2 = pState->a[2];
    a3 = pState->a[3];
    a4 = pState->a[4];

    SYMCRYPT_ASSERT( a4 < 6 );
    // Because a4 < 6, we have to subtract either 0*P or 1*P
    // we subtract P and then mux-choose between the new and old value
    // Subtracting P is the same as subtracting 2^130 and adding 5
    // The +5 is rippled through the words below; the -2^130 is the "t -= 4" applied
    // to the top word (4 * 2^128 == 2^130).
    t = 5;

    t += a0;
    a0 = (UINT32) t;
    t >>= 32;

    t += a1;
    a1 = (UINT32) t;
    t >>= 32;

    t += a2;
    a2 = (UINT32) t;
    t >>= 32;

    t += a3;
    a3 = (UINT32) t;
    t >>= 32;

    t += a4;
    t -= 4;
    a4 = (UINT32) t;
    t >>= 32;

    // If the subtraction of 4 borrowed (the 64-bit value wrapped around), the upper
    // half is all ones and t = 0xffffffff here; otherwise t is 0.
    maskOld = (UINT32) t;       // ffffffff if the old value is correct, 0 otherwise
    maskNew = ~maskOld;         // ffffffff if the new value is correct, 0 otherwise

    // Branch-free select between the reduced and the original words.
    a0 = (maskNew & a0) | (maskOld & pState->a[0]);
    a1 = (maskNew & a1) | (maskOld & pState->a[1]);
    a2 = (maskNew & a2) | (maskOld & pState->a[2]);
    a3 = (maskNew & a3) | (maskOld & pState->a[3]);
    // a4 = (maskNew & a4) | (maskOld & pState->a[4]);    // We don't need a4...

    // Now we add S and return the data
    // The carry out of the top word is dropped: only four words are stored.
    t = a0;
    t += pState->s[0];
    SYMCRYPT_STORE_LSBFIRST32( pbResult + 0, (UINT32) t );
    t >>= 32;

    t += a1;
    t += pState->s[1];
    SYMCRYPT_STORE_LSBFIRST32( pbResult + 4, (UINT32) t );
    t >>= 32;

    t += a2;
    t += pState->s[2];
    SYMCRYPT_STORE_LSBFIRST32( pbResult + 8, (UINT32) t );
    t >>= 32;

    t += a3;
    t += pState->s[3];
    SYMCRYPT_STORE_LSBFIRST32( pbResult + 12, (UINT32) t );

    // The state contains key material (r, s); wipe all of it.
    SymCryptWipeKnownSize( (PBYTE) pState, sizeof( *pState ) );
}
+The other multiplicant is the accumulator A. The overall operation is + + A += <value derived from the data> + A = (A*R) mod P + +We write all values base 2^32: +b := 2^32 +A = a4 b^4 + a3 b^3 + a2 b^2 + a1 b + a0 +R = r3 b^3 + r2 b^2 + r1 b + r0 + +Fully reduced we would have a4 <= 3 but we don't store A in fully-reduced form. Instead +we maintain a4 < L with L:=8. + +The restrictions on R are: + r3, r2, r1, r0 < 2^28 + r3, r2, r1 are multiples of 4 + +The core algorithm looks like this (explanations below) + + + a4 a3 a2 a1 a0 + r3 r2 r1 r0 * +--------------------------------------- + a4r0 a3r0 a2r0 a1r0 a0r0 + a4r1 a3r1 a2r1 a1r1 a0r1 + a4r2 a3r2 a2r2 a1r2 a0r2 +a4r3 a3r3 a2r3 a1r3 a0r3 + +---------------------------------------- + S7 S6 S5 S4 S3 S2 S1 S0 + + S7 S6 S5 T4+U T3 S2 S1 S0 + + T3 S2 S1 S0 + S7 S6 S5 T4 + S7/4 S6/4 S5/4 T4/4 + + ------------------- + U V3 V2 V1 V0 + +At the top you see A and R with the 5*4 digit products arranged in columns. +The S values are the sums of the product columns without any carries. +Because the r values are <2^28 and a4 < L we have + + S0 <= 1*(2^32-1)(2^28-1) + S1 <= 2*(2^32-1)(2^28-1) + S2 <= 3*(2^32-1)(2^28-1) + S3 <= 4*(2^32-1)(2^28-1) + S4 <= 3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + S5 <= 2*(2^32-1)(2^28-1) + (L-1)*(2^28-1), multiple of 4 + S6 <= 1*(2^32-1)(2^28-1) + (L-1)*(2^28-1), multiple of 4 + S7 <= (L-1)*(2^28-1), multiple of 4 + +The next line defines T4, U, and T3 by +T3 := S3 mod b the lower word of S3 +T := S4 + floor(S3/b) add the upper word of S3 to S4 +U := T mod 4 +T4 := T - U Split T into a small value U and a bigger T4 that is a multiple of 4 + +note that the digits (S7,S6, S5, S4, S3, S2, S1, S0) and (S7, S6, S5, T4+U, T3, S2, S1, S0) +encode the same number, namely the result of the multiplication. 
+ +We have bounds + floor(S3/b) <= 2^2 * (2^32-1) * (2^28-1) / 2^32 < 2^30 + T < 3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30 + U < 4 + T4 < 3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30, multiple of 4 + +Now we are ready to perform the modulo reduction. Because P = 2^130 - 5 we have for any value X + X*2^130 mod P = 5*X mod P +because 2^130 = 5 mod P +Or, if X is a multiple of 4 then + X*2^128 = (X + X/4) mod P +(this is just the previous equation divided by 4) +We apply that to S7, S6, S5, and T4 and add them (column wise) to (T3, S2, S1, S0) to get + +V0 := S0 + T4 + T4/4 +V1 := S1 + S5 + S5/4 +V2 := S2 + S6 + S6/4 +V3 := T3 + S7 + S7/4 + +and note that (U, V3, V2, V1, V0) is equal to the result of the multiplication modulo P +We can derive some bounds on these values + + We assume L <= 8 (will get strict bound later) + + V0 < 1*(2^32-1)(2^28-1) + 3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30 + (3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30)/4 + = 4*1*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30 + (3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30)/4 + < 2^2 * 2^32 * 2^28 + 2^31 + 2^30 + (2^4 * 2^32 * 2^28 + 2^31 + 2^30)/4 + = 2^62 + 2^31 + 2^30 + 2^60 + 2^29 + 2^30 + < 2^63 + + V1 < 2*2^60 + 2*2^60 + 2^31 + (2*2^60 + 2^31)/4 < 2^63 + + V2 < 3*2^60 + 2^60 + 2^31 + (2^60 + 2^31)/4 < 2^63 + + V3 < 2^32 + 2^31 + 2^29 < 2*2^32 + + U < 4 + +So all the V values fit in 64 bits. A final carry propagation pass cleans this up to an array of 32-bit values which become +the new accumulator value. (During carry propagation the 32-bit carry from the lower digit can be added to the higher digit +because the V values are less than 2^63.) + +V3 < 2*2^32 and after adding at most 2^32 from a carry it is < 3*2^32 so the carry from V3 to U is at most 2. +Thus the highest digit of the accumulator can be at most 3 + 2 = 5. This ensures a4<L for L>=6. We assumed L<=8 before, so +L=6 works and satisfies the earlier assumption. 
+ +To clarify the logic: IF a4<8 at the start of the multiplication THEN a4<6 after this function. Between multiplications we add +a value < 2^129 which could result in adding 2 to a4, but as a4<6 before the addition the a4<8 before the multiplication +is still satisfied. +*/ + +VOID +SYMCRYPT_CALL +SymCryptPoly1305ProcessBlocks( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +// This is the portable C implementation, based on 32-bit operations. +// If necessary, we'll add assembler code for this function later. +{ + UINT32 a0, a1, a2, a3, a4; + UINT32 r0, r1, r2, r3; + UINT64 t64; + UINT32 T3; + UINT32 V0, V1, V2; + UINT32 cy; + UINT32 U; + UINT32 t32; + + r0 = pState->r[0]; + r1 = pState->r[1]; + r2 = pState->r[2]; + r3 = pState->r[3]; + + a0 = pState->a[0]; + a1 = pState->a[1]; + a2 = pState->a[2]; + a3 = pState->a[3]; + a4 = pState->a[4]; + + // Here we have a4 < 6, but we sometimes decrement a4 to compensate for the + // 2^128 this function always adds. So we test a4 + 1 < 7 + SYMCRYPT_ASSERT( a4 + 1 < 7 ); + + while( cbData >= 16 ) + { + // Acc += data[0..15] + 2^128 + t64 = (UINT64) a0 + SYMCRYPT_LOAD_LSBFIRST32( pbData + 0 ); + a0 = (UINT32) t64; + t64 >>= 32; + + t64 += (UINT64) a1 + SYMCRYPT_LOAD_LSBFIRST32( pbData + 4 ); + a1 = (UINT32) t64; + t64 >>= 32; + + t64 += (UINT64) a2 + SYMCRYPT_LOAD_LSBFIRST32( pbData + 8 ); + a2 = (UINT32) t64; + t64 >>= 32; + + t64 += (UINT64) a3 + SYMCRYPT_LOAD_LSBFIRST32( pbData + 12 ); + a3 = (UINT32) t64; + t64 >>= 32; + + a4 = (UINT32) t64 + a4 + 1; // +1 is the padding '1' which we always apply + SYMCRYPT_ASSERT( a4 < 8 ); + + pbData += 16; + cbData -=16; + + // Compute S3 + t64 = SYMCRYPT_MUL32x32TO64( a3, r0 ) + + SYMCRYPT_MUL32x32TO64( a2, r1 ) + + SYMCRYPT_MUL32x32TO64( a1, r2 ) + + SYMCRYPT_MUL32x32TO64( a0, r3 ); + + SYMCRYPT_ASSERT( t64 < (1ULL << 62) ); + + T3 = (UINT32) t64; + t64 >>= 32; + + // Compute T = S4 + floor(S3/2^32). 
We have the floor part in t64 already + // now add S4 to it + t64 += a4*r0 // this fits in 32 bits as r0 < 2^28 and a4 < 8 + + SYMCRYPT_MUL32x32TO64( a3, r1 ) + + SYMCRYPT_MUL32x32TO64( a2, r2 ) + + SYMCRYPT_MUL32x32TO64( a1, r3 ); + + U = (UINT32) t64 & 3; + t64 &= ~3; // t64 = T4 here + + // Compute S0 + T4 + T4/4, and V0 + t64 += (t64 >> 2) + SYMCRYPT_MUL32x32TO64( a0, r0 ); + V0 = (UINT32)t64; + cy = (UINT32)(t64 >> 32); // the carry from S0 to S1 + + // Compute S5 + t64 = a4 * r1 + SYMCRYPT_MUL32x32TO64( a3, r2 ) + SYMCRYPT_MUL32x32TO64( a2, r3 ); + t64 += t64 >> 2; // = S5 + S5/4 + + t64 += SYMCRYPT_MUL32x32TO64( a1, r0 ) + SYMCRYPT_MUL32x32TO64( a0, r1 ); + // t64 = S1 + S5 + S5/4 + + t64 += cy; + V1 = (UINT32) t64; + cy = (UINT32)(t64 >> 32); // the carry from S1 to S2 + + // Compute S6 + t64 = a4 * r2 + SYMCRYPT_MUL32x32TO64( a3, r3 ); + t64 += t64 >> 2; // S6 + S6/4 + + // now add S2 + t64 += SYMCRYPT_MUL32x32TO64( a2, r0 ) + SYMCRYPT_MUL32x32TO64( a1, r1 ) + SYMCRYPT_MUL32x32TO64( a0, r2 ); + t64 += cy; + V2 = (UINT32) t64; + cy = (UINT32)(t64 >> 32); + + // Finally T3 + S7 + S7/4 + t32 = a4 * r3; // =S7, a 32-bit value + t32 += t32/4; + t64 = (UINT64) T3 + t32; + t64 += cy; + + a0 = V0; + a1 = V1; + a2 = V2; + a3 = (UINT32) t64; + a4 = U + (UINT32)(t64 >> 32); + + SYMCRYPT_ASSERT( a4 < 6 ); + } + + pState->a[0] = a0; + pState->a[1] = a1; + pState->a[2] = a2; + pState->a[3] = a3; + pState->a[4] = a4; +} + + +static const BYTE poly1305Kat[16] = { + 0xef, 0x9e, 0x73, 0x2a, 0x7f, 0x2d, 0xf1, 0x85, 0xa7, 0x11, 0x80, 0xae, 0x58, 0x3a, 0x0f, 0x93, +}; + + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Selftest(void) +{ + BYTE res[16]; + + SymCryptPoly1305( SymCryptTestKey32, SymCryptTestMsg16, 16, res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, poly1305Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'p135'); + } +} diff --git a/libs/symcrypt/lib/precomp.h b/libs/symcrypt/lib/precomp.h new file mode 100644 index 00000000000..2b8f5dc710a --- 
/dev/null +++ b/libs/symcrypt/lib/precomp.h @@ -0,0 +1,26 @@ +// +// SymCrypt library pre-compiled header file +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#ifdef __cplusplus +#error C++ +#endif + +#include <stdlib.h> +#include <string.h> + +#include "symcrypt.h" +#include "sc_lib.h" + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 +#include <wmmintrin.h> +#include <immintrin.h> + + #if SYMCRYPT_GNUC + #include <x86intrin.h> // required for definition of _rdseed64_step for GCC 8 and earlier + #include <xsaveintrin.h> + #define _XCR_XFEATURE_ENABLED_MASK 0 + #endif +#endif diff --git a/libs/symcrypt/lib/primes.c b/libs/symcrypt/lib/primes.c new file mode 100644 index 00000000000..6c9fcbee96f --- /dev/null +++ b/libs/symcrypt/lib/primes.c @@ -0,0 +1,306 @@ +// +// primes.c +// Primality tests and prime number generation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +UINT32 +SYMCRYPT_CALL +SymCryptIntMillerRabinPrimalityTest( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 nBitsSrc, + UINT32 nIterations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + BOOLEAN innerLoop = TRUE; + UINT32 borrow = 0; + + UINT32 nDigitsSrc = 0; + + UINT32 R = 1; + PSYMCRYPT_INT piD = NULL; + UINT32 cbD = 0; + PSYMCRYPT_MODULUS pmModulus = NULL; + UINT32 cbModulus = 0; + PSYMCRYPT_MODELEMENT peX = NULL; + UINT32 cbX = 0; + + PSYMCRYPT_MODELEMENT peOne = NULL; + PSYMCRYPT_MODELEMENT peMinOne = NULL; + + nDigitsSrc = SymCryptIntDigitsizeOfObject( piSrc ); + cbD = SymCryptSizeofIntFromDigits( nDigitsSrc ); + cbModulus = SymCryptSizeofModulusFromDigits( nDigitsSrc ); + + SYMCRYPT_ASSERT( nBitsSrc >= SymCryptIntBitsizeOfValue( piSrc ) ); + + SYMCRYPT_ASSERT( cbScratch >= cbModulus + SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(nDigitsSrc) ); + + // Allocate the modulus + pmModulus = SymCryptModulusCreate( pbScratch, cbModulus, nDigitsSrc ); + SYMCRYPT_ASSERT( pmModulus 
!= NULL ); + pbScratch += cbModulus; + cbScratch -= cbModulus; + + // Set the modulus + SymCryptIntToModulus( + piSrc, + pmModulus, + nBitsSrc, // Average number of expected operations + SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC, + pbScratch, + cbScratch ); + + // Modelement size + cbX = SymCryptSizeofModElementFromModulus( pmModulus ); + + SYMCRYPT_ASSERT( cbScratch >= 3*cbX + cbD + + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsSrc ) ); + + peX = SymCryptModElementCreate( pbScratch, cbX, pmModulus ); + SYMCRYPT_ASSERT( peX != NULL ); + pbScratch += cbX; + cbScratch -= cbX; + + peOne = SymCryptModElementCreate( pbScratch, cbX, pmModulus ); + SYMCRYPT_ASSERT( peOne != NULL ); + pbScratch += cbX; + cbScratch -= cbX; + + peMinOne = SymCryptModElementCreate( pbScratch, cbX, pmModulus ); + SYMCRYPT_ASSERT( peMinOne != NULL ); + pbScratch += cbX; + cbScratch -= cbX; + + // Allocate D + piD = SymCryptIntCreate( pbScratch, cbD, nDigitsSrc ); + SYMCRYPT_ASSERT( piD != NULL ); + pbScratch += cbD; + cbScratch -= cbD; + + // Calculate (piSrc - 1) + // Note: We should never get a borrow here because the requirement + // is that Src > 3. 
+ SymCryptIntCopy( piSrc, piD ); + borrow = SymCryptIntSubUint32( piD, 1, piD ); + SYMCRYPT_ASSERT( borrow==0 ); + + SYMCRYPT_ASSERT( SymCryptIntGetBit( piD, 0 ) == 0 ); + + // Check the 3 mod 4 requirement when side-channel safe + SYMCRYPT_ASSERT( + ((flags & SYMCRYPT_FLAG_DATA_PUBLIC) != 0) || + (SymCryptIntGetBit( piD, 1 )!=0) ); + UNREFERENCED_PARAMETER( flags ); + + // Calculate R and D such that Src - 1 = D*2^R + // Notice that the loop executes only if + // the SYMCRYPT_FLAG_DATA_PUBLIC is + // specified (and Src != 3 mod 4) + R = 1; + while( SymCryptIntGetBit( piD, R )==0 ) + { + R++; + } + SymCryptIntDivPow2( piD, R, piD ); + + // Set peOne and peMinOne + SymCryptModElementSetValueUint32( 1, pmModulus, peOne, pbScratch, cbScratch ); + SymCryptModElementSetValueNegUint32( 1, pmModulus, peMinOne, pbScratch, cbScratch ); + + for (UINT32 i=0; i<nIterations; i++) + { + // Pick a random X in [2, piSrc-2] + // Therefore the flags parameter is 0 (default: not allowed 0, 1, -1 when modulus > 3) + SymCryptModSetRandom( pmModulus, peX, 0, pbScratch, cbScratch ); + + // X^D mod piSrc + // Notice that nBitsSrc is public in the call of SymCryptModExp + SymCryptModExp( pmModulus, peX, piD, nBitsSrc, 0, peX, pbScratch, cbScratch ); + + // Check for 1 or -1 + if ( SymCryptModElementIsEqual( pmModulus, peX, peOne ) | + SymCryptModElementIsEqual( pmModulus, peX, peMinOne ) ) + { + continue; + } + + // repeat R-1 times + // Notice that the inner loop executes only if + // the SYMCRYPT_FLAG_DATA_PUBLIC is + // specified (and Src != 3 mod 4) + innerLoop = TRUE; + for (UINT32 j=0; (j<R-1)&&(innerLoop); j++) + { + // Square X + SymCryptModSquare( pmModulus, peX, peX, pbScratch, cbScratch ); + + // Check if it is 1 + if (SymCryptModElementIsEqual( pmModulus, peX, peOne )) + { + return 0x0; + } + + // Check if it is -1 + if (SymCryptModElementIsEqual( pmModulus, peX, peMinOne )) + { + innerLoop = FALSE; + break; + } + } + + if (innerLoop) + { + return 0x0; + } + } + + return 
0xffffffff; // Prime +} + +#define SYMCRYPT_PRIME_GENERATION_MR_ITERATIONS (64) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntGenerateRandomPrime( + _In_ PCSYMCRYPT_INT piLow, + _In_ PCSYMCRYPT_INT piHigh, + _In_reads_opt_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + UINT32 nTries, + UINT32 flags, + _Inout_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_EXTERNAL_FAILURE; + PSYMCRYPT_DIVISOR pdPubExp[ SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS ]; + PSYMCRYPT_INT piTmp; + + UINT32 cnt = 0; + UINT32 e; + BOOLEAN reject; + SIZE_T cbObj; + + UINT32 nBits = SymCryptIntBitsizeOfObject(piDst); + UINT32 nBytes = (nBits + 7)/8; + + UINT32 nBitsHigh = SymCryptIntBitsizeOfValue( piHigh ); + + PCSYMCRYPT_TRIALDIVISION_CONTEXT pTrialDivisionContext = SymCryptCreateTrialDivisionContext( SymCryptIntDigitsizeOfObject( piHigh ) ); + + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_PRIME_GEN( SymCryptIntDigitsizeOfObject( piDst ) ) ); + SYMCRYPT_ASSERT( nPubExp <= SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS ); + SYMCRYPT_ASSERT( SymCryptDigitsFromBits( 64 ) == 1 ); + + UNREFERENCED_PARAMETER( flags ); + + // Allocate divisor objects for each public exponent & initialize them + cbObj = SymCryptSizeofDivisorFromDigits( 1 ); + for( e = 0; e < nPubExp; e++ ) + { + SYMCRYPT_ASSERT( cbScratch >= cbObj ); + if( pu64PubExp[e] == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto exit; + } + + pdPubExp[e] = SymCryptDivisorCreate( pbScratch, cbObj, 1 ); + pbScratch += cbObj; + cbScratch -= cbObj; + + SymCryptIntSetValueUint64( pu64PubExp[e], SymCryptIntFromDivisor( pdPubExp[e] ) ); + SymCryptIntToDivisor( SymCryptIntFromDivisor( pdPubExp[e] ), pdPubExp[e], 1000, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratch ); + } + + cbObj = SymCryptSizeofIntFromDigits( 1 ); + SYMCRYPT_ASSERT( cbScratch >= cbObj + nBytes ); + piTmp = SymCryptIntCreate( pbScratch, cbObj, 1 ); + pbScratch += cbObj; + cbScratch -= cbObj; + 
+ do + { + cnt++; + + scError = SymCryptCallbackRandom( pbScratch, nBytes ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto exit; + } + + scError = SymCryptIntSetValue( pbScratch, nBytes, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piDst ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto exit; + } + + // Set the integer to 3 mod 4 + SymCryptIntSetBits( piDst, 3, 0, 2 ); + + // Zero out the top bits above the upper limit + SymCryptIntModPow2( piDst, nBitsHigh, piDst ); + + // Check if it is in the correct range + if ( (SymCryptIntIsLessThan( piDst, piLow )) || + (!SymCryptIntIsLessThan( piDst, piHigh )) ) + { + continue; + } + + // Fast compositeness check + if( SymCryptIntFindSmallDivisor( pTrialDivisionContext, piDst, NULL, 0 ) != 0 ) + { + // We found a small divisor; it is not a prime + continue; + } + + // Check for compatibility with public exponents (if provided) + reject = FALSE; + for( e = 0; e < nPubExp; e++ ) + { + SymCryptIntDivMod( piDst, pdPubExp[e], NULL, piTmp, pbScratch, cbScratch ); + + // Check that e has a modular inverse mod P-1 + // If e and P-1 are coprime, or GCD( P-1, e ) == 1, then e^-1 exists + // We have (P mod e) in piTmp. 
+ // If piTmp == 0 then P is divisible by e, and will fail primality test - we don't care about the result of the GCD + // Otherwise, GCD( (P mod e)-1, e ) == GCD( P-1 mod e, e ) == GCD( P-1, e ) + // + // Note that if P-1 is a multiple of e then (P mod e)-1 == 0, and GCD( 0, e ) == e + if( SymCryptUint64Gcd( pu64PubExp[e], SymCryptIntGetValueLsbits64( piTmp ) - 1, SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN ) != 1 ) + { + // We can't continue the big loop from here :-( + reject = TRUE; + break; + } + } + if( reject ) + { + continue; + } + + // Primality check + if (SymCryptIntMillerRabinPrimalityTest( piDst, nBitsHigh, SYMCRYPT_PRIME_GENERATION_MR_ITERATIONS, 0, pbScratch, cbScratch )) + { + scError = SYMCRYPT_NO_ERROR; + goto exit; // found a prime: skip the exhaustion check below so success on the final attempt is not turned into SYMCRYPT_INVALID_ARGUMENT + } + } + while (cnt<nTries); + + if (cnt>=nTries) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + } + +exit: + SymCryptFreeTrialDivisionContext( pTrialDivisionContext ); + return scError; +} diff --git a/libs/symcrypt/lib/rc2.c b/libs/symcrypt/lib/rc2.c new file mode 100644 index 00000000000..d17807fd0dc --- /dev/null +++ b/libs/symcrypt/lib/rc2.c @@ -0,0 +1,438 @@ +// +// Rc2.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement RC2 from RFC 2268 +// +// This is a new implementation, based on the RFC specification +// and NOT based on the existing one in RSA32.lib, which is the one from RSA data security. 
+// + + +#include "precomp.h" + +const SYMCRYPT_BLOCKCIPHER SymCryptRc2BlockCipher_default = { + SymCryptRc2ExpandKey, // PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY expandKeyFunc; + SymCryptRc2Encrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT encryptFunc; + SymCryptRc2Decrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT decryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_MAC_MODE cbcMacFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE ctrMsbFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc; + 8, // SIZE_T blockSize; + sizeof( SYMCRYPT_RC2_EXPANDED_KEY ), // SIZE_T expandedKeySize; // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY ) +}; + +const PCSYMCRYPT_BLOCKCIPHER SymCryptRc2BlockCipher = &SymCryptRc2BlockCipher_default; + +/* + * + * constants + * + */ +static const BYTE PITABLE[] = +{ + 0xd9, 0x78, 0xf9, 0xc4, 0x19, 0xdd, 0xb5, 0xed, 0x28, 0xe9, 0xfd, 0x79, 0x4a, 0xa0, 0xd8, 0x9d, + 0xc6, 0x7e, 0x37, 0x83, 0x2b, 0x76, 0x53, 0x8e, 0x62, 0x4c, 0x64, 0x88, 0x44, 0x8b, 0xfb, 0xa2, + 0x17, 0x9a, 0x59, 0xf5, 0x87, 0xb3, 0x4f, 0x13, 0x61, 0x45, 0x6d, 0x8d, 0x09, 0x81, 0x7d, 0x32, + 0xbd, 0x8f, 0x40, 0xeb, 0x86, 0xb7, 0x7b, 0x0b, 0xf0, 0x95, 0x21, 0x22, 0x5c, 0x6b, 0x4e, 0x82, + 0x54, 0xd6, 0x65, 0x93, 0xce, 0x60, 0xb2, 0x1c, 0x73, 0x56, 0xc0, 0x14, 0xa7, 0x8c, 0xf1, 0xdc, + 0x12, 0x75, 0xca, 0x1f, 0x3b, 0xbe, 0xe4, 0xd1, 0x42, 0x3d, 0xd4, 0x30, 0xa3, 0x3c, 0xb6, 0x26, + 0x6f, 0xbf, 0x0e, 0xda, 0x46, 0x69, 0x07, 0x57, 0x27, 0xf2, 0x1d, 0x9b, 0xbc, 0x94, 0x43, 0x03, + 0xf8, 0x11, 0xc7, 0xf6, 0x90, 0xef, 0x3e, 0xe7, 0x06, 0xc3, 0xd5, 0x2f, 0xc8, 0x66, 0x1e, 0xd7, + 0x08, 0xe8, 0xea, 0xde, 0x80, 0x52, 0xee, 0xf7, 0x84, 0xaa, 0x72, 0xac, 0x35, 0x4d, 0x6a, 0x2a, + 0x96, 0x1a, 0xd2, 0x71, 0x5a, 0x15, 
0x49, 0x74, 0x4b, 0x9f, 0xd0, 0x5e, 0x04, 0x18, 0xa4, 0xec, + 0xc2, 0xe0, 0x41, 0x6e, 0x0f, 0x51, 0xcb, 0xcc, 0x24, 0x91, 0xaf, 0x50, 0xa1, 0xf4, 0x70, 0x39, + 0x99, 0x7c, 0x3a, 0x85, 0x23, 0xb8, 0xb4, 0x7a, 0xfc, 0x02, 0x36, 0x5b, 0x25, 0x55, 0x97, 0x31, + 0x2d, 0x5d, 0xfa, 0x98, 0xe3, 0x8a, 0x92, 0xae, 0x05, 0xdf, 0x29, 0x10, 0x67, 0x6c, 0xba, 0xc9, + 0xd3, 0x00, 0xe6, 0xcf, 0xe1, 0x9e, 0xa8, 0x2c, 0x63, 0x16, 0x01, 0x3f, 0x58, 0xe2, 0x89, 0xa9, + 0x0d, 0x38, 0x34, 0x1b, 0xab, 0x33, 0xff, 0xb0, 0xbb, 0x48, 0x0c, 0x5f, 0xb9, 0xb1, 0xcd, 0x2e, + 0xc5, 0xf3, 0xdb, 0x47, 0xe5, 0xa5, 0x9c, 0x77, 0x0a, 0xa6, 0x20, 0x68, 0xfe, 0x7f, 0xc1, 0xad +}; + +/* + * + * macros + * + */ + + +/* + * These are the original macros we derived directly from the RFC. + * To improve the perf we changed to using R0, R1, R2, R3 variables rather + * than an array. + */ + +/* +#define MIX(R, K, i, j, S) {\ + R[i] = R[i] + K[j] + (R[(i-1)&3] & R[(i-2)&3]) + ((~R[(i-1)&3]) & R[(i-3)&3]);\ + j = j + 1;\ + R[i] = ROL16(R[i], S);\ + } + +#define MIXROUND(R, K, j) {\ + MIX(R, K, 0, j, 1);\ + MIX(R, K, 1, j, 2);\ + MIX(R, K, 2, j, 3);\ + MIX(R, K, 3, j, 5);\ + } + +#define MASH(R, K, i) \ + R[i] = R[i] + K[R[(i-1)&3]&63]; + +#define MASHROUND(R, K) {\ + MASH(R, K, 0);\ + MASH(R, K, 1);\ + MASH(R, K, 2);\ + MASH(R, K, 3);\ + } + +// +// decrypt macros +// + + +#define RMIX(R, K, i, j, S) {\ + R[i] = ROR16( R[i], S );\ + R[i] = R[i] - K[j] - (R[(i-1)&3] & R[(i-2)&3]) - ((~R[(i-1)&3]) & R[(i-3)&3]);\ + j = j - 1;\ + } + +#define RMIXROUND(R, K, j) {\ + RMIX(R, K, 3, j, 5);\ + RMIX(R, K, 2, j, 3);\ + RMIX(R, K, 1, j, 2);\ + RMIX(R, K, 0, j, 1);\ + } + +#define RMASH(R, K, i) \ + R[i] = R[i] - K[R[(i-1)&3] & 63]; + +#define RMASHROUND(R, K) {\ + RMASH(R, K, 3);\ + RMASH(R, K, 2);\ + RMASH(R, K, 1);\ + RMASH(R, K, 0);\ + } +*/ + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc2ExpandKeyEx( + _Out_ PSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + UINT32 
effectiveKeySizeInBits ) +{ + SYMCRYPT_ALIGN BYTE L[128]; + UINT32 T; + UINT32 T1; + UINT32 T8; + UINT32 TM; + int i; + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + // + // According to RFC 2268 any key size in 1..128 is allowed. + // + // The effective key size cannot be 0 as the RFC specs would lead to a buffer overflow + // in the key expansion. + // + // If the effective key size <= 8 then T8=1 and the key expansion backward recursion + // drops into a fixed point because L[i+1] xor L[i+T8] is zero. + // Therefore, we require an effective key size of at least 9. + // + if( cbKey < 1 || cbKey > 128 || effectiveKeySizeInBits < 9 || effectiveKeySizeInBits > 8*128 ) + { + return SYMCRYPT_WRONG_KEY_SIZE; + } + + T = (UINT32)cbKey; // 1 <= T <= 128 + + T1 = effectiveKeySizeInBits; // 9 <= T1 <= 1024 + T8 = (T1+7)/8; // 2 <= T8 <= 128 + + TM = 255 & ((1 << (8 + (UINT32)T1 - 8*T8))-1); + + // + // To be endian-agnostic our expanded key is stored as an array of UINT16s. We do the key + // expansion in a local buffer and copy the values into the expanded key using the proper conversion. + // + memcpy(L, pbKey, T); + + for(i = T; i <= 127; i++) + { + L[i] = PITABLE[(L[i-1]+L[i-T]) & 0xff]; + // + // If the key size T=1 then we lose one bit of key space in the key expansion because + // L[i-1] == L[i-T] which makes the index to PITABLE even. So L[1..127] depend only on + // 7 bits. + // + } + + L[128-T8] = PITABLE[L[128-T8] & TM]; + + for( i = 127-T8; i >=0; i--) + { + L[i] = PITABLE[L[i+1] ^ L[i+T8]]; + } + + // + // Now we copy the result into the UINT16 array in our expanded key. + // This is a memcpy for little-endian platforms, but this code works on all CPUs. 
+ // + for( i=0; i<64; i++ ) + { + pExpandedKey->K[i] = SYMCRYPT_LOAD_LSBFIRST16( &L[2*i] ); + } + + SymCryptWipeKnownSize( L, sizeof( L ) ); + + return SYMCRYPT_NO_ERROR; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc2ExpandKey( + _Out_ PSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + return SymCryptRc2ExpandKeyEx( pExpandedKey, pbKey, cbKey, (UINT32) (8*cbKey) ); +} + + +#define MIXROUND( n ) {\ + R0 = R0 + K[4*n] + (R3 & R2) + (~R3 & R1); \ + R0 = ROL16( R0, 1 ); \ + R1 = R1 + K[4*n+1] + (R0 & R3) + (~R0 & R2 ); \ + R1 = ROL16( R1, 2 ); \ + R2 = R2 + K[4*n+2] + (R1 & R0) + (~R1 & R3); \ + R2 = ROL16( R2, 3 ); \ + R3 = R3 + K[4*n+3] + (R2 & R1) + (~R2 & R0 ); \ + R3 = ROL16( R3, 5 ); \ + } + +#define MASHROUND() { \ + R0 = R0 + K[R3 & 63]; \ + R1 = R1 + K[R0 & 63]; \ + R2 = R2 + K[R1 & 63]; \ + R3 = R3 + K[R2 & 63]; \ + } + +VOID +SYMCRYPT_CALL +SymCryptRc2Encrypt( + _In_ PCSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_RC2_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_RC2_BLOCK_SIZE ) PBYTE pbDst ) +{ + UINT16 R0, R1, R2, R3; + PCUINT16 K; + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + // + // 1. Initialize words R[0], ..., R[3] to contain the 64-bit plaintext value. + // + R0 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[0] ); + R1 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[2] ); + R2 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[4] ); + R3 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[6] ); + + // + // 2. Expand the key, so that words K[0], ..., K[63] become defined. + // (In our case the key was previously expanded, so we just grab the pointer to it.) + // + K = pExpandedKey->K; + + // + // 3. Initialize j to zero. + // + + // + // 4. Perform five mixing rounds. + // + MIXROUND(0); + MIXROUND(1); + MIXROUND(2); + MIXROUND(3); + MIXROUND(4); + + // + // 5. Perform one mashing round. + // + MASHROUND(); + + // + // 6. Perform six mixing rounds. 
+ // + MIXROUND(5); + MIXROUND(6); + MIXROUND(7); + MIXROUND(8); + MIXROUND(9); + MIXROUND(10); + + // + // 7. Perform one mashing round. + // + MASHROUND(); + + // + // 8. Perform five mixing rounds. + // + MIXROUND(11); + MIXROUND(12); + MIXROUND(13); + MIXROUND(14); + MIXROUND(15); + + SYMCRYPT_STORE_LSBFIRST16( &pbDst[0], R0 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[2], R1 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[4], R2 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[6], R3 ); + +} + + +#define RMIXROUND( n ) {\ + R3 = ROR16( R3, 5 ); \ + R3 = R3 - K[4*n+3] - (R2 & R1) - (~R2 & R0 ); \ + R2 = ROR16( R2, 3 ); \ + R2 = R2 - K[4*n+2] - (R1 & R0) - (~R1 & R3); \ + R1 = ROR16( R1, 2 ); \ + R1 = R1 - K[4*n+1] - (R0 & R3) - (~R0 & R2 ); \ + R0 = ROR16( R0, 1 ); \ + R0 = R0 - K[4*n ] - (R3 & R2) - (~R3 & R1); \ + } + +#define RMASHROUND() { \ + R3 = R3 - K[R2 & 63]; \ + R2 = R2 - K[R1 & 63]; \ + R1 = R1 - K[R0 & 63]; \ + R0 = R0 - K[R3 & 63]; \ + } + + + +VOID +SYMCRYPT_CALL +SymCryptRc2Decrypt( + _In_ PCSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_RC2_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_RC2_BLOCK_SIZE ) PBYTE pbDst ) +{ + UINT16 R0, R1, R2, R3; + PCUINT16 K; + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + // + // 1. Initialize words R[0], ..., R[3] to contain the 64-bit ciphertext value. + // + R0 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[0] ); + R1 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[2] ); + R2 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[4] ); + R3 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[6] ); + + // + // 2. Expand the key, so that words K[0], ..., K[63] become defined. + // (In our case the key was previously expanded, so we just grab the pointer to it.) + // + K = pExpandedKey->K; + + // + // 3. Initialize j to 63. + // + + // + // 4. Perform five r-mixing rounds. + // + RMIXROUND(15); + RMIXROUND(14); + RMIXROUND(13); + RMIXROUND(12); + RMIXROUND(11); + + // + // 5. Perform one r-mashing round. + // + RMASHROUND(); + + // + // 6. Perform six r-mixing rounds. 
+ // + RMIXROUND(10); + RMIXROUND(9); + RMIXROUND(8); + RMIXROUND(7); + RMIXROUND(6); + RMIXROUND(5); + + // + // 7. Perform one r-mashing round. + // + RMASHROUND(); + + // + // 8. Perform five r-mixing rounds. + // + RMIXROUND(4); + RMIXROUND(3); + RMIXROUND(2); + RMIXROUND(1); + RMIXROUND(0); + + SYMCRYPT_STORE_LSBFIRST16( &pbDst[0], R0 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[2], R1 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[4], R2 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[6], R3 ); + +} + +static const BYTE testPlaintext[8] = { 'P', 'l', 'a', 'i', 'n', 't', 'x', 't', }; +static const BYTE testCiphertext[8] = { + 0x89, 0xe8, 0x5d, 0x1a, 0x98, 0xcd, 0xe5, 0x52, +}; + +VOID +SYMCRYPT_CALL +SymCryptRc2Selftest(void) +{ + SYMCRYPT_RC2_EXPANDED_KEY key; + BYTE buf[SYMCRYPT_RC2_BLOCK_SIZE]; + + if( SymCryptRc2ExpandKeyEx( &key, SymCryptTestKey32, 16, 87) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'rc21' ); + } + + SymCryptRc2Encrypt( &key, testPlaintext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_RC2_BLOCK_SIZE ); + + if( memcmp( buf, testCiphertext, SYMCRYPT_RC2_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'rc22' ); + } + + SymCryptRc2Decrypt( &key, testCiphertext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_RC2_BLOCK_SIZE ); + + if( memcmp( buf, testPlaintext, SYMCRYPT_RC2_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'rc23' ); + } + +} diff --git a/libs/symcrypt/lib/rc4.c b/libs/symcrypt/lib/rc4.c new file mode 100644 index 00000000000..914771c5aa5 --- /dev/null +++ b/libs/symcrypt/lib/rc4.c @@ -0,0 +1,156 @@ +// +// Rc4.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This is a new implementation, NOT based on the existing ones in RSA32.lib. +// The algorithm specification is taken from "ARCFOUR Algorithm" internet +// draft dated July 1999, and from memory. 
+// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc4Init( + _Out_ PSYMCRYPT_RC4_STATE pState, + _In_reads_( cbKey ) PCBYTE pbKey, + _In_ SIZE_T cbKey ) +{ + SIZE_T i; + SIZE_T j; + BYTE keyBuf[256]; + SIZE_T keyIdx; + + SYMCRYPT_RC4_S_TYPE T; + + if( cbKey > 256 || cbKey == 0 ) + { + return SYMCRYPT_WRONG_KEY_SIZE; + } + + // + // Make a copy of the key to obey the read-once rule. + // This is a case where it looks safe to break the read-once + // rule, but it isn't. RC4 with very long keys (e.g. 256 bytes) + // is actually very vulnerable against related-key attacks. + // One obvious precaution is to limit the length of the RC4 key, + // which one of the layers above us might do. + // Allowing the key bytes to change as we read them negates + // this countermeasure. + // + memcpy( keyBuf, pbKey, cbKey ); + + for( i=0; i<256; i++ ) + { + pState->S[i] = (SYMCRYPT_RC4_S_TYPE) i; + } + + j = 0; + keyIdx = 0; + for( i=0; i<256; i++ ) + { + + T = pState->S[i]; + j = (j + T + keyBuf[keyIdx]) & 0xff; + pState->S[i] = pState->S[j]; + pState->S[j] = T; + keyIdx++; + if( keyIdx == cbKey ) + { + keyIdx = 0; + } + } + + // + // We store the i value already incremented for the next byte. + // This seems to allow better instruction sequencing interleaving in the actual en/decrypt loop + // + pState->i = 1; + pState->j = 0; + + SYMCRYPT_SET_MAGIC( pState ); + + SymCryptWipe( keyBuf, cbKey ); + + return SYMCRYPT_NO_ERROR; +} + + +VOID +SYMCRYPT_CALL +SymCryptRc4Crypt( + _Inout_ PSYMCRYPT_RC4_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + _In_ SIZE_T cbData ) +{ + SIZE_T i; + SIZE_T j; + SYMCRYPT_RC4_S_TYPE Ti; + SYMCRYPT_RC4_S_TYPE Tj; + PCBYTE pbSrcEnd = pbSrc + cbData; + + SYMCRYPT_CHECK_MAGIC( pState ); + + i = pState->i; + j = pState->j; + + // + // I tried to unroll this loop 4x and use a single 32-bit operation to XOR the key + // stream with the data. This actually makes the code slower by 1 c/B on a Core 2. 
+ // I suspect that that is because the instruction decoders are the bottleneck, and + // a small loop can be run out of the uop queue which bypasses the instruction decoders. + // A larger loop has to be decoded every time, and that slows things down. + // The theoretical gain of unrolling the loop is less than 1 c/B, + // and as Core 2 and derived CPUs are the most commonly used CPUs by our customers, + // it is not worthwhile to pursue this further. + // + // - Niels Ferguson (niels) 2010-10-11 + // + + while( pbSrc < pbSrcEnd ) + { + // + // Our i value is already incremented + // + Ti = pState->S[i]; + j = (j + Ti ) & 0xff; + Tj = pState->S[j]; + pState->S[i] = Tj; + pState->S[j] = Ti; + *pbDst = (BYTE) (*pbSrc ^ pState->S[(Ti + Tj) & 0xff]); + + i = (i + 1) & 0xff; + + pbSrc++; + pbDst++; + } + + pState->i = (BYTE) i; + pState->j = (BYTE) j; +} + + +static const BYTE rc4KatAnswer[ 3 ] = { 0x71, 0x46, 0x92 }; + + +VOID +SYMCRYPT_CALL +SymCryptRc4Selftest(void) +{ + BYTE buf[3]; + SYMCRYPT_RC4_STATE state; + + SymCryptRc4Init( &state, SymCryptTestKey32, sizeof( SymCryptTestKey32 ) ); + + SymCryptRc4Crypt( &state, SymCryptTestMsg3, buf, sizeof( buf ) ); + + SymCryptInjectError( buf, sizeof( buf ) ); + + if( memcmp( buf, rc4KatAnswer, sizeof( buf )) != 0 ) + { + SymCryptFatal( 'rc4 ' ); + } + +} diff --git a/libs/symcrypt/lib/rdrand.c b/libs/symcrypt/lib/rdrand.c new file mode 100644 index 00000000000..79e8d951e3a --- /dev/null +++ b/libs/symcrypt/lib/rdrand.c @@ -0,0 +1,172 @@ +// +// rdrand.c Support for RdRand instruction +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +#if (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64) // only available on x86 and amd64 architectures + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("rdrnd"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("rdrnd") +#endif + +#if SYMCRYPT_MS_VC && _MSC_VER < 1610 +#error MSVC version lacks support for RDRAND intrinsics. Compile for the generic environment instead. +#endif + +// +// TODO: the _rdrand_u*() versions of the intrinsics can be removed once the new compiler +// with the _rdrand*_step() intrinsics is used in all branches + +#if SYMCRYPT_MS_VC && _MSC_VER < 1700 // 1700 = Dev11, + +// +// This is the code that uses the old intrinsics in the compiler version 16.1 +// + +unsigned int _rdrand_u32(void); +unsigned __int64 _rdrand_u64(void); + + +#if SYMCRYPT_CPU_X86 +#define SymCryptRdrandSizet(p) ( *(p)=(SIZE_T)_rdrand_u32(), SYMCRYPT_NO_ERROR ) +#else +#define SymCryptRdrandSizet(p) ( *(p)=(SIZE_T)_rdrand_u64(), SYMCRYPT_NO_ERROR ) +#endif + +#else // _MSC_VER + +// +// Code for the new Dev11 intrinsics +// + +#if SYMCRYPT_CPU_X86 +#define _rdrandxx_step(_p) _rdrand32_step( (unsigned int *) (_p) ) +#else +#define _rdrandxx_step(_p) _rdrand64_step( (UINT64 *) (_p) ) +#endif + +FORCEINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandSizet( SIZE_T * p ) +{ + int i; + + // + // In Win8/WinBlue we iterated 1000 times. + // But we got a crash bucket where we fail because of + // not getting any random data. + // I contacted the Intel people; according to them they cannot make the + // RDRAND instruction fail more than a dozen times in a row under any tested + // circumstance. They have no idea how it could fail 1000 times in a row. + // As a failure of this code leads to a bugcheck (it fails a security promise, and + // is therefore treated as a critical security bug) I have increased the + // iteration count to 1000000. 
+ // This will not affect any machine that didn't bugcheck before, but it hopefully + // will remove some of the current bugchecks. + // + // Niels Ferguson (niels) 2014-04-09. + // + for( i=0; i<1000000; i++ ) + { + if( _rdrandxx_step( p ) != 0 ) + { + return SYMCRYPT_NO_ERROR; + } + } + return SYMCRYPT_HARDWARE_FAILURE; +} + +#endif + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandStatus(void) +{ + // + // Check that the library is initialized; otherwise the CPUID info + // is all zeroes. (This check only happens in CHKed builds.) + // + SymCryptCheckLibraryInitialized(); + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_RDRAND ) ) + { + return SYMCRYPT_NO_ERROR; + } + else + { + return SYMCRYPT_NOT_IMPLEMENTED; + } +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandGetBytes( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ) +{ + SIZE_T * pBuf; + SIZE_T nBuf; + SIZE_T i; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // + // Take care of the obvious errors that can happen + // + if( SymCryptRdrandStatus() != SYMCRYPT_NO_ERROR || + (cbBuffer & 0xf) != 0 + ) + { + SymCryptFatal( 'rdrn' ); + } + + pBuf = (SIZE_T *) pbBuffer; + nBuf = cbBuffer / sizeof( SIZE_T ); + + for( i=0; i<nBuf; i++ ) + { + scError = SymCryptRdrandSizet( &pBuf[i] ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + SymCryptSha512( pbBuffer, cbBuffer, pbResult ); + +cleanup: + SymCryptWipe( pbBuffer, cbBuffer ); + + return scError; +} + + +VOID +SYMCRYPT_CALL +SymCryptRdrandGet( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ) +{ + if( SymCryptRdrandGetBytes( pbBuffer, cbBuffer, pbResult ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'rdrx' ); + } +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 diff --git 
a/libs/symcrypt/lib/rdseed.c b/libs/symcrypt/lib/rdseed.c new file mode 100644 index 00000000000..5e1f94f7809 --- /dev/null +++ b/libs/symcrypt/lib/rdseed.c @@ -0,0 +1,135 @@ +// +// rdseed.c Support for RdSeed instruction +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#if (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64) // only available on x86 and amd64 architectures. + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("rdseed"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("rdseed") +#endif + +#if SYMCRYPT_MS_VC && _MSC_VER < 1610 +#error MSVC version lacks support for RDSEED intrinsics. Compile for the generic environment instead. +#endif + +// +// Create a definition that works on SIZE_Ts. +// + +#if SYMCRYPT_CPU_X86 +#define _rdseedxx_step(_p) _rdseed32_step( (unsigned int *) (_p) ) +#else +#define _rdseedxx_step(_p) _rdseed64_step( (UINT64 *) (_p) ) +#endif + +FORCEINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedSizet( SIZE_T * p ) +{ + int i; + + // + // There is no way to report errors, and customers rely on the RNG to work properly. + // Therefore, higher layers will fatal if this function fails. + // This is why we have a very high retry count; the alternative is to fatal. + // + // + for( i=0; i<10000000; i++ ) + { + if( _rdseedxx_step( p ) != 0 ) + { + return SYMCRYPT_NO_ERROR; + } + } + return SYMCRYPT_HARDWARE_FAILURE; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedStatus(void) +{ + // + // Check that the library is initialized; otherwise the CPUID info + // is all zeroes. (This check only happens in CHKed builds.) 
+ // + SymCryptCheckLibraryInitialized(); + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_RDSEED ) ) + { + return SYMCRYPT_NO_ERROR; + } + else + { + return SYMCRYPT_NOT_IMPLEMENTED; + } +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedGetBytes( + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult ) +{ + SIZE_T * pBuf; + SIZE_T nBuf; + SIZE_T i; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // + // Take care of the obvious errors that can happen + // + if( SymCryptRdseedStatus() != SYMCRYPT_NO_ERROR || + (cbResult & 0xf) != 0 + ) + { + SymCryptFatal( 'rdsd' ); + } + + pBuf = (SIZE_T *) pbResult; + nBuf = cbResult / sizeof( SIZE_T ); + + for( i=0; i<nBuf; i++ ) + { + scError = SymCryptRdseedSizet( &pBuf[i] ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + +cleanup: + + return scError; +} + + +VOID +SYMCRYPT_CALL +SymCryptRdseedGet( + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult ) +{ + if( SymCryptRdseedGetBytes( pbResult, cbResult ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'rdsx' ); + } +} + + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/recoding.c b/libs/symcrypt/lib/recoding.c new file mode 100644 index 00000000000..4341c5c8fff --- /dev/null +++ b/libs/symcrypt/lib/recoding.c @@ -0,0 +1,209 @@ +// +// recoding.c Algorithms for recoding the factors / exponents in various implementations +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +// +// The following is an adaptation of algorithm 6: "Protected +// odd-only recoding algorithm for the fixed-window representation" +// from the paper +// "Selecting Elliptic Curves for Cryptography: An Efficiency and +// Security Analysis" by Bos, Costello, Longa, and Naehrig +// +// Input: odd integer k \in [1,GOrd], window width w>=2, and +// t = ceil( GOrdBitsize / (w-1) ) +// +// Output: (k_t, ... , k_0) where k_i \in {+-1, +-3, ..., +-(2^(w-1) -1)} +// +// Algorithm: +// for i=0 to (t-1) do +// k_i = (k mod 2^w) - 2^(w-1) +// k = (k-k_i)/2^(w-1) +// k_t = k mod 2^(w-1) +// return (k_t, ..., k_0) +// +// Remarks: +// 1. An invariant of the main loop is that (k > 0 and k odd). This means +// that all k_i's are odd and that k_t > 0. +// 2. We will store the values of k_i's as absolute values and signs in +// absofKIs and sigofKIs arrays, resp. The sigofKIs[i] is 0xffffffff if +// k_i < 0, otherwise it is 0. +// 3. In the multiplication algorithm we always access the precomputed point +// P[(|k_i|-1)/2]. Therefore here we just shift the |k_i| value right by +// one bit before storing it in absofKIs table. +// 4. Caller should check k in range [1,GOrd] to ensure use of recoding will +// give correct results. This algorithm always recodes the t * (w-1) least +// significant bits of the provided k, interpreted as an unsigned integer. +// +VOID +SYMCRYPT_CALL +SymCryptFixedWindowRecoding( + UINT32 W, + _Inout_ PSYMCRYPT_INT piK, + _Inout_ PSYMCRYPT_INT piTmp, + _Out_writes_( nRecodedDigits ) + PUINT32 absofKIs, + _Out_writes_( nRecodedDigits ) + PUINT32 sigofKIs, + UINT32 nRecodedDigits ) +{ + UINT32 T1 = 0; + UINT32 T2 = 0; + UINT32 mask = ~(0xffffffff << W); // Window mask = 2^w - 1 (e.g. 0x0000003f for w = 6) + UINT32 smask = 0x1 << (W-1); // Sign mask = 2^(w-1) (e.g. 
0x00000020 for w = 6) + + SYMCRYPT_ASSERT( W < 32 ); + + for (UINT32 i=0; i < nRecodedDigits - 1; i++) + { + T1 = SymCryptIntGetValueLsbits32( piK ) & mask; // T1 = k mod 2^W + + // At this point if the w-th bit of T1 is 1 then we know that T1 > 2^(w-1) + // (Since k = odd is a loop invariant). + // + // In this case, (case A), T1 & ~smask is equal to (k mod 2^w) - 2^(w-1) = k_i = |k_i|. + // + // Otherwise, (case B), we know that T1 < 2^(w-1). Therefore 2^(w-1) - T1 = |k_i|. + + sigofKIs[i] = SYMCRYPT_MASK32_ZERO( T1 & smask ); // If the sign of k_i is - this mask is set to 0xffffffff. (Case B) + + T2 = T1 & ~smask; // |k_i| in case A + T1 = smask - T1; // |k_i| in case B + + absofKIs[i] = ((T1 & sigofKIs[i]) | (T2 & ~sigofKIs[i])) >> 1; // Setting (masked) the absolute value of k_i in absofKIs (divided by 2) + + SymCryptIntSubUint32( piK, T2, piTmp ); // This gives k - k_i in case (A) + SymCryptIntAddUint32( piK, T1, piK ); // This gives k - k_i in case (B) + + SymCryptIntMaskedCopy( piTmp, piK, ~sigofKIs[i] ); // Copy the case (A) result from piTmp to piK; ~sigofKIs[i] is all-ones only in case (A) + + SymCryptIntDivPow2( piK, W-1, piK ); // k := k / 2^(w-1) + } + + // The last sign is positive given k < GOrd => k_t < 2^w + sigofKIs[nRecodedDigits - 1] = 0; + // Belts and braces, select only the bottom w-1 bits (ensure all absofKIs represent odd values in range [1,2^(w-1)-1]) + absofKIs[nRecodedDigits - 1] = (SymCryptIntGetValueLsbits32( piK ) & mask & ~smask) >> 1; +} + +// +// The following is an algorithm for computing the width-w NAF of a positive integer. +// +// Input: integer k \in [1,GOrd), window width w>=2, and nRecodedDigits = GOrdBitsize + 1 +// +// Output: (k_(nRecodedDigits-1), ... 
, k_0) where k_i \in {0, +-1, +-3, ..., +-(2^(w-1) -1)} +// +// Algorithm: +// for i = 0 to (nRecodedDigits-1) +// if (k is odd) +// k_i = (k mods 2^w) +// k = k - k_i +// else +// k_i = 0 +// k = k/2 +// return (k_(nRecodedDigits-1), ..., k_0) +// +// Note: k mods 2^w is the integer u with (u == k mod 2^w) and (-2^(w-1) <= u < 2^(w-1) ). +// +// Remarks: +// 1. The above algorithm and the implementation are NOT SIDE-CHANNEL SAFE. +// Therefore, it should only be used when the SYMCRYPT_FLAG_DATA_PUBLIC is +// specified. +// 2. The multiplication algorithm uses |k_i|/2 as indexes. Therefore we will shift left +// the absolute value of k_i by 1 bit and store only |k_i|/2. +// 3. Since now the k_i's can be zero we will store the following in sigofKIs: +// sigofKIs[i] = 0x00000001 if k_i > 0 +// sigofKIs[i] = 0x00000000 if k_i = 0 +// sigofKIs[i] = 0xffffffff if k_i < 0 +// +VOID +SYMCRYPT_CALL +SymCryptWidthNafRecoding( + UINT32 W, + _Inout_ PSYMCRYPT_INT piK, + _Out_writes_( nRecodedDigits ) + PUINT32 absofKIs, + _Out_writes_( nRecodedDigits ) + PUINT32 sigofKIs, + UINT32 nRecodedDigits ) +{ + UINT32 T1 = 0; + UINT32 mask = ~(0xffffffff << W); // Window mask = 2^w - 1 (e.g. 0x0000003f for w = 6) + UINT32 modulus = mask + 1; // 2^w + UINT32 smask = 0x1 << (W-1); // Sign mask = 2^(w-1) (e.g. 
0x00000020 for w = 6) + + SYMCRYPT_ASSERT( W < 32 ); + + for (UINT32 i=0; i < nRecodedDigits; i++) + { + T1 = SymCryptIntGetValueLsbits32( piK ) & mask; // T1 = k mod 2^W + + if (T1 & 0x1) + { + if (T1 > smask) + { + sigofKIs[i] = 0xffffffff; + absofKIs[i] = modulus - T1; // 2^W - T1 = |T1 - 2^W| + SymCryptIntAddUint32( piK, absofKIs[i], piK ); // k-k_i + } + else + { + // Here (k mod 2^W) is already in the specified range + sigofKIs[i] = 0x00000001; + absofKIs[i] = T1; + SymCryptIntSubUint32( piK, absofKIs[i], piK ); // k-k_i + } + } + else + { + absofKIs[i] = 0; + sigofKIs[i] = 0; + } + + SymCryptIntDivPow2( piK, 1, piK ); // k := k / 2 + } +} + +// +// The following is an algorithm similar to the above +// but the output is only non-negative (odd) digits. +// +// Requirements: +// nRecodedDigits == nBitsExp +// +VOID +SYMCRYPT_CALL +SymCryptPositiveWidthNafRecoding( + UINT32 W, + _In_ PCSYMCRYPT_INT piK, + UINT32 nBitsExp, + _Out_writes_( nRecodedDigits ) + PUINT32 absofKIs, + UINT32 nRecodedDigits ) +{ + UINT32 T1 = 0; + UINT32 cntrZ = W; // Counter that specifies when we filled the last non-zero NAF digit + + SYMCRYPT_ASSERT( nRecodedDigits <= SymCryptIntBitsizeOfObject( piK ) ); + + for (UINT32 i=0; i < nRecodedDigits; i++) + { + T1 = SymCryptIntGetBits( piK, i, SYMCRYPT_MIN(W, nBitsExp-i) ); // Get a batch of W bits (but don't go over nBitsExp) + + if ((cntrZ>=W) && ((T1 & 0x01) > 0)) // Only store odd digits + { + absofKIs[i] = T1; + cntrZ = 0; + } + else + { + absofKIs[i] = 0; + } + + cntrZ++; // Prepare the counter for the next iteration + } +} diff --git a/libs/symcrypt/lib/rsa_enc.c b/libs/symcrypt/lib/rsa_enc.c new file mode 100644 index 00000000000..a76e663d923 --- /dev/null +++ b/libs/symcrypt/lib/rsa_enc.c @@ -0,0 +1,1531 @@ +// +// rsa_enc.c RSA related algorithms +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +// +// Helper functions for RSA raw encrypt/decrypt (they do NOT allocate scratch space) +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsaCoreEncScratchSpace( _In_ PCSYMCRYPT_RSAKEY pkRsakey) +{ + // Bounded by 2^19 + 2^24 ~ 2^24 (see symcrypt_internal.h) + return SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ) + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ) ); +} + +SYMCRYPT_ERROR +SymCryptRsaCoreVerifyInput( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + SIZE_T cbDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_INT piTmpInteger = NULL; + UINT32 cbTmpInteger = 0; + + UNREFERENCED_PARAMETER( cbScratch ); + + if ( cbSrc > SymCryptRsakeySizeofModulus(pkRsakey) || + cbDst < SymCryptRsakeySizeofModulus(pkRsakey) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // It is an error of value(pbSrc) >= modulus + // We already know that cbSrc <= sizeof( modulus ) so we only have to run this check + // if cbSrc == sizeof( modulus ) + // No side channel issues here: we are only comparing the input to the public part of the key. 
+ if (cbSrc == SymCryptRsakeySizeofModulus(pkRsakey)) + { + cbTmpInteger = SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( cbScratch >= cbTmpInteger ); + piTmpInteger = SymCryptIntCreate( pbScratch, cbTmpInteger, pkRsakey->nDigitsOfModulus ); + + scError = SymCryptIntSetValue( pbSrc, cbSrc, numFormat, piTmpInteger ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (!SymCryptIntIsLessThan(piTmpInteger, SymCryptIntFromModulus(pkRsakey->pmModulus))) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + +cleanup: + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaCoreEnc( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_MODELEMENT peRes = NULL; + UINT32 cbModElement = 0; + + PBYTE pbFnScratch = NULL; + SIZE_T cbFnScratch = 0; + + BYTE abExpIntBuffer[ SYMCRYPT_SIZEOF_INT_FROM_BITS( 64 ) + SYMCRYPT_ASYM_ALIGN_VALUE]; + PSYMCRYPT_INT piExp = NULL; + + UNREFERENCED_PARAMETER( flags ); + + scError = SymCryptRsaCoreVerifyInput(pkRsakey, pbSrc, cbSrc, numFormat, cbDst, pbScratch, cbScratch); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + cbModElement = SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ); + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= cbModElement + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ) )); + + pbFnScratch = pbScratch; + cbFnScratch = cbScratch; + + peRes = SymCryptModElementCreate( pbScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peRes != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + + 
// Set the original value + scError = SymCryptModElementSetValue( pbSrc, cbSrc, numFormat, pkRsakey->pmModulus, peRes, pbFnScratch, cbFnScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Convert the public exponent to an Int + // Future: we can optimize the ModExp to take an UINT64 + piExp = SymCryptIntCreate( SYMCRYPT_ASYM_ALIGN_UP(abExpIntBuffer), sizeof( abExpIntBuffer) - SYMCRYPT_ASYM_ALIGN_VALUE, 1 ); + if( piExp == NULL ) + { + scError = SYMCRYPT_HARDWARE_FAILURE; + goto cleanup; + } + SymCryptIntSetValueUint64( pkRsakey->au64PubExp[0], piExp ); + + // Modular Exponentiation + SymCryptModExp( + pkRsakey->pmModulus, + peRes, + piExp, + SymCryptIntBitsizeOfValue( piExp ), // This is a public value + SYMCRYPT_FLAG_DATA_PUBLIC, + peRes, + pbFnScratch, + cbFnScratch ); + + // Output the value + scError = SymCryptModElementGetValue( pkRsakey->pmModulus, peRes, pbDst, cbDst, numFormat, pbFnScratch, cbFnScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + + if( piExp != NULL ) + { + SymCryptIntWipe( piExp ); + } + + return scError; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsaCoreDecCrtScratchSpace( _In_ PCSYMCRYPT_RSAKEY pkRsakey) +{ + UINT32 cbModElementTotal = 0; + UINT32 nPrimes = pkRsakey->nPrimes; + + SYMCRYPT_ASSERT( nPrimes <= SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES ); + // clamp nPrimes to SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES for scratch memory allocation purposes + // SymCryptRsaCoreDecCrt will fail with invalid argument if there are too many primes later + nPrimes = SYMCRYPT_MIN( nPrimes, SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES ); + + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + cbModElementTotal += SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pkRsakey->nBitsOfPrimes[i]); + } + + // Bounded by 5*2^19 + 2^24 ~ 2^24 (see symcrypt_internal.h) + return 3*SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfModulus ) + + SymCryptSizeofIntFromDigits( pkRsakey->nMaxDigitsOfPrimes ) + + cbModElementTotal + + 
SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pkRsakey->nBitsOfModulus) + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( pkRsakey->nDigitsOfModulus, pkRsakey->nMaxDigitsOfPrimes ), + SYMCRYPT_SCRATCH_BYTES_FOR_CRT_SOLUTION( pkRsakey->nMaxDigitsOfPrimes ) ))); +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsaCoreDecScratchSpace( _In_ PCSYMCRYPT_RSAKEY pkRsakey) +{ + // Bounded by 2^19 + 2^24 ~ 2^24 (see symcrypt_internal.h) + return SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ) + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ) ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaCoreDecCrt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_INT piCiphertext = NULL; + PSYMCRYPT_INT piPlaintext = NULL; + UINT32 cbInt = 0; + + PSYMCRYPT_INT piTmp = NULL; + UINT32 cbTmp = 0; + + PSYMCRYPT_MODELEMENT peCrtElements[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES] = { 0 }; + UINT32 cbModElements[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES] = { 0 }; + UINT32 cbModElementTotal = 0; + + // Used to verify decryption + PSYMCRYPT_INT piVerify = NULL; // Size equal to cbInt + PSYMCRYPT_MODELEMENT peVerify = NULL; + UINT32 cbModElementVerify = 0; + + PBYTE pbFnScratch = NULL; + SIZE_T cbFnScratch = 0; + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key has a private key + if (!pkRsakey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = SymCryptRsaCoreVerifyInput(pkRsakey, pbSrc, 
cbSrc, numFormat, cbDst, pbScratch, cbScratch); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Verify that the number of primes does not cause a stack overflow + if (pkRsakey->nPrimes > SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbInt = SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfModulus ); + cbTmp = SymCryptSizeofIntFromDigits( pkRsakey->nMaxDigitsOfPrimes ); + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + cbModElements[i] = SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pkRsakey->nBitsOfPrimes[i]); + cbModElementTotal += cbModElements[i]; + } + + cbModElementVerify = SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ); + + UNREFERENCED_PARAMETER( cbScratch ); + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - nPrimes is at most SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES = 2 + // Thus the following calculation does not overflow cbScratch. 
+ // + SYMCRYPT_ASSERT( cbScratch >= + 3*cbInt + cbTmp + cbModElementTotal + cbModElementVerify + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( pkRsakey->nDigitsOfModulus, pkRsakey->nMaxDigitsOfPrimes ), + SYMCRYPT_SCRATCH_BYTES_FOR_CRT_SOLUTION( pkRsakey->nMaxDigitsOfPrimes ) ))) ); + + pbFnScratch = pbScratch; + cbFnScratch = cbScratch; + + piPlaintext = SymCryptIntCreate( pbFnScratch, cbFnScratch, pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( piPlaintext != NULL ); + pbFnScratch += cbInt; + cbFnScratch -= cbInt; + + piCiphertext = SymCryptIntCreate( pbFnScratch, cbFnScratch, pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( piCiphertext != NULL ); + pbFnScratch += cbInt; + cbFnScratch -= cbInt; + + piTmp = SymCryptIntCreate( pbFnScratch, cbFnScratch, pkRsakey->nMaxDigitsOfPrimes ); + SYMCRYPT_ASSERT( piTmp != NULL ); + pbFnScratch += cbTmp; + cbFnScratch -= cbTmp; + + SYMCRYPT_ASSERT( pkRsakey->nPrimes <= SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES ); + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + peCrtElements[i] = SymCryptModElementCreate( pbFnScratch, cbFnScratch, pkRsakey->pmPrimes[i] ); + SYMCRYPT_ASSERT( peCrtElements[i] != NULL ); + pbFnScratch += cbModElements[i]; + cbFnScratch -= cbModElements[i]; + } + + piVerify = SymCryptIntCreate( pbFnScratch, cbFnScratch, pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( piVerify != NULL ); + pbFnScratch += cbInt; + cbFnScratch -= cbInt; + + peVerify = SymCryptModElementCreate( pbFnScratch, cbFnScratch, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peVerify != NULL ); + pbFnScratch += cbModElementVerify; + cbFnScratch -= cbModElementVerify; + + // Set the ciphertext + scError = SymCryptIntSetValue( pbSrc, cbSrc, numFormat, piCiphertext ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Modular exponentiations + for (UINT32 
i=0; i<pkRsakey->nPrimes; i++) + { + // c mod the prime + // Note: For two equally sized primes we can use straight the faster SymCryptIntToModElement function + // but for now this is the general case. + SymCryptIntDivMod( + piCiphertext, + SymCryptDivisorFromModulus(pkRsakey->pmPrimes[i]), + NULL, + piTmp, + pbFnScratch, + cbFnScratch ); + + SymCryptIntToModElement( piTmp, pkRsakey->pmPrimes[i], peCrtElements[i], pbFnScratch, cbFnScratch ); + + // Modular Exponentiation + SymCryptModExp( + pkRsakey->pmPrimes[i], + peCrtElements[i], + pkRsakey->piCrtPrivExps[i], // For now only the first exponent is allowed + pkRsakey->nBitsOfPrimes[i], // This is a public value + 0, // Side-channel safe modexp + peCrtElements[i], + pbFnScratch, + cbFnScratch ); + } + + // Solve the crt equations + scError = SymCryptCrtSolve( + pkRsakey->nPrimes, + (PCSYMCRYPT_MODULUS *) pkRsakey->pmPrimes, + (PSYMCRYPT_MODELEMENT *) pkRsakey->peCrtInverses, + peCrtElements, + 0, + piPlaintext, + pbFnScratch, + cbFnScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + /* + A hardware error during RSA decryption can leak the + prime factors. For example, suppose the message + is M and you try to sign it with + M^d for some decryption exponent d. + Using the CRT, you compute M^d mod p correctly but + M^d mod q incorrectly. Your supposed M^d (mod p*q) is + then raised to an encryption exponent e + by the verifier, detects an invalid signature. + The verifier can also find p via a GCD and factor the modulus. + + To avoid this problem, re-encrypt the supposed M^d + and verify our own signature. 
+ */ + + // Don't call the full encryption function just the modular exponentiation + + SymCryptIntToModElement( piPlaintext, pkRsakey->pmModulus, peVerify, pbFnScratch, cbFnScratch ); + + SymCryptIntSetValueUint64( pkRsakey->au64PubExp[0], piTmp ); + + // Modular Exponentiation (Not side-channel safe) + SymCryptModExp( + pkRsakey->pmModulus, + peVerify, + piTmp, + SymCryptIntBitsizeOfValue( piTmp ), + SYMCRYPT_FLAG_DATA_PUBLIC, // Exponent is public + peVerify, + pbFnScratch, + cbFnScratch ); + + SymCryptModElementToInt( pkRsakey->pmModulus, peVerify, piVerify, pbFnScratch, cbFnScratch ); + + if (!SymCryptIntIsEqual( piCiphertext, piVerify )) + { + scError = SYMCRYPT_HARDWARE_FAILURE; + goto cleanup; + } + + // Output the result + scError = SymCryptIntGetValue( piPlaintext, pbDst, cbDst, numFormat ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaCoreDec( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_MODELEMENT peRes = NULL; + UINT32 cbModElement = 0; + + PBYTE pbFnScratch = NULL; + SIZE_T cbFnScratch = 0; + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key has a private key + if ((cbSrc>SymCryptRsakeySizeofModulus(pkRsakey)) || + (!pkRsakey->hasPrivateKey) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbModElement = SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ); + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= cbModElement + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ) ) ); + + pbFnScratch = pbScratch; 
+ cbFnScratch = cbScratch; + + peRes = SymCryptModElementCreate( pbScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peRes != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + + // Set the ciphertext + scError = SymCryptModElementSetValue( pbSrc, cbSrc, numFormat, pkRsakey->pmModulus, peRes, pbFnScratch, cbFnScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Modular Exponentiation + SymCryptModExp( + pkRsakey->pmModulus, + peRes, + pkRsakey->piPrivExps[0], // For now only the first exponent is allowed + pkRsakey->nBitsOfModulus, // This is a public value + 0, // Side-channel safe modexp + peRes, + pbFnScratch, + cbFnScratch ); + + // Output the value + scError = SymCryptModElementGetValue( pkRsakey->pmModulus, peRes, pbDst, cbDst, numFormat, pbFnScratch, cbFnScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + + return scError; +} + + +// +// Encryption / decryption functions +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaRawEncrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + + // Make sure that the key may be used in Encrypt/Decrypt + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch = SymCryptRsaCoreEncScratchSpace( pkRsakey ); + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptRsaCoreEnc( pkRsakey, pbSrc, cbSrc, numFormat, flags, pbDst, cbDst, pbScratch, cbScratch ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + 
SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaRawDecrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + + // Make sure that the key may be used in Encrypt/Decrypt + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the key has a private key + if (!pkRsakey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + +#define SYMCRYPT_CRT_DECRYPTION (1) // Set this to 0 to test the non-crt decryption + + // Scratch space +#if (SYMCRYPT_CRT_DECRYPTION) + cbScratch = SymCryptRsaCoreDecCrtScratchSpace( pkRsakey ); +#else + cbScratch = SymCryptRsaCoreDecScratchSpace( pkRsakey ); +#endif + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + scError = SymCryptRsaCoreDecCrt( pkRsakey, pbSrc, cbSrc, numFormat, flags, pbDst, cbDst, pbScratch, cbScratch ); +#else + scError = SymCryptRsaCoreDec( pkRsakey, pbSrc, cbSrc, numFormat, flags, pbDst, cbDst, pbScratch, cbScratch ); +#endif + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Encrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfDst, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ 
SIZE_T *pcbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + cbScratch = cbTmp + SymCryptRsaCoreEncScratchSpace( pkRsakey ); + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key may be used in Encrypt/Decrypt + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + *pcbDst = cbTmp; + + // Check if only *pcbDst is needed + if (pbDst == NULL) + { + scError = SYMCRYPT_NO_ERROR; + goto cleanup; + } + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaPkcs1ApplyEncryptionPadding( + pbSrc, + cbSrc, + pbTmp, + cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaCoreEnc( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, // Always MSB first for RSA OAEP + flags, + pbDst, + cbDst, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (nfDst == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +// Ensure SymCryptRoundUpPow2Sizet below will not fail +C_ASSERT((UINT32) ((SYMCRYPT_RSAKEY_MAX_BITSIZE_MODULUS + 7) / 8) <= ((SIZE_T_MAX / 2) + 1)); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Decrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nfSrc, + UINT32 flags, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ 
SIZE_T *pcbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbModulus = SymCryptRsakeySizeofModulus(pkRsakey); + SIZE_T cbTmp = SymCryptRoundUpPow2Sizet( cbModulus ); // tmp buffer needs to be a power of 2 + + // Make sure that the key may be used in Encrypt/Decrypt + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the key has a private key + if (!pkRsakey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + cbScratch = cbTmp + SymCryptRsaCoreDecCrtScratchSpace( pkRsakey ); +#else + cbScratch = cbTmp + SymCryptRsaCoreDecScratchSpace( pkRsakey ); +#endif + + UNREFERENCED_PARAMETER( flags ); + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + if (nfSrc == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + scError = SymCryptRsaCoreDecCrt( + pkRsakey, + pbSrc, + cbSrc, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbModulus, + pbScratch, + cbScratch - cbTmp ); +#else + scError = SymCryptRsaCoreDec( + pkRsakey, + pbSrc, + cbSrc, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbModulus, + pbScratch, + cbScratch - cbTmp ); +#endif + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaPkcs1RemoveEncryptionPadding( + pbTmp, + cbModulus, + cbTmp, + pbDst, + cbDst, + pcbDst ); + // The error that is returned from the encryption padding is confidential data + // due to Bleichenbacher-style attacks. + // Make sure we don't create a side-channel leak for it. 
+ +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepEncrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfDst, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key may be used in Encrypt/Decrypt + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + *pcbDst = cbTmp; + + // Check if only *pcbDst is needed + if (pbDst == NULL) + { + scError = SYMCRYPT_NO_ERROR; + goto cleanup; + } + + // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP macro does not + // overflow cbScratch since cbTmp < 2^17. 
+ cbScratch = cbTmp + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbTmp ), SymCryptRsaCoreEncScratchSpace( pkRsakey ) ); + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaOaepApplyEncryptionPadding( + pbSrc, + cbSrc, + hashAlgorithm, + pbLabel, + cbLabel, + NULL, // Seed + 0, // cbSeed + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaCoreEnc( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, // Always MSB first for RSA OAEP + flags, + pbDst, + cbDst, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (nfDst == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepDecrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nfSrc, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T cbDstResult = 0; // We always return a value into *pcbDst + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key may be used in Encrypt/Decrypt + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 ) + { + scError 
= SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbSrc > cbTmp) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the key has a private key + if (!pkRsakey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP macro does not + // overflow cbScratch since cbTmp < 2^17. +#if (SYMCRYPT_CRT_DECRYPTION) + cbScratch = cbTmp + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbSrc ), SymCryptRsaCoreDecCrtScratchSpace( pkRsakey ) ); +#else + cbScratch = cbTmp + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbSrc ), SymCryptRsaCoreDecScratchSpace( pkRsakey ) ); +#endif + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + if (nfSrc == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + scError = SymCryptRsaCoreDecCrt( + pkRsakey, + pbSrc, + cbSrc, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); +#else + scError = SymCryptRsaCoreDec( + pkRsakey, + pbSrc, + cbSrc, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); +#endif + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaOaepRemoveEncryptionPadding( + pbTmp, + cbTmp, + hashAlgorithm, + pbLabel, + cbLabel, + flags, + pbDst, + cbDst, + &cbDstResult, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + *pcbDst = cbDstResult; + + return scError; +} + +// +// Signing / Verification 
functions +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Sign( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_ PCSYMCRYPT_OID pHashOIDs, + _In_ SIZE_T nOIDCount, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _Out_writes_opt_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature, + _Out_ SIZE_T *pcbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + PCBYTE pbOID = NULL; + SIZE_T cbOID = 0; + + UNREFERENCED_PARAMETER(nOIDCount); + + pbOID = pHashOIDs ? pHashOIDs->pbOID : NULL; + cbOID = pHashOIDs ? pHashOIDs->cbOID : 0; + + // Make sure that the key may be used in Sign/Verify + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_SIGN) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the key has a private key + if (!pkRsakey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + *pcbSignature = cbTmp; + + // Check if only *pcbSignature is needed + if (pbSignature == NULL) + { + scError = SYMCRYPT_NO_ERROR; + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + cbScratch = cbTmp + SymCryptRsaCoreDecCrtScratchSpace( pkRsakey ); +#else + cbScratch = cbTmp + SymCryptRsaCoreDecScratchSpace( pkRsakey ); +#endif + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaPkcs1ApplySignaturePadding( + pbHashValue, + cbHashValue, + pbOID, + cbOID, + flags, + pbTmp, + cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + scError = SymCryptRsaCoreDecCrt( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbSignature, + cbSignature, + 
pbScratch, + cbScratch - cbTmp ); +#else + scError = SymCryptRsaCoreDec( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbSignature, + cbSignature, + pbScratch, + cbScratch - cbTmp ); +#endif + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (nfSignature == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Verify( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _In_reads_opt_( nOIDCount ) PCSYMCRYPT_OID pHashOIDs, + _In_ SIZE_T nOIDCount, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + // Make sure that the key may be used in Sign/Verify + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_SIGN) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbSignature > cbTmp) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (nfSignature == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PKCS1 macro does not + // overflow cbScratch since cbTmp < 2^17. 
+ cbScratch = cbTmp + + SYMCRYPT_MAX( SymCryptRsaCoreEncScratchSpace( pkRsakey ), + SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PKCS1( cbTmp ) ); + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaCoreEnc( + pkRsakey, + pbSignature, + cbSignature, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaPkcs1VerifySignaturePadding( + pbHashValue, + cbHashValue, + pHashOIDs, + nOIDCount, + pbTmp, + cbTmp, + flags, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssSign( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbSalt, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _Out_writes_opt_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature, + _Out_ SIZE_T *pcbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + // Make sure that the key may be used in Sign/Verify + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_SIGN) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ( (cbHashValue > cbTmp) || + (cbSalt > cbTmp) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the key has a private key + if (!pkRsakey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + *pcbSignature 
= cbTmp; + + // Check if only *pcbSignature is needed + if (pbSignature == NULL) + { + scError = SYMCRYPT_NO_ERROR; + goto cleanup; + } + + // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS macro does not + // overflow cbScratch since cbTmp < 2^17. +#if (SYMCRYPT_CRT_DECRYPTION) + cbScratch = cbTmp + + SYMCRYPT_MAX( SymCryptRsaCoreDecCrtScratchSpace( pkRsakey ), + SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHashValue, cbTmp ) ); +#else + cbScratch = cbTmp + + SYMCRYPT_MAX( SymCryptRsaCoreDecScratchSpace( pkRsakey ), + SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHashValue, cbTmp ) ); +#endif + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaPssApplySignaturePadding( + pbHashValue, + cbHashValue, + hashAlgorithm, + NULL, // For now only random salt supported + cbSalt, + pkRsakey->nBitsOfModulus, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + scError = SymCryptRsaCoreDecCrt( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbSignature, + cbSignature, + pbScratch, + cbScratch - cbTmp ); +#else + scError = SymCryptRsaCoreDec( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbSignature, + cbSignature, + pbScratch, + cbScratch - cbTmp ); +#endif + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (nfSignature == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssVerify( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, 
+ _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbSalt, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + // Make sure that the key may be used in Sign/Verify + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_SIGN) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ( (cbHashValue > cbTmp) || + (cbSalt > cbTmp) || + (cbSignature > cbTmp) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (nfSignature == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS macro does not + // overflow cbScratch since cbTmp < 2^17. 
+ cbScratch = cbTmp + + SYMCRYPT_MAX( SymCryptRsaCoreEncScratchSpace( pkRsakey ), + SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHashValue, cbTmp ) ); + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaCoreEnc( + pkRsakey, + pbSignature, + cbSignature, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaPssVerifySignaturePadding( + pbHashValue, + cbHashValue, + hashAlgorithm, + cbSalt, + pbTmp, + cbTmp, + pkRsakey->nBitsOfModulus, + flags, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} diff --git a/libs/symcrypt/lib/rsa_padding.c b/libs/symcrypt/lib/rsa_padding.c new file mode 100644 index 00000000000..d2f384b61c3 --- /dev/null +++ b/libs/symcrypt/lib/rsa_padding.c @@ -0,0 +1,1218 @@ +// +// rsa_padding.c RSA padding algorithms +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define ASN1_SEQUENCE_BYTE (0x30) +#define ASN1_OCTET_STRING_BYTE (0x04) + +#define PKCS_BLOCKTYPE_1 (0x01) // This is not used, added here for completeness +#define PKCS_BLOCKTYPE_2 (0x02) + +// +// Note: we could optimize these OID lists by using the same byte sequence for +// the long and short versions. 
+// +const SYMCRYPT_OID SymCryptMd5OidList[] = +{ + {12, (BYTE *)"\x06\x08\x2a\x86\x48\x86\xf7\x0d\x02\x05\x05\x00"}, + {10, (BYTE *)"\x06\x08\x2a\x86\x48\x86\xf7\x0d\x02\x05"}, +}; + +const SYMCRYPT_OID SymCryptSha1OidList[] = +{ + {9, (BYTE *)"\x06\x05\x2b\x0e\x03\x02\x1a\x05\x00"}, + {7, (BYTE *)"\x06\x05\x2b\x0e\x03\x02\x1a"} +}; + +const SYMCRYPT_OID SymCryptSha224OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04"} +}; + +const SYMCRYPT_OID SymCryptSha256OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01"} +}; + +const SYMCRYPT_OID SymCryptSha384OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02"} +}; + +const SYMCRYPT_OID SymCryptSha512OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03"} +}; + +const SYMCRYPT_OID SymCryptSha512_224OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x05\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x05"} +}; + +const SYMCRYPT_OID SymCryptSha512_256OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x06\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x06"} +}; + +const SYMCRYPT_OID SymCryptSha3_224OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x07\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x07"} +}; + +const SYMCRYPT_OID SymCryptSha3_256OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x08\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x08"} +}; + +const SYMCRYPT_OID SymCryptSha3_384OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x09\x05\x00"}, + {11, 
(BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x09"} +}; + +const SYMCRYPT_OID SymCryptSha3_512OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0a\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0a"} +}; + +const SYMCRYPT_OID SymCryptShake128OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0b\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0b"} +}; + +const SYMCRYPT_OID SymCryptShake256OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0c\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0c"} +}; + +// +// Mask generation used by the OAEP and PSS padding routines: +// fills pbDst with cbDst bytes taken from Hash( pbSrc || counter ) for +// counter = 0, 1, 2, ... where the counter is hashed as 4 bytes, MSB first. +// The last hash block is truncated to the remaining length. +// pHashState is caller-provided storage for the hash state of hashAlgorithm. +// +VOID +SYMCRYPT_CALL +SymCryptRsaPaddingMaskGeneration( + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_ PVOID pHashState, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + SIZE_T cIterations = 0; + + BYTE rgbHash[SYMCRYPT_HASH_MAX_RESULT_SIZE] = { 0 }; + BYTE rgbCount[sizeof(UINT32)] = { 0 }; + SIZE_T cbMaskRemaining = cbDst; + PBYTE pbMaskIndex = pbDst; + + SIZE_T cbHashAlg = SymCryptHashResultSize( hashAlgorithm ); + + cIterations = (cbDst + (cbHashAlg - 1)) / cbHashAlg; + + for (UINT32 i = 0; i < cIterations; i++) + { + SymCryptHashInit( hashAlgorithm, pHashState ); + + // hash the seed + SymCryptHashAppend( hashAlgorithm, pHashState, pbSrc, cbSrc ); + + // Encode the block counter MSB-first using shifts, so the bytes fed to + // the hash do not depend on the byte order of the host CPU. + rgbCount[0] = (BYTE)(i >> 24); + rgbCount[1] = (BYTE)(i >> 16); + rgbCount[2] = (BYTE)(i >> 8); + rgbCount[3] = (BYTE)i; + + // hash the count + SymCryptHashAppend( hashAlgorithm, pHashState, rgbCount, sizeof(UINT32) ); + + // copy the bytes from this hash into the mask buffer + if (cbMaskRemaining >= cbHashAlg) + { + SymCryptHashResult( hashAlgorithm, pHashState, pbMaskIndex, cbHashAlg 
); + + cbMaskRemaining -= cbHashAlg; + pbMaskIndex += cbHashAlg; + } + else + { + SymCryptHashResult( hashAlgorithm, pHashState, rgbHash, cbHashAlg ); + + memcpy( pbMaskIndex, rgbHash, cbMaskRemaining); + break; + } + } +} + +// +// PKCS1 Encryption Format: +// 0x00 || 0x02 || PS || 0x00 || M +// +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1ApplyEncryptionPadding( + _In_reads_bytes_( cbPlaintext ) PCBYTE pbPlaintext, + SIZE_T cbPlaintext, + _Out_writes_bytes_( cbPkcs1Format ) PBYTE pbPkcs1Format, + SIZE_T cbPkcs1Format ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // Format: 00 02 <PS> 00 <M> + // <PS> 8 or more padding bytes, random, all nonzero + // <M> message, length between 0 and cbPKCS1Format - 11. + // See RFC 3447 for more details. + + SIZE_T cbPS; + SIZE_T i; + + // ensure output buffer is big enough (padding has 11 bytes overhead) + if( cbPkcs1Format < (cbPlaintext + 11) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbPS = cbPkcs1Format - (cbPlaintext + 3); + + pbPkcs1Format[0] = 0x00; + pbPkcs1Format[1] = PKCS_BLOCKTYPE_2; + + scError = SymCryptCallbackRandom( &pbPkcs1Format[2], cbPS ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Make sure that none of the bytes in PS is zero (as per specs) + for( i = 0; i < cbPS; i++ ) + { + while( pbPkcs1Format[2 + i] == 0x00 ) + { + scError = SymCryptCallbackRandom( &pbPkcs1Format[2+i], 1 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + pbPkcs1Format[2 + cbPS] = 0x00; + + memcpy(pbPkcs1Format + 3 + cbPS, pbPlaintext, cbPlaintext); + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1RemoveEncryptionPadding( + _Inout_updates_bytes_( cbPkcs1Buffer ) PBYTE pbPkcs1Format, + SIZE_T cbPkcs1Format, + SIZE_T cbPkcs1Buffer, + _Out_writes_bytes_opt_( cbPlaintext ) PBYTE pbPlaintext, + SIZE_T cbPlaintext, + _Out_ SIZE_T *pcbPlaintext ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 mPaddingError = 0; 
+ UINT32 mBufferSizeError = 0; + + UINT32 cbPlaintextResult = 0; + UINT32 i; + UINT32 mByteIsZero; + UINT32 mLengthFound; + UINT32 iFirstZero; + UINT32 cbPlaintextTruncated; + + SYMCRYPT_ASSERT( cbPkcs1Buffer >= cbPkcs1Format ); + SYMCRYPT_ASSERT( cbPkcs1Buffer >= 32 ); // Requirements for SymcryptScsRotateBuffer + SYMCRYPT_ASSERT( (cbPkcs1Buffer & (cbPkcs1Buffer - 1)) == 0 ); // must be a power of 2 + SYMCRYPT_ASSERT( cbPkcs1Buffer <= (1 << 30 )); // Ensure we can use 31-bit masking operations + + // Format: 00 02 <PS> 00 <M> + // <PS> 8 or more padding bytes, random, all nonzero + // <M> message, length between 0 and cbPKCS1Format - 11. + // See RFC 3447 for more details. + // We do not reveal the buffer contents through side-channels to avoid Bleichenbacher-style attacks + // This includes the plaintext length, which is determined by the location of the 00 byte + + if ( cbPkcs1Format < 11 ) + { + // cbPKCS1Format is public, so the if() is safe. 11 is the total overhead + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + // this also implies that cbPkcs1Buffer >= 16 + + // Check the leading bytes + mPaddingError |= SymCryptMask32IsNonzeroU31( pbPkcs1Format[0] ); // First byte must be = 0 + mPaddingError |= SymCryptMask32NeqU31( pbPkcs1Format[1], PKCS_BLOCKTYPE_2 ); // Second byte must be = 2 + + iFirstZero = 0; + mLengthFound = 0; + for (i = 2; i < cbPkcs1Format; i++) + { + mByteIsZero = SymCryptMask32IsZeroU31( pbPkcs1Format[i] ); + + // remember the index of the first zero byte + iFirstZero |= i & mByteIsZero & ~mLengthFound; + mLengthFound |= mByteIsZero; + } + mPaddingError |= ~mLengthFound; + + // At this point: + // - iFirstZero points to the first zero byte, or is 0 if there is no zero byte + // - mPaddingError is set if no zero byte was found + + // It is an error if the first zero is at index < 10 as <PS> needs to be at least 8 bytes + mPaddingError |= SymCryptMask32LtU31( iFirstZero, 10 ); + + // Compute the # bytes of the message; 0 if there 
was a padding error + cbPlaintextResult = ~mPaddingError & ((UINT32)(cbPkcs1Format - iFirstZero - 1)); + + // We're done if the caller didn't want the actual message, but only the size. + // We do that before checking the size of the plaintext buffer so that callers who + // only want the size do not get an error. + if( pbPlaintext == NULL ) + { + // Condition is public. + goto cleanup; + } + + // Checking that the output buffer is large enough is a bit tricky as we have a SIZE_T as + // buffer size, but we like to work on 31-bit integers as they have better mask algorithm perf. + // We can truncate the SIZE_T and check for equality, which is side-channel safe. + cbPlaintextTruncated = ((UINT32) cbPlaintext) & 0x7fffffff; // Truncate to 31 bits + if( cbPlaintextTruncated == cbPlaintext ) + { + // Condition is public as we write the whole plaintext buffer anyway. + mBufferSizeError = SymCryptMask32LtU31( cbPlaintextTruncated, cbPlaintextResult ); + } + + // The message starts at iFirstZero + 1, which is a variable location so we can't just memcpy it without + // revealing information through side channels. + // Instead we rotate the buffer left (side-channel safe) so that the message appears at the front. + // Rotation constant is such that the message appears at the start. + SymCryptScsRotateBuffer( pbPkcs1Format, cbPkcs1Buffer, (iFirstZero + 1) & (cbPkcs1Buffer - 1) ); + + // The ScsCopy function can copy the data to the destination buffer, but the input buffer must be + // as long as the output buffer. We can't just use cbPlaintext as the output buffer size, as it is + // unbounded. But we can limit it to cbPkcs1Format as that is the public key size and is public. + SymCryptScsCopy( pbPkcs1Format, cbPlaintextResult, pbPlaintext, SYMCRYPT_MIN( cbPlaintext, cbPkcs1Format ) ); + +cleanup: + // Update scError with the two error masks. Padding error given highest priority. 
+ scError ^= mBufferSizeError & (scError ^ SYMCRYPT_BUFFER_TOO_SMALL); + scError ^= mPaddingError & (scError ^ SYMCRYPT_INVALID_ARGUMENT); + + *pcbPlaintext = cbPlaintextResult; + return scError; +} + +// +// OAEP Encryption Format: +// +----------+---------+-------+ +// DB = | lHash | PS | M | +// +----------+---------+-------+ +// | +// +----------+ V +// | seed |--> MGF ---> xor +// +----------+ | +// | | +// +--+ V | +// |00| xor <----- MGF <-----| +// +--+ | | +// | | | +// V V V +// +--+----------+----------------------------+ +// EM = |00|maskedSeed| maskedDB | +// +--+----------+----------------------------+ +// +// PS = zero or more bytes 0x00 || 0x01 +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepApplyEncryptionPadding( + _In_reads_bytes_( cbPlaintext ) PCBYTE pbPlaintext, + SIZE_T cbPlaintext, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + _In_reads_bytes_opt_( cbSeed ) PCBYTE pbSeed, + SIZE_T cbSeed, + _Out_writes_bytes_( cbOaepFormat ) PBYTE pbOaepFormat, + SIZE_T cbOaepFormat, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PVOID pHashState; + + PBYTE pbSeedInternal; + PBYTE pbSeedMask; + PBYTE pbDB; + PBYTE pbDBMask; + + SIZE_T cbDB; + SIZE_T cbPS; + + SIZE_T cbHash = SymCryptHashResultSize( hashAlgorithm ); + SIZE_T cbHashState = SymCryptHashStateSize( hashAlgorithm ); + + UNREFERENCED_PARAMETER( cbScratch ); + + // OAEP overhead is 2 + 2 * size of hash result + if( cbOaepFormat < (cbPlaintext + (cbHash * 2) + 2) || + ((pbSeed!=NULL) && (cbSeed>cbHash)) || + ((pbSeed==NULL) && (cbSeed!=0)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbPS = cbOaepFormat - (cbPlaintext + (cbHash * 2) + 2); + cbDB = cbOaepFormat - (cbHash + 1); + + SYMCRYPT_ASSERT( cbScratch >= cbHashState + (cbHash * 2) + (cbDB * 2) ); + + pHashState = (PVOID) pbScratch; + pbSeedInternal = pbScratch + cbHashState; + 
pbSeedMask = pbSeedInternal + cbHash; + pbDB = pbSeedMask + cbHash; + pbDBMask = pbDB + cbDB; + + // hash the label + SymCryptHash( hashAlgorithm, pbLabel, cbLabel, pbDB, cbHash ); + + SymCryptWipe(pbDB + cbHash, cbPS); + pbDB[cbHash + cbPS] = 0x01; + + // dcl - are we quite sure that none of these numbers are under attacker control? + memcpy(pbDB + cbHash + cbPS + 1, pbPlaintext, cbPlaintext); + + if (NULL == pbSeed) + { + // generate the random seed (same length as the hash result) + scError = SymCryptCallbackRandom( pbSeedInternal, cbHash ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + else + { + SymCryptWipe( pbSeedInternal, cbHash ); + memcpy(pbSeedInternal, pbSeed, cbSeed); + } + + // MGF(seed) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbSeedInternal, + cbHash, + pbDBMask, + cbDB); + + // set the most significant byte to 0x00 + pbOaepFormat[0] = 0x00; + + // XOR the DB and the mask MGF(seed) + for (UINT32 i = 0; i < cbDB; i++) + { + pbOaepFormat[cbHash + 1 + i] = pbDB[i] ^ pbDBMask[i]; + } + + // MGF(masked DB) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbOaepFormat + cbHash + 1, + cbDB, + pbSeedMask, + cbHash); + + // XOR the seed and the seed mask MGF(masked DB) + for (UINT32 i = 0; i < cbHash; i++) + { + pbOaepFormat[1 + i] = pbSeedInternal[i] ^ pbSeedMask[i]; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepRemoveEncryptionPadding( + _In_reads_bytes_( cbOAEPFormat ) + PCBYTE pbOAEPFormat, + SIZE_T cbOAEPFormat, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + _Out_writes_bytes_( cbPlaintext ) + PBYTE pbPlaintext, + SIZE_T cbPlaintext, + _Out_ SIZE_T *pcbPlaintext, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PVOID pHashState; + + PBYTE pbSeedMask; + PBYTE 
pbSeed; + PBYTE pbDBMask; + PBYTE pbDB; + PBYTE pbLabelHash; + UINT32 mPaddingError; + + SIZE_T cbDB; + + SIZE_T cnt = 0; + + SIZE_T cbHashAlg = SymCryptHashResultSize( hashAlgorithm ); + SIZE_T cbHashState = SymCryptHashStateSize( hashAlgorithm ); + + UNREFERENCED_PARAMETER( cbScratch ); + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Padding overhead is 2 hash values plus 2 bytes. + // The length is validated before the buffer is touched so that + // pbOAEPFormat[0] is never read from an empty input. + if( cbOAEPFormat < (2*cbHashAlg + 2) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // check if the most significant byte is set to 0x00 + mPaddingError = SymCryptMask32IsNonzeroU31( pbOAEPFormat[0] ); + + cbDB = cbOAEPFormat - (cbHashAlg + 1); + + SYMCRYPT_ASSERT( cbScratch >= cbHashState + (cbHashAlg * 3) + (cbDB * 2) ); + + pHashState = (PVOID) pbScratch; + pbSeedMask = pbScratch + cbHashState; + pbSeed = pbSeedMask + cbHashAlg; + pbDBMask = pbSeed + cbHashAlg; + pbDB = pbDBMask + cbDB; + pbLabelHash = pbDB + cbDB; + + // MGF(masked DB) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbOAEPFormat + cbHashAlg + 1, + cbDB, + pbSeedMask, + cbHashAlg); + + // XOR the masked seed and the seed mask MGF(masked DB) + for (UINT32 i = 0; i < cbHashAlg; i++) + { + pbSeed[i] = pbOAEPFormat[1 + i] ^ pbSeedMask[i]; + } + + // MGF(seed) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbSeed, + cbHashAlg, + pbDBMask, + cbDB); + + // XOR the masked DB and the mask MGF(seed) + for (UINT32 i = 0; i < cbDB; i++) + { + pbDB[i] = pbOAEPFormat[cbHashAlg + 1 + i] ^ pbDBMask[i]; + } + + // hash the label + SymCryptHash( hashAlgorithm, pbLabel, cbLabel, pbLabelHash, cbHashAlg ); + + // check the label hash + mPaddingError |= SymCryptMask32IsZeroU31( SymCryptEqual( pbLabelHash, pbDB, cbHashAlg ) ); + + // + // At this point we have verified the leading 0 byte and the label hash, with any + // errors in mPaddingError. 
We could continue to make the entire padding removal + // side-channel safe like we do in the PKCS1 padding case, but that is not necessary. + // The side-channel only leaks data if the attacker can trigger two different behaviours + // and derive information from the difference. + // This is relatively easy to do with something like a match on 1 or 2 bytes because the + // chance of satisfying the check on a random input is still useful. But here we have + // matched 33 bytes (assuming a 32-byte hash) and the Bleichenbacher style attacks don't + // work beyond this point. Basically, these attacks produce ciphertexts without knowing + // the corresponding plaintext, and the chance of the label hash matching is something + // like 2^{-256}. So these ciphertexts will always fail right here, and there is no + // difference of behaviour that leaks data to the attacker. + // Thus, we can switch back to normal processing of the errors here. + // + + if( mPaddingError != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // check the PS + for (cnt = cbHashAlg; cnt < cbDB; cnt++) + { + if (pbDB[cnt] == 0x01) + { + cnt++; + break; + } + else if (pbDB[cnt] != 0x00) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + if (pbDB[cnt - 1] != 0x01) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // the rest is data + *pcbPlaintext = cbDB - cnt; + + if(NULL == pbPlaintext) + { + scError = SYMCRYPT_NO_ERROR; + goto cleanup; + } + + if (cbPlaintext < *pcbPlaintext) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + memcpy(pbPlaintext, pbDB + cnt, *pcbPlaintext); + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + + return scError; +} + +// +// PKCS1 Signature Format: +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1ApplySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_( cbHashOid ) + PCBYTE pbHashOid, + SIZE_T cbHashOid, + UINT32 flags, + _Out_writes_bytes_( 
cbPKCS1Format ) + PBYTE pbPKCS1Format, + SIZE_T cbPKCS1Format ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SIZE_T cbEncoding; + SIZE_T cbPadding; + SIZE_T cbOidOffset; + + BOOLEAN fInsertASN1 = TRUE; + + if ((flags & ~SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Simple check to avoid funky behavior if cbHash is close to SIZE_MAX + if (cbHash >= cbPKCS1Format) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + fInsertASN1 = ((flags & SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1) == 0); + + if (fInsertASN1) + { + if ( (pbHashOid!=NULL) && (cbHashOid>0) ) + { + // determine the length of the ASN1 Encoding + // 2 sequence bytes, 1 id byte and 3 length bytes + cbEncoding = 6 + cbHashOid + cbHash; + } + else + { + if (cbHashOid > 0) + { + // The caller has passed a NULL hash and a non 0 size for it. + // We can't guess the intent, hence we fail + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // special case for MD5 hash without OID + cbEncoding = 2 + cbHash; + } + + // we don't support encodings longer than 128 bytes, + // with this check we know that the length of the OID as + // well as the length of the hash value will each fit in + // one byte + if (cbEncoding > 0x80) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + else + { + cbEncoding = cbHash; + } + + // In a few scenarios (involving small RSA keys), the new large SHA + // hashes are too big to be signed by the specified key. + // There must be at least 8 bytes of 0xff. 
+ if (3 + 8 + cbEncoding > cbPKCS1Format) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbPadding = cbPKCS1Format - 3 - cbEncoding; + + + // insert the block type and delimiters + pbPKCS1Format[0] = 0x00; + pbPKCS1Format[1] = 0x01; + pbPKCS1Format[2 + cbPadding] = 0x00; + + // insert the type 1 padding + memset(pbPKCS1Format + 2, 0xff, cbPadding); + + if (fInsertASN1) + { + cbOidOffset = 1; + if ( (pbHashOid!=NULL) && (cbHashOid>0) ) + { + // insert the algorithm encoding + pbPKCS1Format[2 + cbPadding + 1] = ASN1_SEQUENCE_BYTE; + pbPKCS1Format[2 + cbPadding + 2] = (BYTE)cbEncoding - 2; + + // insert the sequence string byte, length of the hash and the hash value + pbPKCS1Format[2 + cbPadding + 3] = ASN1_SEQUENCE_BYTE; + pbPKCS1Format[2 + cbPadding + 4] = (BYTE)cbHashOid; + cbOidOffset += 4; + memcpy(pbPKCS1Format + 2 + cbPadding + cbOidOffset, pbHashOid, cbHashOid); + } + + // insert the octet string byte, length of the hash and the hash value + pbPKCS1Format[2 + cbPadding + cbOidOffset + cbHashOid] = ASN1_OCTET_STRING_BYTE; + pbPKCS1Format[2 + cbPadding + cbOidOffset + cbHashOid + 1] = (BYTE)cbHash; + memcpy(pbPKCS1Format + 2 + cbPadding + cbOidOffset + cbHashOid + 2, pbHash, cbHash); + } + else + { + memcpy(pbPKCS1Format + 3 + cbPadding, pbHash, cbHash); + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + + return scError; +} + +// +// Check if a PKCS1 padding is valid with regard to a hash oid +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1CheckSignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_( cbHashOid ) + PCBYTE pbHashOid, + SIZE_T cbHashOid, + _In_reads_bytes_( cbPKCS1Format ) + PCBYTE pbPKCS1Format, + UINT32 flags, + _Out_writes_bytes_( cbPKCS1Format ) + PBYTE pbScratch, + SIZE_T cbPKCS1Format) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SymCryptWipe(pbScratch, cbPKCS1Format); + + scError = SymCryptRsaPkcs1ApplySignaturePadding( + pbHash, + cbHash, + pbHashOid, + cbHashOid, + 
flags, + pbScratch, + cbPKCS1Format ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if ( SymCryptEqual(pbScratch, pbPKCS1Format, cbPKCS1Format) ) + { + scError = SYMCRYPT_NO_ERROR; + } + else + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + } + +cleanup: + + return scError; +} + + +// +// Verify a PKCS1 signature block against a hash value. +// Each OID in pHashOIDs is tried in turn; the first one whose re-created padding +// equals pbPKCS1Format succeeds. If pHashOIDs is NULL, or no OID matched and +// SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID is set, the block is compared against +// the padding without ASN1 encoding. Any other flag yields SYMCRYPT_INVALID_ARGUMENT. +// pbScratch must provide at least cbPKCS1Format bytes. +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1VerifySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_( nOIDCount ) PCSYMCRYPT_OID pHashOIDs, + _In_ SIZE_T nOIDCount, + _In_reads_bytes_( cbPKCS1Format ) + PCBYTE pbPKCS1Format, + SIZE_T cbPKCS1Format, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 i = 0; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= cbPKCS1Format ); + + if ((flags & ~SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // + // Verify padding and the hash value + // + if (pHashOIDs) + { + // Start from failure so that a non-NULL list with nOIDCount == 0 can + // never report success without a single padding comparison. + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + + for (i = 0; i < nOIDCount; i++) + { + scError = SymCryptRsaPkcs1CheckSignaturePadding( + pbHash, + cbHash, + pHashOIDs[i].pbOID, + pHashOIDs[i].cbOID, + pbPKCS1Format, + 0, + pbScratch, + cbPKCS1Format ); + if (scError == SYMCRYPT_NO_ERROR) + { + break; + } + } + } + + if ((pHashOIDs == NULL ) || + (scError != SYMCRYPT_NO_ERROR && + flags & SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID)) + { + // if no OID is passed in, or + // OID is passed in but failed verification, but OID is optional + scError = SymCryptRsaPkcs1CheckSignaturePadding( + pbHash, + cbHash, + NULL, + 0, + pbPKCS1Format, + SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1, + pbScratch, + cbPKCS1Format ); + } + +cleanup: + + return scError; +} + +// +// PSS Signature Format: +// +--------+----------+----------+ +// M' = |Padding1| Hash M | salt | +// +--------+----------+----------+ +// | +// +--------+----------+ V +// DB = |Padding2| salt | Hash +// +--------+----------+ | +// | | +// 
V | +--+ +// xor <--- MGF <---| |bc| +// | | +--+ +// | | | +// V V V +// +-------------------+----------+--+ +// EM = | maskedDB | H |bc| +// +-------------------+----------+--+ +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssApplySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_opt_( cbSalt ) + PCBYTE pbSalt, + _In_range_(0, cbPSSFormat) SIZE_T cbSalt, + UINT32 nBitsOfModulus, + UINT32 flags, + _Out_writes_bytes_( cbPSSFormat ) + PBYTE pbPSSFormat, + SIZE_T cbPSSFormat, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PVOID pHashState; + + PBYTE pbMPrime; + PBYTE pbDB; + PBYTE pbDBMask; + + SIZE_T cbDB; + SIZE_T cbMPrime; + SIZE_T cbPadding2; + + SIZE_T dwZeroBits = 0; // Number of bits of the leftmost bit to be zeroed + + SIZE_T cbHashAlg = SymCryptHashResultSize( hashAlgorithm ); + SIZE_T cbHashState = SymCryptHashStateSize( hashAlgorithm ); + + UNREFERENCED_PARAMETER( cbScratch ); + + if ((cbPSSFormat == 0) || (pbPSSFormat == NULL)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Corner case of RFC 3447 for PSS: + // If nBitsOfModulus == 1 mod 8, then emBits = nBitsOfModulus - 1 == 0 mod 8 + // Thus the size of the input buffer in bytes is emLen = ceil(emBits /8), + // one smaller than the size of the modulus. Fix this here by setting the + // leftmost byte of the output equal to 0. 
+ if (nBitsOfModulus%8 == 1) + { + pbPSSFormat[0] = 0; + pbPSSFormat++; + cbPSSFormat--; + } + + if ((flags!=0) || + (cbPSSFormat < (cbHashAlg + cbSalt + 2)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbDB = cbPSSFormat - (cbHashAlg + 1); + cbPadding2 = cbDB - cbSalt - 1; + cbMPrime = 8 + cbHash + cbSalt; + + SYMCRYPT_ASSERT( cbScratch >= cbHashState + cbMPrime + (cbDB * 2) ); + + pHashState = (PVOID) pbScratch; + pbMPrime = pbScratch + cbHashState; + pbDB = pbMPrime + cbMPrime; + pbDBMask = pbDB + cbDB; + + // set up the M Prime + SymCryptWipe(pbMPrime, 8); + memcpy(pbMPrime + 8, pbHash, cbHash); + + if (NULL == pbSalt) + { + // generate the random salt + scError = SymCryptCallbackRandom( + pbMPrime + 8 + cbHash, + cbSalt); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + else + { + // copy the salt passed + memcpy(pbMPrime + 8 + cbHash, pbSalt, cbSalt); + } + + // hash the MPrime + SymCryptHash( hashAlgorithm, pbMPrime, cbMPrime, pbPSSFormat + cbDB, cbHashAlg ); + + // copy the same salt into the DB + SymCryptWipe(pbDB, cbPadding2); + pbDB[cbPadding2] = 0x01; + memcpy(pbDB + cbPadding2 + 1, pbMPrime + 8 + cbHash, cbSalt); + + // MGF(Hash of MPrime) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbPSSFormat + cbDB, + cbHashAlg, + pbDBMask, + cbDB); + + // XOR the DB and the mask MGF(seed) + for (UINT32 i = 0; i < cbDB; i++) + { + pbPSSFormat[i] = pbDB[i] ^ pbDBMask[i]; + } + + // calculate the number of bits to be zeroed + dwZeroBits = 8*cbPSSFormat + 1 - nBitsOfModulus; + + // mask off dwZeroBits worth of the encoded message + pbPSSFormat[0] &= (BYTE)(0xff >> dwZeroBits); + + // set the least significant byte of pbPSSFormat to bc + pbPSSFormat[cbPSSFormat - 1] = 0xbc; + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssVerifySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_ PCSYMCRYPT_HASH 
hashAlgorithm, + _In_range_(0, cbPSSFormat) SIZE_T cbSalt, + _In_reads_bytes_( cbPSSFormat ) + PCBYTE pbPSSFormat, + SIZE_T cbPSSFormat, + UINT32 nBitsOfModulus, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PVOID pHashState; + + PBYTE pbDBMask; + PBYTE pbMPrime; + PBYTE pbMPrimeHash; + PCBYTE pbHashOfMPrimeIndex; + + SIZE_T cbDB; + SIZE_T cbMPrime; + SIZE_T cbPadding2; + SIZE_T cbSaltObserved; + + SIZE_T dwZeroBits = 0; // Number of bits of the leftmost bit to be zeroed + + SIZE_T cbHashAlg = SymCryptHashResultSize( hashAlgorithm ); + SIZE_T cbHashState = SymCryptHashStateSize( hashAlgorithm ); + + UNREFERENCED_PARAMETER( cbScratch ); + + if (((flags & ~SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT) != 0) || + (cbPSSFormat == 0) || + (pbPSSFormat == NULL)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Corner case of RFC 3447 for PSS: + // If nBitsOfModulus == 1 mod 8, then emBits = nBitsOfModulus - 1 == 0 mod 8 + // Thus the size of the input buffer in bytes is emLen = ceil(emBits /8), + // one smaller than the size of the modulus. Fix this here by checking that the + // leftmost byte of the input equals 0. 
+ if (nBitsOfModulus%8 == 1) + { + if (pbPSSFormat[0] != 0) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + pbPSSFormat++; + cbPSSFormat--; + } + + // calculate the number of bits to be zeroed + dwZeroBits = 8*cbPSSFormat + 1 - nBitsOfModulus; + + // check the most significant dwZeroBits bits to ensure they're zero and + // check the least significant byte + if( (cbPSSFormat < (cbHashAlg + cbSalt + 2)) || + (pbPSSFormat[0] & (BYTE)(0xff << (8 - dwZeroBits))) != 0 || + pbPSSFormat[cbPSSFormat - 1] != 0xbc + ) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + + cbDB = cbPSSFormat - (cbHashAlg + 1); + + pHashState = (PVOID) pbScratch; + pbDBMask = pbScratch + cbHashState; + + // index to hash of M Prime + pbHashOfMPrimeIndex = pbPSSFormat + (cbPSSFormat - (cbHashAlg + 1)); + + // MGF(masked DB) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbHashOfMPrimeIndex, + cbHashAlg, + pbDBMask, + cbDB); + + // XOR the DB and the DB mask and store the result in pbDBMask (not needed after this) + for (UINT32 i = 0; i < cbDB; i++) + { + pbDBMask[i] = pbPSSFormat[i] ^ pbDBMask[i]; + } + + // mask off the first dwZeroBits + pbDBMask[0] &= (BYTE)(0xff >> dwZeroBits); + + // find the length of the all-zeroes padding2 in pbDBMask + // padding2 must be terminated by a 0x01 byte + for (cbPadding2 = 0; cbPadding2 < (cbDB - cbSalt); cbPadding2++) + { + if (pbDBMask[cbPadding2] == 0x01) + { + // we have reached the end of padding2 + break; + } + + if (pbDBMask[cbPadding2] != 0x00) + { + // non-zero byte in what should be padding2 + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + } + + // Here we have either: + // cbPadding2 == cbDB - cbSalt, which means the padding is too long + // or + // cbPadding2 <= cbDB - cbSalt - 1, and we have broken out of the loop when we found the 0x01 byte + if( cbPadding2 == cbDB - cbSalt ) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto 
cleanup; + } + + cbSaltObserved = cbDB - cbPadding2 - 1; + // cbSalt <= cbDB - cbPadding2 - 1 = cbSaltObserved + // so cbSaltObserved is acceptable value for signature verification + // with SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT + + if( ((flags & SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT) == 0) && + cbSaltObserved != cbSalt ) + { + // When SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT not specified, + // we require salt length observed to exactly match the caller provided salt length + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + + pbMPrime = pbDBMask + cbDB; + cbMPrime = 8 + cbHash + cbSaltObserved; + pbMPrimeHash = pbMPrime + cbMPrime; + + SYMCRYPT_ASSERT( cbScratch >= cbHashState + cbDB + cbMPrime + cbHashAlg ); + + // create the M Prime + SymCryptWipe(pbMPrime, 8); + memcpy(pbMPrime + 8, pbHash, cbHash); + memcpy(pbMPrime + 8 + cbHash, + pbDBMask + (cbDB - cbSaltObserved), + cbSaltObserved); + + // hash the M Prime + SymCryptHash( hashAlgorithm, pbMPrime, cbMPrime, pbMPrimeHash, cbHashAlg ); + + if ( !SymCryptEqual(pbPSSFormat + cbDB, pbMPrimeHash, cbHashAlg) ) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/rsakey.c b/libs/symcrypt/lib/rsakey.c new file mode 100644 index 00000000000..32b802ba7e6 --- /dev/null +++ b/libs/symcrypt/lib/rsakey.c @@ -0,0 +1,1631 @@ +// +// rsakey.c RSA keys' related algorithms +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +#define RSA_DEFAULT_PUBLIC_EXPONENT (65537) + +PSYMCRYPT_RSAKEY +SYMCRYPT_CALL +SymCryptRsakeyAllocate( + _In_ PCSYMCRYPT_RSA_PARAMS pParams, + _In_ UINT32 flags ) +{ + PVOID p; + SIZE_T cb; + PSYMCRYPT_RSAKEY res = NULL; + + UNREFERENCED_PARAMETER( flags ); + + SYMCRYPT_ASSERT( pParams != NULL ); + + cb = SymCryptSizeofRsakeyFromParams( pParams ); + + p = SymCryptCallbackAlloc( cb ); + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptRsakeyCreate( p, cb, pParams ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptRsakeyFree( _Out_ PSYMCRYPT_RSAKEY pkObj ) +{ + SYMCRYPT_CHECK_MAGIC( pkObj ); + SymCryptRsakeyWipe( pkObj ); + SymCryptCallbackFree( pkObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofRsakeyFromParams( _In_ PCSYMCRYPT_RSA_PARAMS pParams ) +{ + UINT32 nModulusDigits; + UINT32 res; + + SYMCRYPT_ASSERT( pParams != NULL ); + + nModulusDigits = SymCryptDigitsFromBits( pParams->nBitsOfModulus ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - nPrimes and nPubExps are bounded by SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES = 2 and + // SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS = 1 + // Thus the following calculation does not overflow the result. 
+ // + res = sizeof(SYMCRYPT_RSAKEY) + + SymCryptSizeofModulusFromDigits( nModulusDigits ) + // For Modulus + pParams->nPrimes * SymCryptSizeofModulusFromDigits( nModulusDigits ) + // For Primes + pParams->nPrimes * SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pParams->nBitsOfModulus ) + // For CrtInverses + pParams->nPubExp * SymCryptSizeofIntFromDigits( nModulusDigits ) + // For PrivExps + pParams->nPubExp * pParams->nPrimes * SymCryptSizeofIntFromDigits( nModulusDigits ); // For CrtPrivExps + + // Consistency check with the static macro (optimized away in production) + SYMCRYPT_ASSERT( res <= SYMCRYPT_SIZEOF_RSAKEY_FROM_PARAMS( pParams->nBitsOfModulus, pParams->nPrimes, pParams->nPubExp ) ); + + return res; +} + +PSYMCRYPT_RSAKEY +SYMCRYPT_CALL +SymCryptRsakeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_RSA_PARAMS pParams ) +{ + PSYMCRYPT_RSAKEY pkObj = NULL; + + PBYTE pbCurr = pbBuffer; + SIZE_T cbNeeded; + SIZE_T itemSize; + + SYMCRYPT_ASSERT( pParams != NULL ); + + cbNeeded = SymCryptSizeofRsakeyFromParams( pParams ); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbCurr ); + + if (( cbBuffer < cbNeeded ) || + ( pParams->nBitsOfModulus < SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS ) || + ( pParams->nBitsOfModulus > SYMCRYPT_RSAKEY_MAX_BITSIZE_MODULUS ) || + ( pParams->nPubExp < 1 ) || + ( pParams->nPubExp > SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS ) || + ( pParams->nPrimes == 1 ) || + ( pParams->nPrimes > SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES ) ) + { + goto cleanup; + } + SYMCRYPT_ASSERT( cbBuffer >= sizeof( SYMCRYPT_RSAKEY ) ); + + pkObj = (PSYMCRYPT_RSAKEY) pbCurr; + + // Set all the parameters to 0 + SymCryptWipe( pbBuffer, cbBuffer ); + + // Main parameters of the RSAKEY + // Everything is 0 until created + + pkObj->cbTotalSize = (UINT32) cbNeeded; + // The result should always be within 4 GB, but we check to avoid security bugs + SYMCRYPT_ASSERT( pkObj->cbTotalSize == cbNeeded ); + + pkObj->hasPrivateKey = FALSE; + + pkObj->nSetBitsOfModulus = 
pParams->nBitsOfModulus; + pkObj->nDigitsOfModulus = SymCryptDigitsFromBits( pkObj->nSetBitsOfModulus ); // The modulus object has always this number of digits + + pkObj->nPrimes = pParams->nPrimes; + pkObj->nPubExp = pParams->nPubExp; + + pbCurr += sizeof( SYMCRYPT_RSAKEY ); + + // Modulus + itemSize = SymCryptSizeofModulusFromDigits( pkObj->nDigitsOfModulus ); + SYMCRYPT_ASSERT( cbBuffer >= sizeof( SYMCRYPT_RSAKEY ) + itemSize + + (pkObj->nPrimes*SymCryptSizeofModulusFromDigits( pkObj->nDigitsOfModulus )) + + (pkObj->nPrimes*SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pParams->nBitsOfModulus )) + + (pkObj->nPubExp*SymCryptSizeofIntFromDigits( pkObj->nDigitsOfModulus )) + + (pkObj->nPubExp*pkObj->nPrimes*SymCryptSizeofIntFromDigits( pkObj->nDigitsOfModulus )) ); + pkObj->pmModulus = SymCryptModulusCreate( + pbCurr, + itemSize, + pkObj->nDigitsOfModulus ); + SYMCRYPT_ASSERT( pkObj->pmModulus != NULL ); + pbCurr += itemSize; + + // For the remaining objects + // defer creation until SymCryptRsakeyGenerate or + // SymCryptRsakeySetValue + + // Primes + for (UINT32 i=0; i<pkObj->nPrimes; i++) + { + pkObj->pbPrimes[i] = pbCurr; + pbCurr += SymCryptSizeofModulusFromDigits( pkObj->nDigitsOfModulus ); + } + + // CRT Inverses of primes + for (UINT32 i=0; i<pkObj->nPrimes; i++) + { + pkObj->pbCrtInverses[i] = pbCurr; + pbCurr += SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pParams->nBitsOfModulus ); + } + + // Private exponents + for (UINT32 i=0; i<pkObj->nPubExp; i++) + { + pkObj->pbPrivExps[i] = pbCurr; + pbCurr += SymCryptSizeofIntFromDigits( pkObj->nDigitsOfModulus ); + } + + // Private exponents modulo each prime (minus 1) + for (UINT32 i=0; i<pkObj->nPubExp*pkObj->nPrimes; i++) + { + pkObj->pbCrtPrivExps[i] = pbCurr; + pbCurr += SymCryptSizeofIntFromDigits( pkObj->nDigitsOfModulus ); + } + + // Setting the magic + SYMCRYPT_SET_MAGIC( pkObj ); + +cleanup: + return pkObj; +} + +VOID +SYMCRYPT_CALL +SymCryptRsakeyWipe( _Out_ PSYMCRYPT_RSAKEY pkDst ) +{ + // Wipe the whole structure 
in one go. + SymCryptWipe( pkDst, pkDst->cbTotalSize ); +} + +#if 0 +VOID +SYMCRYPT_CALL +SymCryptRsakeyCopy( + _In_ PCSYMCRYPT_RSAKEY pkSrc, + _Out_ PSYMCRYPT_RSAKEY pkDst ) +{ + SymCryptFatal( 'rsac' ); + // This function doesn't work correctly because subobjects might + // not have been created yet. + // Future: fix this + + // + // in-place copy is somewhat common... + // + if( pkSrc != pkDst ) + { + pkDst->fAlgorithmInfo = pkSrc->fAlgorithmInfo; + pkDst->cbTotalSize = pkSrc->cbTotalSize; + pkDst->hasPrivateKey = pkSrc->hasPrivateKey; + pkDst->nSetBitsOfModulus = pkSrc->nSetBitsOfModulus; + + pkDst->nBitsOfModulus = pkSrc->nBitsOfModulus; + pkDst->nDigitsOfModulus = pkSrc->nDigitsOfModulus; + + pkDst->nPubExp = pkSrc->nPubExp; + for (UINT32 i=0; i<SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS; i++) + { + pkDst->au64PubExp[i] = pkSrc->au64PubExp[i]; + } + + pkDst->nPrimes = pkSrc->nPrimes; + for (UINT32 i=0; i<SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES; i++) + { + pkDst->nBitsOfPrimes[i] = pkSrc->nBitsOfPrimes[i]; + pkDst->nDigitsOfPrimes[i] = pkSrc->nDigitsOfPrimes[i]; + } + + // Copy the objects + SymCryptModulusCopy( pkSrc->pmModulus, pkDst->pmModulus ); + + for (UINT32 i=0; i< pkSrc->nPrimes; i++) + { + SymCryptModulusCopy( pkSrc->pmPrimes[i], pkDst->pmPrimes[i] ); + SymCryptModElementCopy( pkSrc->pmPrimes[i], pkSrc->peCrtInverses[i], pkDst->peCrtInverses[i] ); + } + + for (UINT32 i=0; i< pkSrc->nPubExp; i++) + { + SymCryptIntCopy( pkSrc->piPrivExps[i], pkDst->piPrivExps[i] ); + } + + for (UINT32 i=0; i< pkSrc->nPubExp*pkSrc->nPrimes; i++) + { + SymCryptIntCopy( pkSrc->piCrtPrivExps[i], pkDst->piCrtPrivExps[i] ); + } + } +} +#endif + +BOOLEAN +SYMCRYPT_CALL +SymCryptRsakeyHasPrivateKey( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) +{ + return pkRsakey->hasPrivateKey; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofModulus( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) +{ + return (pkRsakey->nBitsOfModulus + 7)/8; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyModulusBits( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) 
+{ + return pkRsakey->nBitsOfModulus; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofPublicExponent( + _In_ PCSYMCRYPT_RSAKEY pRsakey, + UINT32 index ) +{ + SYMCRYPT_ASSERT( index == 0 ); + UNREFERENCED_PARAMETER( index ); + return SymCryptUint64Bytesize( pRsakey->au64PubExp[0] ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofPrime( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + UINT32 index ) +{ + return (pkRsakey->nBitsOfPrimes[index] + 7)/8; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyGetNumberOfPublicExponents( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) +{ + return pkRsakey->nPubExp; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyGetNumberOfPrimes( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) +{ + return pkRsakey->nPrimes; +} + +VOID +SYMCRYPT_CALL +SymCryptRsakeyCreateAllObjects( _Inout_ PSYMCRYPT_RSAKEY pkRsakey ) +{ + // Primes + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + pkRsakey->pmPrimes[i] = SymCryptModulusCreate( + pkRsakey->pbPrimes[i], + SymCryptSizeofModulusFromDigits( pkRsakey->nDigitsOfPrimes[i] ), + pkRsakey->nDigitsOfPrimes[i] ); + SYMCRYPT_ASSERT( pkRsakey->pmPrimes[i] != NULL ); + } + + // CRT Inverses of primes + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + pkRsakey->peCrtInverses[i] = SymCryptModElementCreate( + pkRsakey->pbCrtInverses[i], + SymCryptSizeofModElementFromModulus( pkRsakey->pmPrimes[i] ), + pkRsakey->pmPrimes[i] ); + SYMCRYPT_ASSERT( pkRsakey->peCrtInverses[i] != NULL ); + } + + // Private exponents + for( UINT32 i=0; i<pkRsakey->nPubExp; i++ ) + { + pkRsakey->piPrivExps[i] = SymCryptIntCreate( + pkRsakey->pbPrivExps[i], + SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfModulus ), + pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( pkRsakey->piPrivExps[i] != NULL ); + } + + // Private exponents modulo each prime (minus 1) + for (UINT32 i=0; i<pkRsakey->nPubExp*pkRsakey->nPrimes; i++) + { + pkRsakey->piCrtPrivExps[i] = SymCryptIntCreate( + pkRsakey->pbCrtPrivExps[i], + SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfPrimes[i] ), + 
pkRsakey->nDigitsOfPrimes[i] ); + SYMCRYPT_ASSERT( pkRsakey->piCrtPrivExps[i] != NULL ); + } +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyCalculatePrivateFields( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + _Out_ PSYMCRYPT_DIVISOR pdTmp, // Temporary of nMaxDigitsOfPrimes + _Out_ PSYMCRYPT_INT piPhi, // Temporary of nDigitsOfModulus + _Out_ PSYMCRYPT_INT piAcc, // Temporary of nMaxDigitsOfPrimes + nDigitsOfModulus + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch, + UINT32 flags +) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE tmpGcdBuf[ SYMCRYPT_SIZEOF_INT_FROM_BITS( 64 ) + SYMCRYPT_ASYM_ALIGN_VALUE]; + PSYMCRYPT_INT piTmpGcd; + + // Use pdTmp as int scratch + PSYMCRYPT_INT piScr = SymCryptIntFromDivisor(pdTmp); + + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION; + + if ( ( flags & ~allowedFlags ) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // We need a 1-digit tmp value to store the GCD in. + // Simpler to put it on the stack than to add full scratch size computation support to this function + piTmpGcd = SymCryptIntCreate( SYMCRYPT_ASYM_ALIGN_UP( tmpGcdBuf ), sizeof( tmpGcdBuf ) - SYMCRYPT_ASYM_ALIGN_VALUE, SymCryptDigitsFromBits( 64 ) ); + + // Run the CRT generation + scError = SymCryptCrtGenerateInverses( pkRsakey->nPrimes, pkRsakey->pmPrimes, 0, pkRsakey->peCrtInverses, pbScratch, cbScratch); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Calculate Phi + SymCryptIntSetValueUint32( 1, piPhi ); + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + // piScr can have the different number of digits than each prime + scError = SymCryptIntCopyMixedSize( SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), piScr ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + SymCryptIntSubUint32( piScr, 1, piScr ); // p-1 + SymCryptIntMulMixedSize( piScr, piPhi, piAcc, pbScratch, cbScratch ); + scError = SymCryptIntCopyMixedSize( piAcc, piPhi ); + if (scError!=SYMCRYPT_NO_ERROR) + { + 
goto cleanup; + } + } + + // Calculate the private exponents + for (UINT32 i=0; i<pkRsakey->nPubExp; i++) + { + // IntExtendedGcd requirements: + // - First argument > 0: piPhi as the product of p-1's + // - Second argument: odd, verified below + // We also reject public exponent 1, as that is obviously unsafe. + if( pkRsakey->au64PubExp[i] == 1 || (pkRsakey->au64PubExp[i] & 1) != 1) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Load the public exponent E into piScr + SymCryptIntSetValueUint64( pkRsakey->au64PubExp[i], piScr ); + + // Calculate D into piPrivExps[i]; the GCD of Phi and E lands in piTmpGcd and must be 1 (checked below) + SymCryptIntExtendedGcd( + piPhi, + piScr, + SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN, + piTmpGcd, // Gcd + NULL, // Lcm + NULL, // InvSrc1ModSrc2 + pkRsakey->piPrivExps[i], + pbScratch, + cbScratch); + + if( !SymCryptIntIsEqualUint32( piTmpGcd, 1 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Calculate the private exponents modulo each prime minus 1 (pdTmp is set to P-1 below) + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + scError = SymCryptIntCopyMixedSize( SymCryptIntFromModulus(pkRsakey->pmPrimes[i]), SymCryptIntFromDivisor(pdTmp) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // IntToDivisor requirement: + // Each prime has at least SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME bits --> P-1 > 0 + SymCryptIntSubUint32( SymCryptIntFromDivisor(pdTmp), 1, SymCryptIntFromDivisor(pdTmp) ); + SymCryptIntToDivisor( + SymCryptIntFromDivisor(pdTmp), + pdTmp, + pkRsakey->nPubExp, + 0, + pbScratch, + cbScratch ); + + for (UINT32 j=0; j<pkRsakey->nPubExp; j++) + { + SymCryptIntDivMod( + pkRsakey->piPrivExps[j], + pdTmp, + NULL, + piPhi, // Result goes to piPhi (reused as a temporary) as each private exponent might have a different size + pbScratch, + cbScratch ); + + scError = SymCryptIntCopyMixedSize( piPhi, pkRsakey->piCrtPrivExps[ j*pkRsakey->nPrimes + i ]); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + } + + // Check that the product of the primes is in fact the modulus + if( (flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION) == 0 ) + {
+ if( pkRsakey->nPrimes != 2 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptIntMulMixedSize( + SymCryptIntFromModulus(pkRsakey->pmPrimes[0]), + SymCryptIntFromModulus(pkRsakey->pmPrimes[1]), + piAcc, + pbScratch, cbScratch ); + + if( !SymCryptIntIsEqual( piAcc, SymCryptIntFromModulus( pkRsakey->pmModulus ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGenerate( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + _In_reads_opt_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + _In_ UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // 3 sizes of temporary elements: + // - ndPrimes = number of digit size of each prime (we choose it to be the same for all primes) + // - ndMod = pkRsakey->nDigitsOfModulus + // - ndLarge = ndPrimes + ndMod + + UINT32 ndPrimes = 0; + + UINT32 cbPrimes = 0; + PSYMCRYPT_INT piLow = NULL; + PSYMCRYPT_INT piHigh = NULL; + + UINT32 cbDivisor = 0; + PSYMCRYPT_DIVISOR pdTmp = NULL; + + UINT32 ndMod = pkRsakey->nDigitsOfModulus; + UINT32 cbMod = 0; + PSYMCRYPT_INT piPhi = NULL; + + UINT32 ndLarge = 0; + UINT32 cbLarge = 0; + PSYMCRYPT_INT piAcc = NULL; + + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE pbFnScratch = NULL; + UINT32 cbFnScratch = 0; + + UINT32 maxTries = 0; // For the prime generation (and the modulus operations ?) 
+ UINT32 primeBits = 0; + + const UINT64 defaultExponent = RSA_DEFAULT_PUBLIC_EXPONENT; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_RSAKEY_SIGN | SYMCRYPT_FLAG_RSAKEY_ENCRYPT; + // Ensure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | algorithmFlags; + + if ( ( ( flags & ~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // SymCryptRsaSignVerifyPct requires the generated key to be at least 496 bits to avoid fatal + // Require caller to specify NO_FIPS for up to 1024 bits as running FIPS tests on too-small keys + // does not make it FIPS certifiable and gives the wrong impression to callers + if ( ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) && + ( pkRsakey->nSetBitsOfModulus < SYMCRYPT_RSAKEY_FIPS_MIN_BITSIZE_MODULUS ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Handle the default exponent case + if( pu64PubExp == NULL && nPubExp == 0 ) + { + pu64PubExp = &defaultExponent; + nPubExp = 1; + } + + // Make sure we have: + // - exactly 2 primes + // - the right number of public exponents + // - exactly 1 public exponent + if (pkRsakey->nPrimes != 2 || nPubExp != pkRsakey->nPubExp || nPubExp != 1 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Copy the public exponent into the key + pkRsakey->au64PubExp[0] = pu64PubExp[0]; + + // Before doing anything calculate all the needed sizes + // The size limits were checked in SymCryptRsakeyCreate which is the only way to create an Rsakey object. 
+ pkRsakey->nBitsOfModulus = pkRsakey->nSetBitsOfModulus; // This will be the exact bit size of our modulus + + pkRsakey->nBitsOfPrimes[0] = (pkRsakey->nBitsOfModulus + 1)/2; + pkRsakey->nBitsOfPrimes[1] = pkRsakey->nBitsOfModulus/2; // The second prime is one bit smaller for odd-length moduli + + pkRsakey->nDigitsOfPrimes[0] = SymCryptDigitsFromBits(pkRsakey->nBitsOfPrimes[0]); + pkRsakey->nDigitsOfPrimes[1] = SymCryptDigitsFromBits(pkRsakey->nBitsOfPrimes[1]); + + pkRsakey->nMaxDigitsOfPrimes = SYMCRYPT_MAX(pkRsakey->nDigitsOfPrimes[0], pkRsakey->nDigitsOfPrimes[1]); + + ndPrimes = pkRsakey->nMaxDigitsOfPrimes; + ndLarge = ndPrimes + ndMod; + + primeBits = SYMCRYPT_MAX(pkRsakey->nBitsOfPrimes[0],pkRsakey->nBitsOfPrimes[1]); + maxTries = 100 * primeBits; + + // Create all the SymCryptObjects + SymCryptRsakeyCreateAllObjects( pkRsakey ); + + // Allocate the temp integers and the scratch space + // All sizes are limited by the modulus sizes verified in SymCryptRsakeyCreate + cbPrimes = SymCryptSizeofIntFromDigits( ndPrimes ); + cbMod = SymCryptSizeofIntFromDigits( ndMod ); + cbLarge = SymCryptSizeofIntFromDigits( ndLarge ); + cbDivisor = SymCryptSizeofDivisorFromDigits( ndPrimes ); + + cbScratch = 2*cbPrimes + cbMod + cbLarge + cbDivisor + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_PRIME_GEN(ndPrimes), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(ndMod), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL(ndMod), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_CRT_GENERATION(ndPrimes), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD(ndMod), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR(ndPrimes), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( ndMod, ndPrimes ) + )))))); + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbFnScratch = pbScratch; + cbFnScratch = cbScratch; + + // Create temporaries + // dcl - this would be easier to 
review with one statement per line + piLow = SymCryptIntCreate( pbFnScratch, cbPrimes, ndPrimes ); pbFnScratch += cbPrimes; cbFnScratch -= cbPrimes; + piHigh = SymCryptIntCreate( pbFnScratch, cbPrimes, ndPrimes ); pbFnScratch += cbPrimes; cbFnScratch -= cbPrimes; + + piPhi = SymCryptIntCreate( pbFnScratch, cbMod, ndMod ); pbFnScratch += cbMod; cbFnScratch -= cbMod; + + piAcc = SymCryptIntCreate( pbFnScratch, cbLarge, ndLarge ); pbFnScratch += cbLarge; cbFnScratch -= cbLarge; + + pdTmp = SymCryptDivisorCreate( pbFnScratch, cbDivisor, ndPrimes ); pbFnScratch += cbDivisor; cbFnScratch -= cbDivisor; + + // ***Prime generation limits*** + // + // If nBitsOfModulus is even (main case) + // Low limit = 2^{primeBits-1} + 2^{primeBits - 2} + // High limit = 2^primeBits - 1 + // + // If nBitsOfModulus is odd we use different + // limits for the two primes (until we have an integer sqrt function) + // + // For the first + // Low limit = 2^{primeBits-1} + 2^{primeBits - 2} + // High limit = 2^primeBits - 1 + // For the second + // Low limit = 2^{primeBits-2} + 2^{primeBits - 3} + // High limit = 2^{primeBits-1} - 1 + // + // Notice that nBitsOfModulus is a public value. + // + // *** TODO: This works only for 2 primes to give modulus + // of exactly nBitsOfModulus bits. + + SymCryptIntSetValueUint32( 3, piLow ); + SymCryptIntMulPow2( piLow, primeBits - 2, piLow ); + + SymCryptIntSetValueUint32( 1, piHigh ); + SymCryptIntMulPow2( piHigh, primeBits, piHigh ); + SymCryptIntSubUint32( piHigh, 1, piHigh ); + + // Generate primes and at the same time accumulate their product into piPhi + SymCryptIntSetValueUint32( 1, piPhi ); + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + if ( ((pkRsakey->nBitsOfModulus % 2)==1) && (i>0) ) + { + SymCryptIntDivPow2( piLow, 1, piLow ); + SymCryptIntDivPow2( piHigh, 1, piHigh ); + } + + // IntGenerateRandomPrime requirement: + // piLow > 3 since nBitsOfModulus is bounded by + // SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS. 
+ scError = SymCryptIntGenerateRandomPrime( + piLow, + piHigh, + pu64PubExp, + nPubExp, + maxTries, + 0, + SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), + pbFnScratch, + cbFnScratch); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // IntToModulus requirement: + // piLow > 0 --> pkRsakey->pmPrimes[i] > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), + pkRsakey->pmPrimes[i], + pkRsakey->nBitsOfModulus, // Average number of operations + SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbFnScratch, + cbFnScratch ); + + SymCryptIntMulMixedSize( SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), piPhi, piAcc, pbFnScratch, cbFnScratch ); // P_i * Product + scError = SymCryptIntCopyMixedSize( piAcc, piPhi ); // Move the result to piPhi + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + // IntToModulus requirement: + // piPhi product of non-zero primes --> piPhi > 0 + SymCryptIntCopy( piPhi, SymCryptIntFromModulus( pkRsakey->pmModulus ) ); + SymCryptIntToModulus( + SymCryptIntFromModulus( pkRsakey->pmModulus ), + pkRsakey->pmModulus, + pkRsakey->nBitsOfModulus, // Average number of operations + SYMCRYPT_FLAG_DATA_PUBLIC, + pbFnScratch, + cbFnScratch ); + + if ( SymCryptIntBitsizeOfValue( piPhi ) != pkRsakey->nBitsOfModulus) + { + scError = SYMCRYPT_EXTERNAL_FAILURE; // This should never happen (make it assert) + goto cleanup; + } + + // Calculate the rest of the fields + scError = SymCryptRsakeyCalculatePrivateFields( pkRsakey, pdTmp, piPhi, piAcc, pbFnScratch, cbFnScratch, 0 ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + pkRsakey->hasPrivateKey = TRUE; + + pkRsakey->fAlgorithmInfo = flags; // We want to track all of the flags in the Rsakey + + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // Ensure RSA algorithm selftest is run before first use of RSA algorithm + // Per FIPS 140-3 IG, this selftest cannot be a PCT + SYMCRYPT_RUN_SELFTEST_ONCE( + 
SymCryptRsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_RSA); + + // Run SignVerify PCT on generated keypair + // Our current understanding is that this PCT is sufficient for both RSA_SIGN and RSA_ENCRYPT + + // Unconditionally set the sign flag to enable SignVerify PCT on encrypt-only keypair + pkRsakey->fAlgorithmInfo |= SYMCRYPT_FLAG_RSAKEY_SIGN; + + SYMCRYPT_RUN_KEY_GEN_PCT( + SymCryptRsaSignVerifyPct, + pkRsakey, + SYMCRYPT_PCT_RSA_SIGN ); + + // Unset the sign flag before returning encrypt-only keypair + if ( ( flags & SYMCRYPT_FLAG_RSAKEY_SIGN ) == 0 ) + { + pkRsakey->fAlgorithmInfo ^= SYMCRYPT_FLAG_RSAKEY_SIGN; + } + } + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +// The maximum number of iterations we use in probabilistic prime recovery method +// If n, e, d are valid then successful prime recover for each iteration should +// occur with probability ~1/2; with 100 iterations we fail for a valid private +// exponent with probability ~2^-100 +#define SYMCRYPT_MAX_PRIME_RECOVERY_ITERATIONS (100) + +#define SYMCRYPT_SCRATCH_BYTES_FOR_PRIME_RECOVERY( _ndMod, _ndPubExp, _nBitsMod ) \ + SymCryptSizeofIntFromDigits( _ndMod ) + /* Space for piPrivExp*/ \ + SymCryptSizeofIntFromDigits( _ndPubExp ) + /* Space for piPubExp*/ \ + SymCryptSizeofIntFromDigits( _ndMod + _ndPubExp ) + /* Space for piExpProd*/ \ + (4*SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( _nBitsMod )) + /* Space for peTmpY, peTmpX, peOne, peNegOne */\ + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( _ndMod + _ndPubExp ), /* Space for SymCryptIntMulMixedSize */ \ + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _ndMod ), /* Space for other SymCryptMod* */ \ + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( _ndMod ), /* Space for SymCryptModExp */ \ + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD( _ndMod ), /* Space for SymCryptIntExtendedGcd */ \ + SYMCRYPT_MAX( 
SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _ndMod ), /* Space for SymCryptIntToModulus */ \ + SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( _ndMod, _ndMod ) /* Space for SymCryptIntDivMod */ \ + ))))) + +static +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyCalculatePrimesFromPrivateExponent( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, // must already have modulus and public exponent set + _In_reads_bytes_( cbPrivateExponent ) + PCBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + SYMCRYPT_NUMBER_FORMAT numFormat, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + UINT32 cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // 3 digit sizes of temporary integers: + // - ndMod = pkRsakey->nDigitsOfModulus + // - ndPubExp = digits for a UINT64 public exponent + // - ndExpProd = ndMod + ndPubExp + + UINT32 ndMod = pkRsakey->nDigitsOfModulus; + UINT32 cbMod = SymCryptSizeofIntFromDigits( ndMod ); + + UINT32 ndPubExp = SymCryptDigitsFromBits( 64 ); + UINT32 cbPubExp = SymCryptSizeofIntFromDigits( ndPubExp ); + + UINT32 nBitsExpProd = 0; // we compute this later before use + UINT32 ndExpProd = ndMod + ndPubExp; + UINT32 cbExpProd = SymCryptSizeofIntFromDigits( ndExpProd ); + + UINT32 cbModElement = SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pkRsakey->nBitsOfModulus ); + + PSYMCRYPT_INT piPrivExp = NULL; + PSYMCRYPT_INT piPubExp = NULL; + PSYMCRYPT_INT piExpProd = NULL; + PSYMCRYPT_MODELEMENT peTmpY = NULL; + PSYMCRYPT_MODELEMENT peTmpX = NULL; + PSYMCRYPT_MODELEMENT peTmpPtr = NULL; + PSYMCRYPT_MODELEMENT peOne = NULL; + PSYMCRYPT_MODELEMENT peNegOne = NULL; + + PBYTE pbFnScratch = pbScratch; + UINT32 cbFnScratch = cbScratch; + + UINT64 low64ExpProd = 0; + UINT32 trailingZeros = 0; + + BOOL bFoundNonTrivialRoot = FALSE; + + // + // Recover primes from private exponent using probabilistic prime-factor recovery method + // See SP800-56B rev2 Appendix C.1 and Boneh 1999 + // + SYMCRYPT_ASSERT( pkRsakey->nPrimes == 2 ); + SYMCRYPT_ASSERT( cbScratch >= 
SYMCRYPT_SCRATCH_BYTES_FOR_PRIME_RECOVERY(ndMod, ndPubExp, pkRsakey->nBitsOfModulus) ); + + piPrivExp = SymCryptIntCreate( pbFnScratch, cbMod, ndMod ); + SYMCRYPT_ASSERT( piPrivExp != NULL ); + pbFnScratch += cbMod; + cbFnScratch -= cbMod; + piPubExp = SymCryptIntCreate( pbFnScratch, cbPubExp, ndPubExp ); + SYMCRYPT_ASSERT( piPubExp != NULL ); + pbFnScratch += cbPubExp; + cbFnScratch -= cbPubExp; + piExpProd = SymCryptIntCreate( pbFnScratch, cbExpProd, ndExpProd ); + SYMCRYPT_ASSERT( piExpProd != NULL ); + pbFnScratch += cbExpProd; + cbFnScratch -= cbExpProd; + + peTmpY = SymCryptModElementCreate( pbFnScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peTmpY != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + peTmpX = SymCryptModElementCreate( pbFnScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peTmpX != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + peOne = SymCryptModElementCreate( pbFnScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peOne != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + peNegOne = SymCryptModElementCreate( pbFnScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peNegOne != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + + // Ensure that modulus is odd - this is required for later SymCryptIntExtendedGcd + if( (SymCryptIntGetValueLsbits32(SymCryptIntFromModulus( pkRsakey->pmModulus ))& 1)==0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Import private exponent + scError = SymCryptIntSetValue(pbPrivateExponent, cbPrivateExponent, numFormat, piPrivExp); + if( scError != SYMCRYPT_NO_ERROR ) + { + // The integer cannot fit the private exponent (SYMCRYPT_BUFFER_TOO_SMALL), + // only if the caller providing a private exponent larger than the public modulus + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Basic range check + if( 
!SymCryptIntIsLessThan(piPrivExp, SymCryptIntFromModulus(pkRsakey->pmModulus)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Given range check, we can guarantee to compute + // Private exponent (d) * Public exponent (e) + // In piExpProd without overflow + SymCryptIntSetValueUint64( pkRsakey->au64PubExp[0], piPubExp ); + + // compute upper bound on product bit count based on public data (nBitsOfModulus (public) >= nBitsOfPrivateExponent (private)) + nBitsExpProd = pkRsakey->nBitsOfModulus + SymCryptIntBitsizeOfValue( piPubExp ); + + SymCryptIntMulMixedSize( piPrivExp, piPubExp, piExpProd, pbFnScratch, cbFnScratch ); + + // Ensure d*e is odd + low64ExpProd = SymCryptIntGetValueLsbits64( piExpProd ); + + if( (low64ExpProd & 1) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Compute how many trailing zeros in m = d*e - 1 + // + // We are variable time w.r.t. the number of trailing (up to 64) zeroes. An attacker using + // sidechannels to determine the number of trailing zeroes of m can glean information about + // the private exponent proportionate to the number of trailing zeroes. As we bound this to + // 64, at most an attacker can theoretically determine 64-bits of an expected 2048-bits of + // private exponent - and they can only do this for 1 in 2^64 keys. + // + // It would be possible to mask the number of trailing zeroes from sidechannels by always + // squaring by 64 times in the inner loop below and using masked operations to select out + // any found non-trivial root. We can consider doing this as a hardening measure if this API + // does see a lot of usage, but the expectation is that this method will almost never be + // used and it is just not enough of a leak for an attacker to even try to measuring. 
+ trailingZeros = SymCryptCountTrailingZeros64( low64ExpProd-1 ); + + // If there are 64 trailing zeroes then we abort because the prime factor recovery method + // could theoretically leak more than 64-bits of the private exponent. The likelihood of any + // key which has this many trailing zeroes _ever_ having been generated by a legitimate key + // generation process is extremely small given the cost of RSA key generation. This much more + // likely indicates faulty inputs or a hardware fault rather than a legitimate keypair we + // should try to import. + if( trailingZeros == 64 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptIntDivPow2( piExpProd, trailingZeros, piExpProd ); // r = m >> t (piExpProd holds d*e = m+1; d*e is odd so t >= 1 and the shift drops the extra low bit) + SymCryptModElementSetValueUint32( 1, pkRsakey->pmModulus, peOne, pbFnScratch, cbFnScratch ); + SymCryptModElementSetValueNegUint32( 1, pkRsakey->pmModulus, peNegOne, pbFnScratch, cbFnScratch ); + + for( UINT32 i=0; i<SYMCRYPT_MAX_PRIME_RECOVERY_ITERATIONS; i++ ) + { + // y is random value g in [2,n-2] + SymCryptModSetRandom( pkRsakey->pmModulus, peTmpY, 0, pbFnScratch, cbFnScratch ); + + // ModExp y = g^r in place + // could make this a bit faster and leakier using trailingZeros to reduce nBitsExp, but + // not normally a big performance win + SymCryptModExp( + pkRsakey->pmModulus, + peTmpY, + piExpProd, + nBitsExpProd-1, + 0, + peTmpY, + pbFnScratch, cbFnScratch ); + + // if y == 1 or y == -1, start over (we found a trivial root of 1) + if( SymCryptModElementIsEqual( pkRsakey->pmModulus, peTmpY, peOne ) || + SymCryptModElementIsEqual( pkRsakey->pmModulus, peTmpY, peNegOne ) ) + { + continue; + } + + for( UINT32 j=1; j<=trailingZeros; j++ ) + { + // x = y^2 + SymCryptModSquare( pkRsakey->pmModulus, peTmpY, peTmpX, pbFnScratch, cbFnScratch ); + + // if x == 1 then y is a non-trivial root of 1 (it is not -1 or 1) + if( SymCryptModElementIsEqual( pkRsakey->pmModulus, peTmpX, peOne) ) + { + bFoundNonTrivialRoot = TRUE; + break; + } + + // if x ==
-1, start over + if( SymCryptModElementIsEqual( pkRsakey->pmModulus, peTmpX, peNegOne) ) + { + break; // just break out of inner loop; continues outer loop + } + + // swap x and y + peTmpPtr = peTmpY; + peTmpY = peTmpX; + peTmpX = peTmpPtr; + } + if( bFoundNonTrivialRoot ) + { + break; + } + } + + if( !bFoundNonTrivialRoot ) + { + // we failed to find a non-trivial root of 1, so we cannot recover prime factors + // it is almost certain that this means that the inputs were wrong + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // piPrivExp = y + SymCryptModElementToInt( pkRsakey->pmModulus, peTmpY, piPrivExp, pbFnScratch, cbFnScratch ); + // piPrivExp = y-1 (we know this cannot borrow as y^2 is 1, so y != 0) + SymCryptIntSubUint32( piPrivExp, 1, piPrivExp ); + + // piPrivExp = p0 = GCD(y-1, n) + SymCryptIntExtendedGcd( + piPrivExp, + SymCryptIntFromModulus( pkRsakey->pmModulus ), + SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN, + piPrivExp, + NULL, + NULL, + NULL, + pbFnScratch, cbFnScratch ); + + // compute the sizes of the primes + pkRsakey->nBitsOfPrimes[0] = SymCryptIntBitsizeOfValue(piPrivExp); + pkRsakey->nBitsOfPrimes[1] = pkRsakey->nBitsOfModulus - pkRsakey->nBitsOfPrimes[0]; + for( UINT32 i=0; i<2; i++ ) + { + pkRsakey->nDigitsOfPrimes[i] = SymCryptDigitsFromBits(pkRsakey->nBitsOfPrimes[i]); + if( pkRsakey->nBitsOfPrimes[i] < SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + } + pkRsakey->nMaxDigitsOfPrimes = SYMCRYPT_MAX(pkRsakey->nDigitsOfPrimes[0], pkRsakey->nDigitsOfPrimes[1]); + + // Create all the objects + SymCryptRsakeyCreateAllObjects(pkRsakey); + + scError = SymCryptIntCopyMixedSize( piPrivExp, SymCryptIntFromModulus( pkRsakey->pmPrimes[0] ) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + // only fails if we computed the wrong bit-size for the primes above + scError = SYMCRYPT_HARDWARE_FAILURE; + goto cleanup; + } + + SymCryptIntToModulus( SymCryptIntFromModulus( pkRsakey->pmPrimes[0] ), + 
//
// SymCryptRsakeySetValueInternal
//
// Common import routine for RSA keys. Sets the modulus and the single public exponent of
// pkRsakey and, optionally, the private key, which is supplied either as two primes
// (ppPrimes/pcbPrimes with nPrimes == 2) or as a private exponent (pbPrivateExponent).
//
// Arguments:
//  - pbModulus/cbModulus           Modulus bytes in numFormat; required.
//  - pu64PubExp/nPubExp            Public exponent array; nPubExp must be exactly 1.
//  - pbPrivateExponent/cb...       Optional private exponent in numFormat. When non-NULL the primes
//                                  are recovered via SymCryptRsakeyCalculatePrimesFromPrivateExponent.
//  - ppPrimes/pcbPrimes/nPrimes    Optional primes; nPrimes must be 0 or 2, and when 2 every
//                                  pointer and size must be non-NULL / non-zero.
//  - flags                         Must contain at least one of SYMCRYPT_FLAG_RSAKEY_SIGN /
//                                  SYMCRYPT_FLAG_RSAKEY_ENCRYPT. SYMCRYPT_FLAG_KEY_NO_FIPS and
//                                  SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION are the only other bits
//                                  accepted, and MINIMAL_VALIDATION is rejected without NO_FIPS.
//  - pkRsakey                      Key object to fill in.
//
// Returns SYMCRYPT_NO_ERROR on success. Failures visible in this function are
// SYMCRYPT_INVALID_ARGUMENT (bad flags/arguments, modulus larger than the created size, even
// prime, or private material for a key whose nPrimes is not 2), SYMCRYPT_WRONG_KEY_SIZE (modulus
// or prime below the minimum bit size), SYMCRYPT_MEMORY_ALLOCATION_FAILURE, or any error
// propagated from the helper routines called below.
//
// NOTE(review): on a failure after the modulus has been written, pkRsakey is left partially
// updated; nothing here restores its previous contents.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptRsakeySetValueInternal(
    _In_reads_bytes_( cbModulus ) PCBYTE pbModulus,
    SIZE_T cbModulus,
    _In_reads_( nPubExp ) PCUINT64 pu64PubExp,
    UINT32 nPubExp,
    _In_reads_bytes_opt_( cbPrivateExponent ) PCBYTE pbPrivateExponent,
    SIZE_T cbPrivateExponent,
    _In_reads_opt_( nPrimes ) PCBYTE * ppPrimes,
    _In_reads_opt_( nPrimes ) SIZE_T * pcbPrimes,
    UINT32 nPrimes,
    SYMCRYPT_NUMBER_FORMAT numFormat,
    UINT32 flags,
    _Inout_ PSYMCRYPT_RSAKEY pkRsakey )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;

    // 3 digit sizes of temporary integers:
    // - ndPrimes = max digitsize of prime buffers
    // - ndMod = pkRsakey->nDigitsOfModulus
    // - ndLarge = ndPrimes + ndMod
    // NOTE(review): the code below actually sizes the large temporary (piAcc) as 2 * ndMod
    // digits, not ndPrimes + ndMod.

    UINT32 cbDivisor = 0;
    PSYMCRYPT_DIVISOR pdTmp = NULL;

    UINT32 ndMod = 0;
    UINT32 cbMod = 0;
    PSYMCRYPT_INT piPhi = NULL;

    UINT32 cbLarge = 0;
    PSYMCRYPT_INT piAcc = NULL;

    // pbScratch/cbScratch describe the whole allocation (used for wipe + free in cleanup);
    // pbFnScratch/cbFnScratch track the part still unused after temporaries are carved off.
    PBYTE pbScratch = NULL;
    UINT32 cbScratch = 0;
    PBYTE pbFnScratch = NULL;
    UINT32 cbFnScratch = 0;

    // Ensure caller has specified what algorithm(s) the key will be used with
    UINT32 algorithmFlags = SYMCRYPT_FLAG_RSAKEY_SIGN | SYMCRYPT_FLAG_RSAKEY_ENCRYPT;
    // Ensure only allowed flags are specified
    UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION | algorithmFlags;

    if ( ( ( flags & ~allowedFlags ) != 0 ) ||
         ( ( flags & algorithmFlags ) == 0) )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    // Check that minimal validation flag only specified with no fips
    if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) &&
         ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) != 0 ) )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    // Internal requirement that private key is either specified by primes or by private exponent, not both
    // This is not exposed to external API surface - if we were to dynamically check, the SYMCRYPT_ERROR
    // should indicate internal logic error - for now just assert
    SYMCRYPT_ASSERT( (nPrimes==0) || (pbPrivateExponent==NULL) );

    // Check if the arguments are correct
    if ( (pbModulus==NULL) || (cbModulus==0) || // Modulus is needed
         (nPubExp != 1) || (pu64PubExp==NULL) || // Exactly 1 public exponent is needed
         ((nPrimes != 2) && (nPrimes != 0)) ||
         ((nPrimes == 2) && ((ppPrimes==NULL) || (pcbPrimes==NULL) ||
                             (ppPrimes[0]==NULL) || (ppPrimes[1]==NULL) ||
                             (pcbPrimes[0]==0) || (pcbPrimes[1]==0))) )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    ndMod = pkRsakey->nDigitsOfModulus;

    // Calculate scratch spaces
    // No integer overflows as all numbers are limited by ndMod which is checked during Create
    if ( (pbPrivateExponent != NULL) || (nPrimes > 0) )
    {
        if( pkRsakey->nPrimes != 2 )
        {
            // The key was not allocated with space for private key material
            // so we cannot set it with private key material
            scError = SYMCRYPT_INVALID_ARGUMENT;
            goto cleanup;
        }

        cbMod = SymCryptSizeofIntFromDigits( ndMod );
        cbLarge = SymCryptSizeofIntFromDigits( 2 * ndMod ); // 2*ndMod is still < SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS)
        cbDivisor = SymCryptSizeofDivisorFromDigits( ndMod );

        // Room for the three temporaries (piPhi, piAcc, pdTmp) plus the largest scratch
        // requirement among the operations listed here.
        cbScratch = cbMod + cbLarge + cbDivisor +
                    SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(ndMod),
                    SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_CRT_GENERATION(ndMod),
                    SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD(ndMod),
                    SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR(ndMod),
                                  SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( ndMod, ndMod )
                    ))));

        if( pbPrivateExponent != NULL )
        {
            // We use at least as much scratch space when importing by private exponent, but probably more
            SYMCRYPT_ASSERT( SymCryptDigitsFromBits( 64 ) == 1 );

            cbScratch = SYMCRYPT_MAX( cbScratch,
                                      SYMCRYPT_SCRATCH_BYTES_FOR_PRIME_RECOVERY(ndMod, 1, pkRsakey->nSetBitsOfModulus) );
        }
    }
    else
    {
        // Public-key-only import: scratch is only needed for converting the modulus
        cbScratch = SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(ndMod);
    }

    pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch );
    if (pbScratch == NULL)
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    // Modulus
    scError = SymCryptIntSetValue( pbModulus, cbModulus, numFormat, SymCryptIntFromModulus( pkRsakey->pmModulus ) );
    if (scError != SYMCRYPT_NO_ERROR)
    {
        goto cleanup;
    }

    // Compute actual modulus size, and check that it isn't bigger than the created size
    pkRsakey->nBitsOfModulus = SymCryptIntBitsizeOfValue(SymCryptIntFromModulus(pkRsakey->pmModulus));
    if (pkRsakey->nBitsOfModulus > pkRsakey->nSetBitsOfModulus)
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    if (pkRsakey->nBitsOfModulus < SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS)
    {
        scError = SYMCRYPT_WRONG_KEY_SIZE;
        goto cleanup;
    }

    // IntToModulus requirement:
    //      nBitsOfModulus >= SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS --> pmModulus > 0
    SymCryptIntToModulus(
        SymCryptIntFromModulus( pkRsakey->pmModulus ),
        pkRsakey->pmModulus,
        pkRsakey->nBitsOfModulus,
        SYMCRYPT_FLAG_DATA_PUBLIC,
        pbScratch,
        cbScratch );

    // Public exponents
    pkRsakey->nPubExp = nPubExp;
    for (UINT32 i = 0; i<pkRsakey->nPubExp; i++)
    {
        pkRsakey->au64PubExp[i] = pu64PubExp[i];
    }

    // Private key import either by private exponent or primes
    if ( (pbPrivateExponent != NULL) || (nPrimes > 0) )
    {
        if (pbPrivateExponent != NULL)
        {
            // Private exponent
            // The helper receives the full scratch buffer; piPhi is only carved out of it
            // afterwards, once the helper has returned.
            scError = SymCryptRsakeyCalculatePrimesFromPrivateExponent(
                            pkRsakey,
                            pbPrivateExponent, cbPrivateExponent,
                            numFormat,
                            pbScratch, cbScratch );
            if (scError != SYMCRYPT_NO_ERROR)
            {
                goto cleanup;
            }

            pbFnScratch = pbScratch;
            cbFnScratch = cbScratch;

            // Create temporary piPhi
            piPhi = SymCryptIntCreate( pbFnScratch, cbMod, ndMod ); pbFnScratch += cbMod; cbFnScratch -= cbMod;
        }
        else //if (nPrimes > 0)
        {
            // Primes
            pbFnScratch = pbScratch;
            cbFnScratch = cbScratch;

            // Create temporary piPhi
            piPhi = SymCryptIntCreate( pbFnScratch, cbMod, ndMod ); pbFnScratch += cbMod; cbFnScratch -= cbMod;

            // First fix the tight number of digits of each prime
            // (each prime is parsed into piPhi purely to measure its bit size)
            pkRsakey->nMaxDigitsOfPrimes = 0;
            for (UINT32 i=0; i<pkRsakey->nPrimes; i++)
            {
#pragma warning(suppress: 26007) // "Incorrect Annotation" - cannot phrase array of pointers to arrays in SAL
                scError = SymCryptIntSetValue( ppPrimes[i], pcbPrimes[i], numFormat, piPhi );
                if (scError != SYMCRYPT_NO_ERROR )
                {
                    goto cleanup;
                }

                pkRsakey->nBitsOfPrimes[i] = SymCryptIntBitsizeOfValue(piPhi);
                pkRsakey->nDigitsOfPrimes[i] = SymCryptDigitsFromBits(pkRsakey->nBitsOfPrimes[i]);

                pkRsakey->nMaxDigitsOfPrimes = SYMCRYPT_MAX(pkRsakey->nMaxDigitsOfPrimes, pkRsakey->nDigitsOfPrimes[i]);

                if (pkRsakey->nBitsOfPrimes[i] < SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME)
                {
                    scError = SYMCRYPT_WRONG_KEY_SIZE;
                    goto cleanup;
                }
            }

            // Create all the objects
            SymCryptRsakeyCreateAllObjects(pkRsakey);

            // Set the values
            for (UINT32 i=0; i<pkRsakey->nPrimes; i++)
            {
#pragma warning(suppress: 26007) // "Incorrect Annotation" - cannot phrase array of pointers to arrays in SAL
                scError = SymCryptIntSetValue( ppPrimes[i], pcbPrimes[i], numFormat, SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ) );
                if (scError != SYMCRYPT_NO_ERROR )
                {
                    goto cleanup;
                }

                // Check that this prime is odd (should we check for primality?)
                if ((SymCryptIntGetValueLsbits32(SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ))& 1)==0)
                {
                    scError = SYMCRYPT_INVALID_ARGUMENT;
                    goto cleanup;
                }

                // IntToModulus requirement:
                //      nBitsOfPrimes >= SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME --> pmPrimes[i] > 0
                SymCryptIntToModulus(
                    SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ),
                    pkRsakey->pmPrimes[i],
                    pkRsakey->nBitsOfModulus, // Average number of operations
                    SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME,
                    pbFnScratch,
                    cbFnScratch );
            }
        }

        // Create remaining temporaries
        piAcc = SymCryptIntCreate( pbFnScratch, cbLarge, 2 * ndMod ); pbFnScratch += cbLarge; cbFnScratch -= cbLarge;
        pdTmp = SymCryptDivisorCreate( pbFnScratch, cbDivisor, ndMod ); pbFnScratch += cbDivisor; cbFnScratch -= cbDivisor;

        // Calculate the rest of the fields
        scError = SymCryptRsakeyCalculatePrivateFields( pkRsakey, pdTmp, piPhi, piAcc, pbFnScratch, cbFnScratch, flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION );
        if (scError != SYMCRYPT_NO_ERROR )
        {
            goto cleanup;
        }

        // Everything is set here
        pkRsakey->hasPrivateKey = TRUE;
    }

    pkRsakey->fAlgorithmInfo = flags; // We want to track all of the flags in the Rsakey

    if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 )
    {
        // Ensure RSA algorithm selftest is run before first use of RSA algorithm
        SYMCRYPT_RUN_SELFTEST_ONCE(
            SymCryptRsaSelftest,
            SYMCRYPT_SELFTEST_ALGORITHM_RSA);

        // PCT does not need to be run on import - mark it as done
        pkRsakey->fAlgorithmInfo |= SYMCRYPT_PCT_RSA_SIGN;
    }

cleanup:
    // The scratch buffer may hold private key material, so wipe it before releasing it
    if (pbScratch!=NULL)
    {
        SymCryptWipe(pbScratch,cbScratch);
        SymCryptCallbackFree(pbScratch);
    }

    return scError;
}
SYMCRYPT_NO_ERROR; + + UNREFERENCED_PARAMETER( flags ); + + // Check if private key needed but not there + if ((nPrimes!=0) && (pkRsakey->hasPrivateKey == FALSE)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Modulus + if (pbModulus!=NULL) + { + // We'll get an error if cbModulus is 0 or too small + scError = SymCryptIntGetValue( SymCryptIntFromModulus( pkRsakey->pmModulus ), pbModulus, cbModulus, numFormat ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + // Public exponents + if( pu64PubExp != NULL ) + { + if( nPubExp != 1 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + pu64PubExp[0] = pkRsakey->au64PubExp[0]; + } + + // Primes i.e. private key + if( nPrimes != 0 ) + { + if( nPrimes != 2 || ppPrimes == NULL || pcbPrimes == NULL ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + for (UINT32 i=0; i<nPrimes; i++) + { + if (ppPrimes[i]!=NULL) + { + scError = SymCryptIntGetValue( SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), ppPrimes[i], pcbPrimes[i], numFormat ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGetCrtValue( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _Out_writes_opt_(nCrtExponents) PBYTE * ppCrtExponents, + _In_reads_(nCrtExponents) SIZE_T * pcbCrtExponents, + UINT32 nCrtExponents, + _Out_writes_bytes_opt_(cbCrtCoefficient) PBYTE pbCrtCoefficient, + SIZE_T cbCrtCoefficient, + _Out_writes_bytes_opt_(cbPrivateExponent) PBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + UNREFERENCED_PARAMETER( flags ); + + // Check if the arguments are correct + if ( (ppCrtExponents==NULL) && (nCrtExponents!=0) || + (nCrtExponents != 0 && nCrtExponents != 2 )) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Crt 
value can only be available we have private key. + if (pkRsakey->hasPrivateKey == FALSE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Crt exponents + for (UINT32 i=0; i<nCrtExponents; i++) + { + if (ppCrtExponents[i]!=NULL) + { + scError = SymCryptIntGetValue( pkRsakey->piCrtPrivExps[i], ppCrtExponents[i], pcbCrtExponents[i], numFormat ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + if (pbCrtCoefficient!=NULL) + { + cbScratch = SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptModElementGetValue( + pkRsakey->pmPrimes[0], + pkRsakey->peCrtInverses[0], + pbCrtCoefficient, + cbCrtCoefficient, + numFormat, + pbScratch, + cbScratch); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + if (pbPrivateExponent!=NULL) + { + scError = SymCryptIntGetValue( pkRsakey->piPrivExps[0], pbPrivateExponent, cbPrivateExponent, numFormat ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + +cleanup: + + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyExtendKeyUsage( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_RSAKEY_SIGN | SYMCRYPT_FLAG_RSAKEY_ENCRYPT; + + if ( ( ( flags & ~algorithmFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pkRsakey->fAlgorithmInfo |= flags; + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/sc_lib.h b/libs/symcrypt/lib/sc_lib.h new file mode 100644 index 00000000000..faf42ec906a --- /dev/null +++ 
#if !defined(UNREFERENCED_PARAMETER)
// Fallback used only when the platform headers did not already provide the macro.
// The argument is parenthesized so the void cast applies to the whole expression
// (e.g. UNREFERENCED_PARAMETER( a + b )) rather than to its first operand only.
#define UNREFERENCED_PARAMETER(x) ((void)(x))
#endif
// Native register-width integer types and their size constants.
// 64-bit types are selected when either SYMCRYPT_CPU_AMD64 or SYMCRYPT_CPU_ARM64 is non-zero
// (the two flags are combined with a bitwise OR); every other target gets the 32-bit variants.
// NATIVE_BYTES_LOG2 is log2( NATIVE_BYTES ).
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64
typedef INT64 NATIVE_INT;
typedef UINT64 NATIVE_UINT;
#define NATIVE_BITS (64)
#define NATIVE_BYTES (8)
#define NATIVE_BYTES_LOG2 (3)
#else
typedef INT32 NATIVE_INT;
typedef UINT32 NATIVE_UINT;
#define NATIVE_BITS (32)
#define NATIVE_BYTES (4)
#define NATIVE_BYTES_LOG2 (2)
#endif
+// + +#define CONCAT_I2( a, b ) a##b +#define CONCAT_I3( a, b, c ) a##b##c + + +#define CONCAT2( a, b ) CONCAT_I2( a, b ) +#define CONCAT3( a, b, c ) CONCAT_I3( a, b, c ) +//#define CONCAT4( a, b, c, d) a##b##c##d + + + +#define SYMCRYPT_XXX_STATE CONCAT3( SYMCRYPT_, ALG, _STATE ) +#define PSYMCRYPT_XXX_STATE CONCAT3( PSYMCRYPT_, ALG, _STATE ) +#define PCSYMCRYPT_XXX_STATE CONCAT3( PCSYMCRYPT_, ALG, _STATE ) + +#define SYMCRYPT_Xxx CONCAT2( SymCrypt, Alg ) + +#define SYMCRYPT_XxxStateCopy CONCAT3( SymCrypt, Alg, StateCopy ) +#define SYMCRYPT_XxxInit CONCAT3( SymCrypt, Alg, Init ) +#define SYMCRYPT_XxxAppend CONCAT3( SymCrypt, Alg, Append ) +#define SYMCRYPT_XxxResult CONCAT3( SymCrypt, Alg, Result ) +#define SYMCRYPT_XxxAppendBlocks CONCAT3( SymCrypt, Alg, AppendBlocks ) +#define SYMCRYPT_XxxStateImport CONCAT3( SymCrypt, Alg, StateImport) +#define SYMCRYPT_XxxStateExport CONCAT3( SymCrypt, Alg, StateExport) + +// for XOFs and KMAC +#define SYMCRYPT_XXX_EXPANDED_KEY CONCAT3( SYMCRYPT_, ALG, _EXPANDED_KEY ) +#define PSYMCRYPT_XXX_EXPANDED_KEY CONCAT3( PSYMCRYPT_, ALG, _EXPANDED_KEY ) +#define PCSYMCRYPT_XXX_EXPANDED_KEY CONCAT3( PCSYMCRYPT_, ALG, _EXPANDED_KEY ) +#define SYMCRYPT_XxxEx CONCAT3( SymCrypt, Alg, Ex) +#define SYMCRYPT_XxxDefault CONCAT3( SymCrypt, Alg, Default ) +#define SYMCRYPT_XxxExpandKey CONCAT3( SymCrypt, Alg, ExpandKey ) +#define SYMCRYPT_XxxExpandKeyEx CONCAT3( SymCrypt, Alg, ExpandKeyEx ) +#define SYMCRYPT_XxxExtract CONCAT3( SymCrypt, Alg, Extract ) +#define SYMCRYPT_XxxResultEx CONCAT3( SymCrypt, Alg, ResultEx ) +#define SYMCRYPT_XxxKeyCopy CONCAT3( SymCrypt, Alg, KeyCopy ) + +#define SYMCRYPT_HmacXxx CONCAT2( SymCryptHmac, Alg ) +#define SYMCRYPT_HmacXxxStateCopy CONCAT3( SymCryptHmac, Alg, StateCopy ) +#define SYMCRYPT_HmacXxxKeyCopy CONCAT3( SymCryptHmac, Alg, KeyCopy ) +#define SYMCRYPT_HmacXxxExpandKey CONCAT3( SymCryptHmac, Alg, ExpandKey ) +#define SYMCRYPT_HmacXxxInit CONCAT3( SymCryptHmac, Alg, Init ) +#define SYMCRYPT_HmacXxxAppend 
CONCAT3( SymCryptHmac, Alg, Append ) +#define SYMCRYPT_HmacXxxResult CONCAT3( SymCryptHmac, Alg, Result ) + + +#define SYMCRYPT_XXX_INPUT_BLOCK_SIZE CONCAT3( SYMCRYPT_, ALG, _INPUT_BLOCK_SIZE ) +#define SYMCRYPT_XXX_RESULT_SIZE CONCAT3( SYMCRYPT_, ALG, _RESULT_SIZE ) + +#define SYMCRYPT_HMAC_XXX_INPUT_BLOCK_SIZE SYMCRYPT_XXX_INPUT_BLOCK_SIZE +#define SYMCRYPT_HMAC_XXX_RESULT_SIZE SYMCRYPT_XXX_RESULT_SIZE + +#define PSYMCRYPT_HMAC_XXX_EXPANDED_KEY CONCAT3( PSYMCRYPT_HMAC_, ALG, _EXPANDED_KEY ) +#define PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY CONCAT3( PCSYMCRYPT_HMAC_, ALG, _EXPANDED_KEY ) +#define SYMCRYPT_HMAC_XXX_STATE CONCAT3( SYMCRYPT_HMAC_, ALG, _STATE ) +#define PSYMCRYPT_HMAC_XXX_STATE CONCAT3( PSYMCRYPT_HMAC_, ALG, _STATE ) +#define PCSYMCRYPT_HMAC_XXX_STATE CONCAT3( PCSYMCRYPT_HMAC_, ALG, _STATE ) + + +//============================================================================================== +// PLATFORM SPECIFICS +//============================================================================================== + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +// +// The XMM save/restore functions need to be passed a buffer in which they can store their data. +// We have two different places where we use this, in kernel mode and in user mode (while testing) +// We can't declare a union of the two structs as we can't include the kernel-mode headers in this file +// when compiled for a user-mode app. +// Instead we define a structure with reserved space, and have each environment check the size and +// cast the pointer. +// +// We always use the KeSaveExtendedProcessorState call, and not the KeSaveFloatingPointState as it +// allows us to save only the XMM registers and not touch the X87/MMX registers which should +// save time. 
//
// Opaque, aligned save area handed to the SymCryptSave*mm / SymCryptRestore*mm functions.
// 'data' reserves SYMCRYPT_XSTATE_SAVE_SIZE bytes; each environment is expected to check that its
// own save structure fits and to cast the pointer, since the kernel-mode type cannot be named here.
// SYMCRYPT_MAGIC_FIELD expands to whatever bookkeeping field the build configuration defines
// (presumably present only in checked builds - confirm against its definition).
//
typedef
SYMCRYPT_ALIGN
struct _SYMCRYPT_EXTENDED_SAVE_DATA {
    SYMCRYPT_ALIGN BYTE data[SYMCRYPT_XSTATE_SAVE_SIZE];
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_EXTENDED_SAVE_DATA, *PSYMCRYPT_EXTENDED_SAVE_DATA;
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSaveXmm( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ); + +VOID +SYMCRYPT_CALL +SymCryptRestoreXmm( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ); + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSaveYmm( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ); + +VOID +SYMCRYPT_CALL +SymCryptRestoreYmm( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ); +#endif + + +//============================================================================================== +// Library declarations +//============================================================================================== + +// +// Function to check that the library has been initialized +// +#if SYMCRYPT_DEBUG + +VOID +SYMCRYPT_CALL +SymCryptLibraryWasNotInitialized(void); + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptCheckLibraryInitialized(void) +{ + if( !(g_SymCryptFlags & SYMCRYPT_FLAG_LIB_INITIALIZED) ) + { + SymCryptLibraryWasNotInitialized(); + } +} +#else +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptCheckLibraryInitialized(void) +{ +} +#endif + +#define HMAC_IPAD_BYTE 0x36 +#define HMAC_OPAD_BYTE 0x5c + +// SYMCRYPT_CPU_FEATURES +#define SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE (SYMCRYPT_CPU_FEATURE_PCLMULQDQ | SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL ) + +#define SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE (SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_AESNI) +#define SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE) +#define SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_VAES) +#define SYMCRYPT_CPU_FEATURES_FOR_VAES_512_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURE_AVX512 | SYMCRYPT_CPU_FEATURE_VAES) + +#define SYMCRYPT_CPU_FEATURES_FOR_SHANI_CODE (SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_SHANI) + +#define SYMCRYPT_CPU_FEATURES_FOR_MULX 
//
// ROTATE OPERATIONS
//
// ROLnn( x, n ) / RORnn( x, n ) rotate the nn-bit value x left / right by n bits.
//
// MSVC builds use the _rotl/_rotr intrinsics. Every other compiler gets portable definitions.
// The portable forms cast x to the target width first (so a wider or signed argument cannot leak
// extra bits or trigger a signed shift) and reduce both shift counts modulo the width, so a
// rotate count of 0 never produces a shift by the full type width, which is undefined in C.
// Compilers recognise this pattern and emit a single rotate instruction.
//
// Note: the macro arguments are evaluated more than once; do not pass expressions with side effects.
//

#define ROL16( x, n ) ((UINT16)( ((UINT32)(UINT16)(x) << ((n) & 15)) | ((UINT32)(UINT16)(x) >> ((16 - (n)) & 15)) ))
#define ROR16( x, n ) ((UINT16)( ((UINT32)(UINT16)(x) >> ((n) & 15)) | ((UINT32)(UINT16)(x) << ((16 - (n)) & 15)) ))

#if SYMCRYPT_MS_VC
    #define ROL32( x, n ) _rotl( (x), (n) )
    #define ROR32( x, n ) _rotr( (x), (n) )
    #define ROL64( x, n ) _rotl64( (x), (n) )
    #define ROR64( x, n ) _rotr64( (x), (n) )
#else
    #define ROL32( x, n ) ((UINT32)( ((UINT32)(x) << ((n) & 31)) | ((UINT32)(x) >> ((32 - (n)) & 31)) ))
    #define ROR32( x, n ) ((UINT32)( ((UINT32)(x) >> ((n) & 31)) | ((UINT32)(x) << ((32 - (n)) & 31)) ))
    #define ROL64( x, n ) ((UINT64)( ((UINT64)(x) << ((n) & 63)) | ((UINT64)(x) >> ((64 - (n)) & 63)) ))
    #define ROR64( x, n ) ((UINT64)( ((UINT64)(x) >> ((n) & 63)) | ((UINT64)(x) << ((64 - (n)) & 63)) ))
#endif
+// This is used by the SHA family +// +#if SYMCRYPT_CPU_AMD64 + +// +// On AMD64 we can do 2 UINT32s at once by doing a ROL(x,32) and a BSWAP. +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint32ToMsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData >= 2 ) + { + SYMCRYPT_STORE_MSBFIRST64( pbResult, ROL64( *(UINT64*)puData, 32 )); + pbResult += 8; + puData += 2; + cuData -= 2; + } + + if( cuData != 0 ) + { + SYMCRYPT_STORE_MSBFIRST32( pbResult, *puData ); + } +} + +#else // not _AMD64_ + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint32ToMsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData != 0 ) + { + SYMCRYPT_STORE_MSBFIRST32( pbResult, *puData ); + puData++; + pbResult += 4; + cuData--; + } +} +#endif // platform switch for SymCryptUint32ToMsbFirst + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ) +{ + while( cuResult != 0 ) + { + *puResult = SYMCRYPT_LOAD_MSBFIRST32( pbData ); + puResult++; + pbData += 4; + cuResult--; + } +} + + +// +// SymCryptUint32ToLsbFirst & SymCryptLsbFirstToUint32 +// These are used by the MD4 and MD5 hash functions +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + +// +// On AMD64, X86, and ARM this is just a memcpy +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint32ToLsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ) + +{ + memcpy( pbResult, puData, 4*cuData ); +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ) +{ + memcpy( puResult, pbData, 4*cuResult ); +} + +#else // not (AMD64_ or X86_ or ARM or ARM64) + +static +FORCEINLINE +VOID 
+SYMCRYPT_CALL +SymCryptUint32ToLsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData != 0 ) + { + SYMCRYPT_STORE_LSBFIRST32( pbResult, *puData ); + puData++; + pbResult += 4; + cuData--; + } +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ) +{ + while( cuResult != 0 ) + { + *puResult = SYMCRYPT_LOAD_LSBFIRST32( pbData ); + pbData += 4; + puResult++; + cuResult--; + } +} + +#endif // Platform switch for SymCryptUint32ToLsbFirst + + +// +// SymCryptUint64ToLsbFirst & SymCryptLsbFirstToUint64 +// These are used by Keccak. +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + +// +// On AMD64, X86, and ARM this is just a memcpy +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint64ToLsbFirst( _In_reads_(cuData) PCUINT64 puData, + _Out_writes_(8*cuData) PBYTE pbResult, + SIZE_T cuData ) + +{ + memcpy( pbResult, puData, 8*cuData ); +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint64( _In_reads_(8*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT64 puResult, + SIZE_T cuResult ) +{ + memcpy( puResult, pbData, 8*cuResult ); +} + +#else // not (AMD64_ or X86_ or ARM or ARM64) + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint64ToLsbFirst( _In_reads_(cuData) PCUINT64 puData, + _Out_writes_(8*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData != 0 ) + { + SYMCRYPT_STORE_LSBFIRST64( pbResult, *puData ); + puData++; + pbResult += 8; + cuData--; + } +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint64( _In_reads_(8*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT64 puResult, + SIZE_T cuResult ) +{ + while( cuResult != 0 ) + { + *puResult = SYMCRYPT_LOAD_LSBFIRST64( pbData ); + pbData += 8; + puResult++; + cuResult--; + } +} + +#endif // Platform switch for 
SymCryptUint64ToLsbFirst & SymCryptLsbFirstToUint64 + + +// +// SymCryptUint64ToMsbFirst & SymCryptMsbFirstToUint64 +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint64ToMsbFirst( _In_reads_(cuData) PCUINT64 puData, + _Out_writes_(8*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData != 0 ) + { + SYMCRYPT_STORE_MSBFIRST64( pbResult, *puData ); + pbResult += 8; + puData ++; + cuData --; + } +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMsbFirstToUint64( _In_reads_(8*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT64 puResult, + SIZE_T cuResult ) +{ + while( cuResult != 0 ) + { + *puResult = SYMCRYPT_LOAD_MSBFIRST64( pbData ); + puResult++; + pbData += 8; + cuResult--; + } +} + +//////////////////////////////////////////////////////////////////////////////////// +// Internal function prototypes +// + +// +// SymCryptSha1AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. +// Each block is 64 bytes long, the natural size of a SHA256 input block. +// +// cbData must be a multiple of 64. +// +VOID +SYMCRYPT_CALL +SymCryptSha1AppendBlocks( + _Inout_ SYMCRYPT_SHA1_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// +// SymCryptSha256AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. +// Each block is 64 bytes long, the natural size of a SHA256 input block. +// +// cbData must be a multiple of 64. 
+// +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation processing 4 message blocks in parallel using XMM registers +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_xmm_4blocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Assembly implementation processing 4 message blocks in parallel using XMM registers +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_xmm_ssse3_asm( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation processing 8 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ymm_8blocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Assembly implementation processing 8 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ymm_avx2_asm( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +// +// SymCryptSha512AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. +// Each block is 128 bytes long, the natural size of a SHA512 input block. +// +// cbData must be a multiple of 128. 
+// +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_xmm( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_1block( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation processing 2 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_2blocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation processing 4 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_4blocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Assembly implementation processing 4 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_avx2_asm( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Assembly implementation processing 4 message blocks in parallel using YMM registers with AVX512 instruction set +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_avx512vl_asm( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + + + +// +// SymCryptMd5AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. 
+// Each block is 64 bytes long, the natural size of a MD5 input block. +// +// cbData must be a multiple of 64. +// +VOID +SYMCRYPT_CALL +SymCryptMd5AppendBlocks( + _Inout_ SYMCRYPT_MD5_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +// +// SymCryptMd4AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. +// Each block is 64 bytes long, the natural size of a MD5 input block. +// +// cbData must be a multiple of 64. +// +VOID +SYMCRYPT_CALL +SymCryptMd4AppendBlocks( + _Inout_ SYMCRYPT_MD4_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +// +// SymCryptMd2AppendBlock +// +// Update the C and X state based on the message block in the buffer. +// +VOID +SYMCRYPT_CALL +SymCryptMd2AppendBlocks( + _Inout_ SYMCRYPT_MD2_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +// +// SymCryptUint32ToMsbFirst +// +// Convert an array of UINT32s to 4-byte values stored MSB first (big-endian) conversion. +// Note that the count is the number of UINT32s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes. +// +VOID +SYMCRYPT_CALL +SymCryptUint32ToMsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ); + +// +// SymCryptUint32ToLsbFirst +// +// Convert an array of UINT32s to 4-byte values stored LSB first (little-endian) conversion. +// Note that the count is the number of UINT32s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes. 
+// +VOID +SYMCRYPT_CALL +SymCryptUint32ToLsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ); + +// +// SymCryptMsbFirstToUint32 +// +// Convert an array of 4-byte values stored MSB first to an array of UINT32s +// (big-endian) conversion. +// Note that the count is the number of UINT32s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes. +// +VOID +SYMCRYPT_CALL +SymCryptMsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ); + +// +// SymCryptLsbFirstToUint32 +// +// Convert an array of 4-byte values stored LSB first to an array of UINT32s +// (little-endian) conversion. +// Note that the count is the number of UINT32s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes. +// +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ); + +// +// SymCryptUint64ToMsbFirst +// +// Convert an array of UINT64s to an array of bytes using the MSB first +// (big-endian) conversion. +// +VOID +SYMCRYPT_CALL +SymCryptUint64ToMsbFirst( _In_reads_(cuData) PCUINT64 puData, + _Out_writes_(8*cuData) PBYTE pbResult, + SIZE_T cuData ); + +// +// SymCryptMsbFirstToUint64 +// +// Convert an array of 4-byte values stored MSB first to an array of UINT64s +// (big-endian) conversion. +// Note that the count is the number of UINT64s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes. 
+// +VOID +SYMCRYPT_CALL +SymCryptMsbFirstToUint64( _In_reads_(8*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT64 puResult, + SIZE_T cuResult ); + + + +//============================================================================ +// HMAC macros and inline functions. +// +#define REPEAT_BYTE_TO_UINT32( x ) (((UINT32)x << 24) | ((UINT32)x << 16) | ((UINT32)x << 8) | x) +#define REPEAT_BYTE_TO_UINT64( x ) ( ((UINT64)REPEAT_BYTE_TO_UINT32(x) << 32) | REPEAT_BYTE_TO_UINT32(x) ) + +// +// The XorByteIntoBuffer function is a platform-optimized function to xor a byte +// repeatedly into a buffer. +// Note that the buffer length must be a multiple of 8. +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 +static +FORCEINLINE +VOID +SYMCRYPT_CALL +XorByteIntoBuffer( _Inout_updates_( 8*cqBuf ) PBYTE pbBuf, SIZE_T cqBuf, BYTE v ) +{ + SIZE_T i; + const UINT64 v64 = REPEAT_BYTE_TO_UINT64( v ); + + for( i=0; i<cqBuf; i++ ) + { + ((UINT64 *)pbBuf)[i] ^= v64; + } +} +#else +static +FORCEINLINE +VOID +SYMCRYPT_CALL +XorByteIntoBuffer( _Inout_updates_( 8*cqBuf ) PBYTE pbBuf, SIZE_T cqBuf, BYTE v ) +{ + SIZE_T i; + + for( i=0; i<8*cqBuf; i++ ) + { + pbBuf[i] ^= v; + } +} +#endif + +// +// GHASH +// + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKey( + _Out_ PSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ); + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyC( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ); + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyX86( + _Out_ PSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ); + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyAmd64( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ); + +// +// For all GHashAppendData functions, data will be appended in 
multiples of SYMCRYPT_GF128_BLOCK_SIZE. +// If the data is not a multiple of SYMCRYPT_GF128_BLOCK_SIZE, any remaining data will be ignored. +// + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendData( + _In_ PCSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataC( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataXmm( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataNeon( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataPclmulqdq( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashResult( + _In_ PCSYMCRYPT_GF128_ELEMENT pState, + _Out_writes_( SYMCRYPT_GF128_BLOCK_SIZE ) PBYTE pbResult ); + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32AppendBlocks( + _Inout_ PSYMCRYPT_MARVIN32_CHAINING_STATE pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + + + + +extern const BYTE SymCryptTestMsg3[3]; +extern const BYTE SymCryptTestMsg16[16]; +extern const BYTE SymCryptTestKey32[32]; + +VOID +SYMCRYPT_CALL +SymCryptInjectError( PBYTE pbData, SIZE_T cbData ); + + +#define SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM 1 // enable checking of OSXSAVE bit & XGETBV logic + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesByCpuid( UINT32 flags 
); + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesFromRegisters(void); + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesFromRegistersNoTry(void); + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(void); + +VOID +SYMCRYPT_CALL +SymCryptCpuidExFunc( int cpuInfo[4], int function_id, int subfunction_id ); + +//////////////////////////////////////////////////////////////////////////// +// Export blob formats +//////////////////////////////////////////////////////////////////////// + +//========================================================== +// BLOBS +// +// SYMCRYPT_BLOB_HEADER +// Generic header for all exported blobs from SymCrypt +// + +typedef enum _SYMCRYPT_BLOB_TYPE { + SymCryptBlobTypeUnknown = 0, + SymCryptBlobTypeHashState = 0x100, + SymCryptBlobTypeMd2State = SymCryptBlobTypeHashState + 1, // explicit constants as these have to remain the same forever. + SymCryptBlobTypeMd4State = SymCryptBlobTypeHashState + 2, + SymCryptBlobTypeMd5State = SymCryptBlobTypeHashState + 3, + SymCryptBlobTypeSha1State = SymCryptBlobTypeHashState + 4, + SymCryptBlobTypeSha256State = SymCryptBlobTypeHashState + 5, + SymCryptBlobTypeSha384State = SymCryptBlobTypeHashState + 6, + SymCryptBlobTypeSha512State = SymCryptBlobTypeHashState + 7, + SymCryptBlobTypeSha3_256State = SymCryptBlobTypeHashState + 8, + SymCryptBlobTypeSha3_384State = SymCryptBlobTypeHashState + 9, + SymCryptBlobTypeSha3_512State = SymCryptBlobTypeHashState + 10, + SymCryptBlobTypeSha224State = SymCryptBlobTypeHashState + 11, + SymCryptBlobTypeSha512_224State = SymCryptBlobTypeHashState + 12, + SymCryptBlobTypeSha512_256State = SymCryptBlobTypeHashState + 13, + SymCryptBlobTypeSha3_224State = SymCryptBlobTypeHashState + 14, +} SYMCRYPT_BLOB_TYPE; + +#define SYMCRYPT_BLOB_MAGIC ('cmys') + +// +// We define all export structures with pack=1 so that there are no padding bytes. 
+// +#pragma pack(push, 1) + +typedef struct _SYMCRYPT_BLOB_HEADER { + UINT32 magic; // 'cmys' + UINT32 size; // total size of blob + UINT32 type; // SYMCRYPT_BLOB_TYPE: type of blob +} SYMCRYPT_BLOB_HEADER, *PSYMCRYPT_BLOB_HEADER; + +typedef struct _SYMCRYPT_BLOB_TRAILER { + BYTE checksum[8]; // contains the Marvin32 checksum of the rest of the blob +} SYMCRYPT_BLOB_TRAILER, *PSYMCRYPT_BLOB_TRAILER; + +typedef struct _SYMCRYPT_MD2_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE C[16]; + BYTE X[16]; + UINT32 bytesInBuffer; + BYTE buffer[16]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_MD2_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_MD2_STATE_EXPORT_BLOB ) == SYMCRYPT_MD2_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_MD4_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[16]; // In the same format used for the final hash value of MD4 + UINT64 dataLength; + BYTE buffer[64]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_MD4_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_MD4_STATE_EXPORT_BLOB ) == SYMCRYPT_MD4_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_MD5_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[16]; // In the same format used for the final hash value of MD5 + UINT64 dataLength; + BYTE buffer[64]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_MD5_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_MD5_STATE_EXPORT_BLOB ) == SYMCRYPT_MD5_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_SHA1_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[20]; // in the same format used for the final hash value of SHA-1 + UINT64 dataLength; + BYTE buffer[64]; + BYTE rfu[8]; // rfu = Reserved for Future Use. 
+ SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_SHA1_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_SHA1_STATE_EXPORT_BLOB ) == SYMCRYPT_SHA1_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_SHA256_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[32]; // in the same format used for the final hash value of SHA-256 + UINT64 dataLength; + BYTE buffer[64]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_SHA256_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_SHA256_STATE_EXPORT_BLOB ) == SYMCRYPT_SHA256_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_SHA512_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[64]; // in the same format used for the final hash value of SHA-512 + UINT64 dataLengthL; // low 64 bits of data length + UINT64 dataLengthH; // high 64 bits of data length + BYTE buffer[128]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_SHA512_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_SHA512_STATE_EXPORT_BLOB ) == SYMCRYPT_SHA512_STATE_EXPORT_SIZE ); + +// Refer to SYMCRYPT_KECCAK_STATE documentation for the explanation of each struct member +typedef struct _SYMCRYPT_KECCAK_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE state[200]; + UINT32 stateIndex; + UINT8 paddingValue; + BOOLEAN squeezeMode; + BYTE rfu[8]; // rfu = Reserved for Future Use. 
+ SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_KECCAK_STATE_EXPORT_BLOB; + +typedef SYMCRYPT_KECCAK_STATE_EXPORT_BLOB SYMCRYPT_SHA3_224_STATE_EXPORT_BLOB; +typedef SYMCRYPT_KECCAK_STATE_EXPORT_BLOB SYMCRYPT_SHA3_256_STATE_EXPORT_BLOB; +typedef SYMCRYPT_KECCAK_STATE_EXPORT_BLOB SYMCRYPT_SHA3_384_STATE_EXPORT_BLOB; +typedef SYMCRYPT_KECCAK_STATE_EXPORT_BLOB SYMCRYPT_SHA3_512_STATE_EXPORT_BLOB; + +C_ASSERT(sizeof(SYMCRYPT_SHA3_224_STATE_EXPORT_BLOB) == SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE); +C_ASSERT(sizeof(SYMCRYPT_SHA3_256_STATE_EXPORT_BLOB) == SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE); +C_ASSERT(sizeof(SYMCRYPT_SHA3_384_STATE_EXPORT_BLOB) == SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE); +C_ASSERT(sizeof(SYMCRYPT_SHA3_512_STATE_EXPORT_BLOB) == SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE); + +#pragma pack(pop) + +///////////////////////////////////////////// +// AES internal functions + +extern const SYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipherNoOpt; + +VOID +SYMCRYPT_CALL +SymCryptAes4Sbox( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut, + BOOL UseSimd ); + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxC( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut ); + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxXmm( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut ); + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxNeon( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKey( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey, + BOOL UseSimd ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyC( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyXmm( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyNeon( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE 
pDecryptionRoundKey ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE 
pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptNeon( + 
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMacXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMacNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64Asm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64Xmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64Neon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb32Xmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb32Neon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL 
+SymCryptXtsAesEncryptDataUnitC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// pbScratch must currently be 16B aligned +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// pbScratch must currently be 16B aligned +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitZmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE 
pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitZmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsEncryptDataUnit( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsDecryptDataUnit( + _In_ 
PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +#define GCM_YMM_MINBLOCKS 16 + +// Caller must check cbData >= GCM_YMM_MINBLOCKS * SYMCRYPT_GCM_BLOCK_SIZE +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// Caller must check cbData >= GCM_YMM_MINBLOCKS * SYMCRYPT_GCM_BLOCK_SIZE +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID 
+SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptPartTwoPass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGcmDecryptPartTwoPass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptCtrMsb32( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// SymCryptCtrMsb32 implements the CTR cipher mode with a 32-bit increment function. +// It is not intended to be used as-is, rather it is a building block for modes like GCM. 
+// See the description of SymCryptCtrMsb64 in symcrypt.h for more details. +// +// For now, this function is only intended for use with GCM, which specifies the use of a +// 32-bit increment function. It's only used in cases where we can't use one of the optimized +// implementations (i.e. on ARM32 or x86[-64] without AESNI). Therefore, unlike the 64-bit version, +// there are no optimized implementations of the CTR function to call. If we ever need this +// functionality for other block cipher modes, this function will need to be updated and we'll +// need to add an additional pointer to SYMCRYPT_BLOCKCIPHER for the optimized CTR function. + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb32( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// SymCryptAesCtrMsb32 is a dispatch function for the optimized AES CTR implementations that use +// a 32-bit counter function (currently only relevant to GCM). 
+ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelHashProcess_serial( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_updates_bytes_( nStates * pParHash->pHash->stateSize ) PVOID pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelHashProcess( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_updates_bytes_( nStates * pParHash->pHash->stateSize ) PVOID pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch, + UINT32 maxParallel ); + +VOID +SYMCRYPT_CALL +SymCryptHashAppendInternal( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_reads_bytes_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHashCommonPaddingMd4Style( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState ); + + +extern const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha256Algorithm; +extern const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha384Algorithm; +extern const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha512Algorithm; + +#define PAR_SCRATCH_ELEMENTS_256 (4+8+64) // # scratch elements our parallel SHA256 implementations need +#define PAR_SCRATCH_ELEMENTS_512 (4+8+80) // # scratch elements our parallel SHA512 implementations need + +// pScratch must be 32B aligned, as it is used as an array of __m256i +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBlocks_ymm( + _Inout_updates_( 8 ) PSYMCRYPT_SHA256_CHAINING_STATE * pChain, + _Inout_updates_( 8 ) PCBYTE * ppByte, + SIZE_T nBytes, + _Out_writes_( PAR_SCRATCH_ELEMENTS_256 * 32 ) PBYTE pScratch ); + +// pScratch must be 32B aligned, as it is used as an array of __m256i +VOID +SYMCRYPT_CALL +SymCryptParallelSha512AppendBlocks_ymm( + 
_Inout_updates_( 4 ) PSYMCRYPT_SHA512_CHAINING_STATE * pChain, + _Inout_updates_( 4 ) PCBYTE * ppByte, + SIZE_T nBytes, + _Out_writes_( PAR_SCRATCH_ELEMENTS_512 * 32 ) PBYTE pScratch ); + +extern const SYMCRYPT_HASH SymCryptMd2Algorithm_default; +extern const SYMCRYPT_HASH SymCryptMd4Algorithm_default; +extern const SYMCRYPT_HASH SymCryptMd5Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha1Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha224Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha256Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha384Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha512Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha512_224Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha512_256Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha3_224Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha3_256Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha3_384Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha3_512Algorithm_default; +extern const SYMCRYPT_HASH SymCryptShake128HashAlgorithm_default; +extern const SYMCRYPT_HASH SymCryptShake256HashAlgorithm_default; + + + +// Paddings used by various SHA-3 derived algorithms +#define SYMCRYPT_SHA3_PADDING_VALUE 0x06 // 01 10* padding +#define SYMCRYPT_SHAKE_PADDING_VALUE 0x1f // 11 11 10* padding +#define SYMCRYPT_CSHAKE_PADDING_VALUE 0x04 // 00 10* padding (used when N or S are non-empty strings) + +// +// Functions operating on the Keccak state +// + +VOID +SYMCRYPT_CALL +SymCryptKeccakPermute(_Inout_updates_(25) UINT64* pState); +// Keccak-f[1600] permutation + +VOID +SYMCRYPT_CALL +SymCryptKeccakInit(_Out_ PSYMCRYPT_KECCAK_STATE pState, UINT32 inputBlockSize, UINT8 padding); + +VOID +SYMCRYPT_CALL +SymCryptKeccakReset(_Out_ PSYMCRYPT_KECCAK_STATE pState); + +VOID +SYMCRYPT_CALL +SymCryptKeccakZeroAppendBlock(_Inout_ PSYMCRYPT_KECCAK_STATE pState); +// Zero pads the current block by invoking the permutation and 
setting +// pState->stateIndex to 0. + +VOID +SYMCRYPT_CALL +SymCryptKeccakAppend( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); +// Generic append function. + +VOID +SYMCRYPT_CALL +SymCryptKeccakExtract( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); +// Generic extract function, no restriction on cbResult. +// bWipe denotes whether to wipe the Keccak state and initialize it +// for a new computation. + +VOID +SYMCRYPT_CALL +SymCryptKeccakStateExport( + SYMCRYPT_BLOB_TYPE type, + _In_ PCSYMCRYPT_KECCAK_STATE pState, + _Out_writes_bytes_(SYMCRYPT_KECCAK_STATE_EXPORT_SIZE) PBYTE pbBlob); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKeccakStateImport( + SYMCRYPT_BLOB_TYPE type, + _Out_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_bytes_(SYMCRYPT_KECCAK_STATE_EXPORT_SIZE) PCBYTE pbBlob); + +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendEncodeTimes8( + _Inout_ SYMCRYPT_KECCAK_STATE *pState, + UINT64 uValue, + BOOLEAN bLeftEncode); +// Appends the left-encoding of uValue * 8 to the state + +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendEncodedString( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_(cbString) PCBYTE pbString, + SIZE_T cbString); +// Appends 'left_encode(cbString * 8) || pbString' to the state + +VOID +SYMCRYPT_CALL +SymCryptCShakeEncodeInputStrings( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); +// Process CShake input strings +// Appends byte_pad( encode_string( pbFunctionNameString ) || encode_string( pbCustomizationString ), pState->inputBlockSize ) + + + +VOID +SYMCRYPT_CALL +SymCryptFatalIntercept( UINT32 fatalCode ); + +extern const BYTE SymCryptSha256KATAnswer[32]; +extern const BYTE SymCryptSha384KATAnswer[48]; +extern const BYTE 
SymCryptSha512KATAnswer[64]; + +// +// Arithmetic +// + +#define SYMCRYPT_ASSERT_ASYM_ALIGNED( _p ) SYMCRYPT_ASSERT( ((SIZE_T)(_p) & (SYMCRYPT_ASYM_ALIGN_VALUE - 1)) == 0 ); + + +#define SYMCRYPT_FDEF_DIGIT_NUINT32 ((UINT32)(SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT32 ) )) + +#define SYMCRYPT_OBJ_NDIGITS( _p ) ((_p)->nDigits) +#define SYMCRYPT_OBJ_NBYTES( _p ) ((_p)->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE) +#define SYMCRYPT_OBJ_NUINT32( _p ) ((_p)->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT32 )) + +#if SYMCRYPT_MS_VC +#define SYMCRYPT_MUL32x32TO64( _a, _b ) UInt32x32To64( (_a), (_b) ) +#elif SYMCRYPT_GNUC +#define SYMCRYPT_MUL32x32TO64( _a, _b ) ( (UINT64)(_a)*(UINT64)(_b) ) +#else + #error Unknown compiler +#endif +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MOD_BINARY_OP_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MOD_UNARY_OP_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef SYMCRYPT_ERROR (SYMCRYPT_CALL * SYMCRYPT_MOD_UNARY_OP_FLAG_STATUS_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MOD_SET_POST_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef PCUINT32 (SYMCRYPT_CALL * SYMCRYPT_MOD_PRE_GET_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MOD_COPY_FN)( + _In_ 
PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MODULUS_COPYFIXUP_FN)( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MODULUS_INIT_FN)( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +// +// In the future we might want to implement a 'prepare divisor' for people who want to do one or more modular divisions. +// In EC projective coordinates you have a value stored as (X,Z) with X/Z being the actual value that needs to be exported. +// In Montgomery format, this is stored as (RX, RZ), and just doing RX * (1/RZ) gets you the value to be exported. +// There seem to be many tricks here to get some more speed; maybe we just need to define export functions for each +// point format and allow the Modulus to contain special optimizations. +// +// The SetPost function is the post-processing function of any SetValue operation. The SetValue operation will store the +// modElement in the normal integer format into the ModElement. The SetPost function post-processes it into the proper +// representation for that modulus. +// +// The PreGet function is the pre-processing function to any GetValue operation. It returns a pointer to the proper value +// stored in standard integer format. This pointer can either be into the ModElement itself, or into the scratch space. 
+// + +typedef struct _SYMCRYPT_MODULAR_FUNCTIONS { + SYMCRYPT_MOD_BINARY_OP_FN modAdd; + SYMCRYPT_MOD_BINARY_OP_FN modSub; + SYMCRYPT_MOD_UNARY_OP_FN modNeg; + SYMCRYPT_MOD_BINARY_OP_FN modMul; + SYMCRYPT_MOD_UNARY_OP_FN modSquare; + SYMCRYPT_MOD_UNARY_OP_FLAG_STATUS_FN modInv; + SYMCRYPT_MOD_SET_POST_FN modSetPost; + SYMCRYPT_MOD_PRE_GET_FN modPreGet; + SYMCRYPT_MODULUS_COPYFIXUP_FN modulusCopyFixup; // non-generic fixup after memcpy + SYMCRYPT_MODULUS_INIT_FN modulusInit; + PVOID slack[6]; // spare slots; presumably padding: 10 function pointers + 6 = 16 entries, keeping the size a power of 2 (see the C_ASSERT on SYMCRYPT_MODULAR_FUNCTIONS_SIZE) +} SYMCRYPT_MODULAR_FUNCTIONS; + +#define SYMCRYPT_MODULAR_FUNCTIONS_SIZE (sizeof( SYMCRYPT_MODULAR_FUNCTIONS ) ) + +extern const SYMCRYPT_MODULAR_FUNCTIONS g_SymCryptModFns[]; +extern const UINT32 g_SymCryptModFnsMask; + +// +// Table entry that contains the information about one implementation. +// Allows generic code to decide which implementation to use for a modulus. +// The first entry in the table that is allowed is chosen; the last entry always matches everything. +// + +#define SYMCRYPT_MODULUS_FEATURE_MONTGOMERY 1 // Modulus is suitable for Montgomery processing +// #define SYMCRYPT_MODULUS_FEATURE_PSEUDO_MERSENNE 2 // Modulus is suitable for Pseudo-Mersenne processing +// #define SYMCRYPT_MODULUS_FEATURE_NISTP256 4 // Modulus is the NIST P256 curve prime +#define SYMCRYPT_MODULUS_FEATURE_NISTP384 8 // Modulus is the NIST P384 curve prime + +typedef struct _SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY +{ + UINT32 type; // Type value of this solution + SYMCRYPT_CPU_FEATURES cpuFeatures; // Required CPU features + UINT32 maxBits; // Max # bits allowed in the actual value of the modulus, 0 = no limit + UINT32 modulusFeatures; // Required features of the modulus +} SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY, *PSYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY; +typedef const SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY* PCSYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY; + +extern const SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY SymCryptModulusTypeSelections[]; // Array can be any size... 
+ + +// Check that the size is a power of 2 +C_ASSERT( (SYMCRYPT_MODULAR_FUNCTIONS_SIZE & (SYMCRYPT_MODULAR_FUNCTIONS_SIZE-1)) == 0 ); + +// The macro that we use to call modular functions +#define SYMCRYPT_MOD_CALL(v) ((SYMCRYPT_MODULAR_FUNCTIONS *)(( SYMCRYPT_FORCE_READ32( &(v)->type) & g_SymCryptModFnsMask) + (PBYTE)(&g_SymCryptModFns) ))-> + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_GENERIC {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulGeneric,\ + &SymCryptFdefModSquareGeneric,\ + &SymCryptFdefModInvGeneric,\ + &SymCryptFdefModSetPostGeneric,\ + &SymCryptFdefModPreGetGeneric,\ + &SymCryptFdefModulusCopyFixupGeneric,\ + &SymCryptFdefModulusInitGeneric,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulMontgomery,\ + &SymCryptFdefModSquareMontgomery,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_ARM64256 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAdd256Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub256Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomery256Asm, \ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomery256Asm, \ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_ARM64P384 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAdd384Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub384Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryP384Asm, \ + (SYMCRYPT_MOD_UNARY_OP_FN) 
&SymCryptFdefModSquareMontgomeryP384Asm, \ + &SymCryptFdef369ModInvMontgomery,\ + &SymCryptFdef369ModSetPostMontgomery,\ + &SymCryptFdef369ModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdef369ModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX256 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAddMulx256Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub256Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryMulx256Asm,\ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomeryMulx256Asm,\ + &SymCryptFdefModInvMontgomery256,\ + &SymCryptFdefModSetPostMontgomeryMulx256,\ + &SymCryptFdefModPreGetMontgomery256,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery256,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULXP256 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAddMulx256Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub256Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryMulxP256Asm,\ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomeryMulxP256Asm,\ + &SymCryptFdefModInvMontgomery256,\ + &SymCryptFdefModSetPostMontgomeryMulx256,\ + &SymCryptFdefModPreGetMontgomery256,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery256,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX384 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAddMulx384Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub384Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryMulx384Asm,\ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomeryMulx384Asm,\ + &SymCryptFdef369ModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomeryMulx384,\ + &SymCryptFdef369ModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdef369ModulusInitMontgomery,\ +} + +#define 
SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULXP384 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAddMulx384Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub384Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryMulxP384Asm,\ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomeryMulxP384Asm,\ + &SymCryptFdef369ModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomeryMulxP384,\ + &SymCryptFdef369ModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdef369ModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF369_MONTGOMERY {\ + &SymCryptFdef369ModAddGeneric,\ + &SymCryptFdef369ModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdef369ModMulMontgomery,\ + &SymCryptFdef369ModSquareMontgomery,\ + &SymCryptFdef369ModInvMontgomery,\ + &SymCryptFdef369ModSetPostMontgomery,\ + &SymCryptFdef369ModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdef369ModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulMontgomeryMulx,\ + &SymCryptFdefModSquareMontgomeryMulx,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY512 {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulMontgomery512,\ + &SymCryptFdefModSquareMontgomery512,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY1024 {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + 
&SymCryptFdefModMulMontgomery1024,\ + &SymCryptFdefModSquareMontgomery1024,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX1024 {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulMontgomeryMulx1024,\ + &SymCryptFdefModSquareMontgomeryMulx1024,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopy( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ); +// +// Copies Src to Dst under mask. +// Requirements: +// - mask == 0 or mask == 0xffffffff +// - cbData must be a multiple of the size of a digit, or a multiple of the size of a ModElement. +// - pbSrc and pbDst must be SYMCRYPT_ALIGNed +// if mask == 0 this function does nothing. +// if mask == 0xffffffff this function is a memcpy from Src to Dst. +// This function is side-channel safe; the value of mask is not revealed +// through the memory access patterns. +// + +VOID +SYMCRYPT_CALL +SymCryptFdefConditionalSwap( + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc1, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc2, + UINT32 nDigits, + UINT32 cond ); + +// +// Swaps the bytes of Src1 with the bytes of Src2 under a condition. +// Requirements: +// - cond = 0 or cond = 1 . +// - cbData must be a multiple of the size of a digit, or a multiple of the size of a ModElement. +// - pbSrc1 and pbSrc2 must be SYMCRYPT_ALIGNed +// if cond == 0 this function does nothing. 
+// if cond == 1 this function swaps the bytes of Src1 with the bytes of Src2. +// This function is side-channel safe; the value of cond is not revealed +// through the memory access patterns. +// + +VOID +SYMCRYPT_CALL +SymCryptFdefClaimScratch( PBYTE pbScratch, SIZE_T cbScratch, SIZE_T cbMin ); + +UINT32 +SymCryptFdefDigitsFromBits( UINT32 nBits ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntAllocate( UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofIntFromDigits( UINT32 nDigits ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +VOID +SymCryptFdefIntCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SymCryptFdefIntMaskedCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 mask ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntConditionalCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 cond ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntConditionalSwap( + _Inout_ PSYMCRYPT_INT piSrc1, + _Inout_ PSYMCRYPT_INT piSrc2, + UINT32 cond ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntBitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefNumberofDigitsFromInt( _In_ PCSYMCRYPT_INT piSrc ); + +SYMCRYPT_ERROR +SymCryptFdefIntCopyMixedSize( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetValueUint32( + UINT32 u32Src, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetValueUint64( + UINT64 u64Src, + _Out_ PSYMCRYPT_INT piDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefIntSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_ PSYMCRYPT_INT piDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefIntGetValue( + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_(cbDst) 
PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetValueLsbits32( _In_ PCSYMCRYPT_INT piSrc ); + +UINT64 +SYMCRYPT_CALL +SymCryptFdefIntGetValueLsbits64( _In_ PCSYMCRYPT_INT piSrc ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntNeg( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T Exp, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntDivPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntShr1( + UINT32 highestBit, + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntModPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetBit( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetBits( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit, + UINT32 nBits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetBits( + _In_ PSYMCRYPT_INT piDst, + UINT32 value, + UINT32 iBit, + 
UINT32 nBits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsEqualUint32( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ UINT32 u32Src2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsEqual( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsLessThan( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntMulUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +VOID +SYMCRYPT_CALL +SymCryptFdefIntSquare( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorAllocate( UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofDivisorFromDigits( UINT32 nDigits ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorRetrieveHandle( _In_ PBYTE pbBuffer ); + +VOID +SymCryptFdefDivisorCopy( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ); + +VOID +SymCryptFdefDivisorCopyFixup( + _In_ PCSYMCRYPT_DIVISOR pSrc, + _Out_ PSYMCRYPT_DIVISOR pDst ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntFromDivisor( _In_ PSYMCRYPT_DIVISOR pdSrc ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToDivisor( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst, + UINT32 totalOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE 
pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntDivMod( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_opt_ PSYMCRYPT_INT piQuotient, + _Out_opt_ PSYMCRYPT_INT piRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawDivMod( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pNum, + UINT32 nDigits, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_writes_opt_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pQuotient, + _Out_writes_opt_(SYMCRYPT_OBJ_NUINT32(pdDivisor)) PUINT32 pRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusAllocate( UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusFree( _Out_ PSYMCRYPT_MODULUS pmObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofModulusFromDigits( UINT32 nDigits ); + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusRetrieveHandle( _In_ PBYTE pbBuffer ); + + +VOID +SymCryptFdefModulusCopy( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementAllocate( _In_ PCSYMCRYPT_MODULUS pmMod ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementFree( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofModElementFromModulus( PCSYMCRYPT_MODULUS pmMod ); + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_MODULUS pmMod ); + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementRetrieveHandle( _In_ PBYTE pbBuffer ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementWipe( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst 
); + +VOID +SymCryptFdefModElementCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SymCryptFdefModElementMaskedCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 mask ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ); + +VOID +SymCryptFdefModElementConditionalSwap( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peData1, + _Inout_ PSYMCRYPT_MODELEMENT peData2, + _In_ UINT32 cond ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToModulus( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_MODULUS pmDst, + UINT32 averageOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToModElement( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementToIntGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_bytes_( pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) + PCUINT32 pSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefRawSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst, + UINT32 nDigits ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModElementSetValueGeneric( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL 
+SymCryptFdefModElementSetValueUint32Generic( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementSetValueNegUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefRawGetValue( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModElementGetValue( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefModElementIsEqual( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefModElementIsZero( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAddGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAddMulx256Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAddMulx384Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAdd256Asm( + _In_ PCSYMCRYPT_MODULUS 
pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAdd384Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModAddGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSubGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModSubGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSub256Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSub384Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModNegGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomery( + 
_In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomeryMulx256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomeryMulxP384( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModSetPostMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetMontgomery256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdef369ModPreGetMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusCopyFixupGeneric( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusCopyFixupMontgomery( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitGeneric( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T 
cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomeryInternal( + _Inout_ PSYMCRYPT_MODULUS pmObj, + UINT32 nUint32Used, // R = 2^{32 * this parameter} + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomery( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomery256( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModulusInitMontgomery( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAdd( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSub( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubUint32( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulGeneric( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + 
_Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx256Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulxP384Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery256Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryP384Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModMulMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx1024( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareGeneric( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery( + _In_ 
PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx256Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulxP384Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery256Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryP384Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModSquareMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx1024( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMul( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulMulx( + 
_In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulMulx1024( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquare( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareMulx( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareMulx1024( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369RawMul( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsEqualUint32( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits, + _In_ UINT32 u32Src2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawNeg( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 carryIn, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedAdd( + _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pAcc, + _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 
) PCUINT32 pSrc, + UINT32 mask, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedSub( + _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pAcc, + _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PCUINT32 pSrc, + UINT32 mask, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Mulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvGeneric( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvMontgomery256( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdef369ModInvMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ 
PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptModExpGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peBase, + _In_ PCSYMCRYPT_INT piExp, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModMultiExpGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_( nBases ) PCSYMCRYPT_MODELEMENT * peBaseArray, + _In_reads_( nBases ) PCSYMCRYPT_INT * piExpArray, + UINT32 nBases, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetRandomGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddUint32( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + UINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdef369RawAddAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) 
PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdef369RawSubAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsLessThan( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopyAsm( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369MaskedCopyAsm( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulAsm( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareAsm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369RawMulAsm( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMul512Asm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + 
_In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquare512Asm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMul1024Asm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquare1024Asm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceAsm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduce256Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduce512Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduce1024Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369MontgomeryReduce( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369MontgomeryReduceAsm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceMulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceMulx1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + + 
+//===================================================== +// Current state of FIPS tests for asymmetric keys +//===================================================== + +// -------------------------------------------------------------------- +// Key type | | +// & | Alg | Description +// Operation| | +// -------------------------------------------------------------------- +// Dlkey | DH | Requires use of named safe-prime group (otherwise we cannot perform private +// Generate | | key range check, or public key order validation). +// | | +// | | From SP800-56Ar3: +// | | Check private key is in the range [1, min(2^nBitsPriv, q)-1] +// | | nBitsPriv is specified either using a default value or using +// | | SymCryptDlkeySetPrivateKeyLength, such that 2s <= nBitsPriv <= nBitsOfQ. +// | | (s is the maximum security strength for a named safe-prime group as +// | | specified in SP800-56Arev3) +// | | Check public key is in the range [2, p-2] +// | | Check that (Public key)^q == 1 mod p +// | | +// | | FIPS 140-3 does not require a further PCT before first use of the key. +// |----------------------------------------------------------- +// | DSA | Requires use of a Dlgroup which has q, but is not a named safe-prime group. +// | | +// | | FIPS 186-4 and SP800-89 do not require DSA keypair owners to perform +// | | validation of keypairs they generate. +// | | +// | | FIPS 140-3 requires that a module generating a Dlkey keypair for use in DSA +// | | must perform a PCT on the keypair before first operational use in DSA. +// | | As the Dlgroups supported by FIPS are distinct for DH and DSA, we can perform +// | | this PCT on key generation without fear of adverse performance. +// -------------------------------------------------------------------- +// Dlkey | DH | Requires use of named safe-prime group (otherwise we cannot perform private +// SetValue | | key range check, or public key order validation). 
+// | | +// | | From SP800-56Ar3: +// | | If importing a private key: +// | | Check private key is in the range [1, min(2^nBitsPriv, q)-1] +// | | nBitsPriv is specified either using a default value or using +// | | SymCryptDlkeySetPrivateKeyLength, such that 2s <= nBitsPriv <= nBitsOfQ. +// | | (s is the maximum security strength for a named safe-prime group as +// | | specified in SP800-56Arev3) +// | | +// | | If importing a public key: +// | | Check public key is in the range [2, p-2] +// | | Check that (Public key)^q == 1 mod p +// | | +// | | If importing both a private and public key, as above and also: +// | | Use the imported Private key to generate a Public key, and check the +// | | generated Public key is equal to the imported Public key. +// |----------------------------------------------------------- +// | DSA | Requires use of a Dlgroup which is not a named safe-prime group. +// | | +// | | FIPS 186-4 refers to SP800-89: +// | | If importing a public key: +// | | Check public key is in the range [2, p-2] +// | | Check that (Public key)^q == 1 mod p +// | | If importing a private and public key: +// | | Use the imported Private key to generate a Public key, and check the +// | | generated Public key is equal to the imported Public key. 
+// -------------------------------------------------------------------- +// Eckey | ECDH | Requires use of a NIST prime Elliptic Curve (P224, P256, P384, or P521) +// SetRandom| | +// | | From SP800-56Ar3: +// | | Check private key is in range [1, GOrd-1] +// | | Check public key is nonzero, has coordinates in the underlying field, and is a +// | | point on the curve +// | | Check that GOrd*(Public key) == O +// | | +// | | FIPS 140-3 does not require a further PCT before first use of the key +// |---------------------------------------------------------- +// | ECDSA | Requires use of a NIST prime Elliptic Curve (P224, P256, P384, or P521) +// | | +// | | FIPS 186-4 and SP800-89 do not require ECDSA keypair owners to perform +// | | validation of keypairs they generate. +// | | +// | | FIPS 140-3 requires that a module generating an Eckey keypair for use in ECDSA +// | | must perform a PCT on the keypair before first operational use in ECDSA. +// | | As the Elliptic curves used in ECDH and ECDSA are the same, an Eckey may be +// | | used for both ECDH and ECDSA. We defer the ECDSA PCT from the EckeySetRandom +// | | call to the first use of EcDsaSign, or the first export of the keypair. +// -------------------------------------------------------------------- +// Eckey | ECDH | Requires use of a NIST prime Elliptic Curve (P224, P256, P384, or P521) +// SetValue | | +// | | From SP800-56Ar3: +// | | If importing a private key: +// | | Check private key is in range [1, GOrd-1] +// | | +// | | If importing a public key: +// | | Check public key is nonzero, has coordinates in the underlying field, and is +// | | a point on the curve +// | | Check that GOrd*(Public key) == O +// | | +// | | If importing a private and public key: +// | | Use the imported Private key to generate a Public key, and check the +// | | generated Public key is equal to the imported Public key. 
+// |---------------------------------------------------------- +// | ECDSA | Requires use of a NIST prime Elliptic Curve (P224, P256, P384, or P521) +// | | +// | | FIPS 186-4 refers to SP800-89: +// | | If importing a public key: +// | | SP800-89 refers to ANS X9.62. Assume same tests required as SP800-56Ar3: +// | | Check public key is nonzero, has coordinates in the underlying field, and is +// | | a point on the curve +// | | Check that GOrd*(Public key) == O +// | | +// | | If importing a private and public key: +// | | Use the imported Private key to generate a Public key, and check the +// | | generated Public key is equal to the imported Public key. +// -------------------------------------------------------------------- +// Rsakey | RSA | From FIPS 186-4 (SIGN) and SP800-56Br2 (ENCRYPT for key transport): +// Generate |ENCRYPT| Ensure p and q are in open range (2 ^ ((nBits - 1) / 2), 2 ^ (nBits / 2)) +// | and | Ensure |p-q| > 2^((nBits/2)-100) +// | RSA | Ensure e is coprime with (p-1) and (q-1) +// | SIGN | Ensure d is in range [2 ^ (nBits/2) + 1, LCM(p-1,q-1) - 1] +// | | Ensure that d*e == 1 mod LCM(p-1,q-1) +// | | +// | | FIPS 140-3 requires that a module generating an Rsakey keypair for use in an +// | | RSA algorithm must perform a PCT on the keypair before first operational use. +// | | +// | | For ENCRYPT, SP800-56Br2 specifies the PCT to perform as part of key +// | | generation is: +// | | Check (m^e)^d == m mod n for some m in range [2, n-2] +// | | +// | | For SIGN, FIPS 186-4 refers to SP800-89, which does not clearly specify a +// | | PCT, but does specify that for an owner to have assurance of Private Key +// | | Possession they can sign a message with the private key and validate it with +// | | the public key to check they correspond to each other.
Notably, this +// | | internally will verify (m^d)^e == m mod n for some m (along with testing +// | | additional padding logic) +// | | +// | | FIPS 140-2 explicitly says that only one PCT is required if a keypair may be +// | | used in either algorithm, with the module able to choose the PCT. +// | | FIPS 140-3 does not say anything specific about only requiring one PCT, but +// | | given that mathematically (m^e)^d == (m^ed) == (m^d)^e mod n, our +// | | current understanding is that the SIGN PCT works in lieu of the ENCRYPT PCT +// | | +// | | NOTE: FIPS 140-3 explicitly says that an RSA PCT cannot be used in lieu of an +// | | RSA algorithm selftest (CAST) +// -------------------------------------------------------------------- +// Rsakey | RSA | If importing a keypair (primes and modulus): +// SetValue |ENCRYPT| SP800-56Br2 specifies: +// | | Check (m^e)^d mod n == m for some m in range [2, n-2] +// | | Check n == p*q +// | | Check p and q are in open range (2 ^ ((nBits - 1) / 2), 2 ^ (nBits / 2)) +// | | Check |p-q| > 2^((nBits/2)-100) +// | | Check e is coprime with (p-1) and (q-1) +// | | Check p and q are probably prime +// | | Check d is in range [2 ^ (nBits/2) + 1, LCM(p-1,q-1) - 1] +// | | Check that d*e == 1 mod LCM(p-1,q-1) +// | | +// | | If importing a public key (only modulus): +// | | SP800-56Br2, refers to SP800-89 which details the following Partial Public Key +// | | Validation: +// | | Check n is odd +// | | Check n is not a prime or a power of a prime +// | | Check n has no factors smaller than 752 +// |---------------------------------------------------------- +// | RSA | FIPS 186-4 refers only to SP800-89 which has weaker tests for a keypair than +// | SIGN | SP800-56Br2 (i.e. success at SP800-56Br2 tests implies success in SP800-89) +// | | The current strategy will be to always perform the stronger tests. 
// --------------------------------------------------------------------

// Macro for executing a Cryptographic Algorithm Self-Test (CAST) and setting the corresponding
// flag. These selftests must be run once per algorithm before the algorithm is used. For algorithms
// like hashing and symmetric encryption which have a low performance cost, we run the CASTs when
// the module is loaded. For asymmetric algorithms, we defer the CASTs until the first use of the
// algorithm; hence we need flags to keep track of which CASTs have been run.
//
// Parameters:
//  AlgorithmSelftestFunction   Name of the selftest routine; it is invoked with no arguments.
//  AlgorithmSelftestFlag       Bit(s) of g_SymCryptFipsSelftestsPerformed that record the selftest
//                              as done. The argument is parenthesized in the expansion so that a
//                              flag expression (e.g. A | B) is tested and set as a single value.
//
// Usage notes:
//  - The selftest runs only when none of the bits in AlgorithmSelftestFlag are set yet.
//  - AlgorithmSelftestFlag is expanded twice, so it must be free of side effects.
//  - The macro expands to a bare `if` statement: use it as a standalone statement, and wrap it in
//    braces when it is the body of an enclosing if/else.
//  - NOTE(review): the flag test and the atomic OR are separate steps, so concurrent first callers
//    may each run the selftest - presumably harmless; confirm against the selftest routines.
#define SYMCRYPT_RUN_SELFTEST_ONCE(AlgorithmSelftestFunction, AlgorithmSelftestFlag) \
if( ( g_SymCryptFipsSelftestsPerformed & (AlgorithmSelftestFlag) ) == 0 ) \
{ \
    AlgorithmSelftestFunction( ); \
    SYMCRYPT_ATOMIC_OR32_PRE_RELAXED( &g_SymCryptFipsSelftestsPerformed, (AlgorithmSelftestFlag) ); \
}

// Macro for executing a pairwise consistency test on a key and setting the per-key selftest flag.
// Typically PCTs must be run for each key before the key is first used or exported, but the
// specific requirements vary between algorithms.
//
// Note that a PCT is not considered a CAST and thus does not satisfy the aforementioned requirement
// for algorithm selftests.
+#define SYMCRYPT_RUN_KEY_GEN_PCT(KeySelftestFunction, Key, KeySelftestFlag) \ +if( ( Key->fAlgorithmInfo & (KeySelftestFlag | SYMCRYPT_FLAG_KEY_NO_FIPS) ) == 0 ) \ +{ \ + /* PCT should never fail on key generation - FIPS assert that it does not */ \ + SYMCRYPT_FIPS_ASSERT( KeySelftestFunction( Key ) == SYMCRYPT_NO_ERROR ); \ + SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(&Key->fAlgorithmInfo, KeySelftestFlag); \ +} + +// Macro to check flag used in fAlgorithmInfo is non-zero and a power of 2 +#define CHECK_ALGORITHM_INFO_FLAG_POW2( flag ) \ + C_ASSERT( (flag != 0) && ((flag & (flag-1)) == 0) ); + +// Macro to check flags used together in fAlgorithmInfo are distinct +#define CHECK_ALGORITHM_INFO_FLAGS_DISTINCT( flag0, flag1, flag2, flag3, flag4 ) \ + C_ASSERT( (flag0 < flag1) && (flag1 < flag2) && (flag2 < flag3) && (flag3 < flag4) ); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_PCT_DSA); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_PCT_ECDSA); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_PCT_RSA_SIGN); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_KEY_NO_FIPS); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_DLKEY_DSA); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_DLKEY_DH); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_ECKEY_ECDSA); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_ECKEY_ECDH); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_RSAKEY_SIGN); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_RSAKEY_ENCRYPT); + +CHECK_ALGORITHM_INFO_FLAGS_DISTINCT(SYMCRYPT_PCT_DSA, SYMCRYPT_FLAG_KEY_NO_FIPS, SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION, SYMCRYPT_FLAG_DLKEY_DSA, SYMCRYPT_FLAG_DLKEY_DH); +CHECK_ALGORITHM_INFO_FLAGS_DISTINCT(SYMCRYPT_PCT_ECDSA, SYMCRYPT_FLAG_KEY_NO_FIPS, SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION, SYMCRYPT_FLAG_ECKEY_ECDSA, SYMCRYPT_FLAG_ECKEY_ECDH); +CHECK_ALGORITHM_INFO_FLAGS_DISTINCT(SYMCRYPT_PCT_RSA_SIGN, SYMCRYPT_FLAG_KEY_NO_FIPS, SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION, SYMCRYPT_FLAG_RSAKEY_SIGN, 
SYMCRYPT_FLAG_RSAKEY_ENCRYPT); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaSignVerifyPct( PCSYMCRYPT_RSAKEY pkRsakey ); +// +// FIPS pairwise consistency test for RSA sign/verify. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaPct( PCSYMCRYPT_DLKEY pkDlkey ); +// +// FIPS pairwise consistency test for DSA sign/verify. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaPct( PCSYMCRYPT_ECKEY pkEckey ); +// +// FIPS pairwise consistency test for ECDSA sign/verify. +// + +typedef struct _SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS { + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE eDhSafePrimeType; + + PCBYTE pcbPrimeP; + + UINT32 nBitsOfP; // nBitsOfQ == nBitsOfP-1 + UINT32 nMinBitsPriv; // nMinBitsPriv == 2s + // s is the maximum security strength supported by the group based on SP800-56Arev3 + UINT32 nDefaultBitsPriv; // nBitsOfQ >= nDefaultBitsPriv >= nMinBitsPriv + // nDefaultBitsPriv will be the default value of nBitsPriv for a Dlkey in this Dlgroup + // nBitsPriv is the maximum length of the private key +} SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS; +typedef const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS * PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS; +// +// SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS is used to specify all the parameters needed for creation +// of a Dlgroup based on a safe-prime group (i.e. p = 2q+1, and g = 2). 
// Currently this is used exclusively internally, and the interface for explicitly specifying use of
// safe-prime group in SymCrypt is to use
// NOTE(review): the sentence above is cut off in the original text; it presumably ends with
// SymCryptDlgroupSetValueSafePrime (the function named in the ordering note below) - confirm.

// Internally supported Safe Prime groups
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp2048;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp3072;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp4096;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp6144;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp8192;

extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe2048;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe3072;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe4096;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe6144;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe8192;

// Number of entries in SymCryptNamedSafePrimeGroups; equals the number of named groups declared
// above (5 Modp + 5 ffdhe).
#define SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT (10)

// Note, we rely on the ordering of the parameters from smallest to largest within each named set of
// safe-prime groups as we iterate through them assuming this order in SymCryptDlgroupSetValueSafePrime
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptNamedSafePrimeGroups[SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT];

//
// Definitions for ECurve dispatch functions
//
// Each typedef below is a function-pointer type for one elliptic-curve operation. The pointers are
// gathered in SYMCRYPT_ECURVE_FUNCTIONS further down. Every operation takes a caller-supplied
// scratch buffer (pbScratch, cbScratch), except the fill-scratch-spaces function which only takes
// the curve.
//
typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_SET_ZERO_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_SET_DISTINGUISHED_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_SET_RANDOM_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_INT       piScalar,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef UINT32 (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ISEQUAL_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

typedef UINT32 (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ONCURVE_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef UINT32 (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ISZERO_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ADD_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ADD_DIFF_NONZERO_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_DOUBLE_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_NEGATE_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Inout_ PSYMCRYPT_ECPOINT   poSrc,
            UINT32              mask,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef SYMCRYPT_ERROR (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_SCALAR_MUL_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_INT      piScalar,
    _In_opt_
            PCSYMCRYPT_ECPOINT  poSrc,
            UINT32              flags,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef SYMCRYPT_ERROR (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_MULTI_SCALAR_MUL_FUNC) (
    _In_                            PCSYMCRYPT_ECURVE       pCurve,
    _In_reads_( nPoints )           PCSYMCRYPT_INT *        piSrcScalarArray,
    _In_reads_( nPoints )           PCSYMCRYPT_ECPOINT *    poSrcEcpointArray,
                                    UINT32                  nPoints,
                                    UINT32                  flags,
    _Out_                           PSYMCRYPT_ECPOINT       poDst,
    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
                                    SIZE_T                  cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECURVE_FILL_SCRATCH_SPACES_FUNC) (
    _Inout_ PSYMCRYPT_ECURVE    pCurve );


// Table of the dispatch function pointers declared above, one slot per operation.
// The 13 function pointers plus the 3 slack entries make 16 pointer-sized slots; the C_ASSERT
// below checks that the resulting structure size is a power of 2.
typedef struct _SYMCRYPT_ECURVE_FUNCTIONS
{
    PSYMCRYPT_ECPOINT_SET_ZERO_FUNC             setZeroFunc;
    PSYMCRYPT_ECPOINT_SET_DISTINGUISHED_FUNC    setDistinguishedFunc;
    PSYMCRYPT_ECPOINT_SET_RANDOM_FUNC           setRandomFunc;
    PSYMCRYPT_ECPOINT_ISEQUAL_FUNC              isEqualFunc;
    PSYMCRYPT_ECPOINT_ISZERO_FUNC               isZeroFunc;
    PSYMCRYPT_ECPOINT_ONCURVE_FUNC              onCurveFunc;
    PSYMCRYPT_ECPOINT_ADD_FUNC                  addFunc;
    PSYMCRYPT_ECPOINT_ADD_DIFF_NONZERO_FUNC     addDiffFunc;
    PSYMCRYPT_ECPOINT_DOUBLE_FUNC               doubleFunc;
    PSYMCRYPT_ECPOINT_NEGATE_FUNC               negateFunc;
    PSYMCRYPT_ECPOINT_SCALAR_MUL_FUNC           scalarMulFunc;
    PSYMCRYPT_ECPOINT_MULTI_SCALAR_MUL_FUNC     multiScalarMulFunc;
    PSYMCRYPT_ECURVE_FILL_SCRATCH_SPACES_FUNC   fillScratchSpacesFunc;
    PVOID                                       slack[3];
} SYMCRYPT_ECURVE_FUNCTIONS, *PSYMCRYPT_ECURVE_FUNCTIONS;
typedef const SYMCRYPT_ECURVE_FUNCTIONS *PCSYMCRYPT_ECURVE_FUNCTIONS;

#define SYMCRYPT_ECURVE_FUNCTIONS_SIZE      (sizeof( SYMCRYPT_ECURVE_FUNCTIONS ) )

// Check that the size is a power of 2
C_ASSERT( (SYMCRYPT_ECURVE_FUNCTIONS_SIZE & (SYMCRYPT_ECURVE_FUNCTIONS_SIZE-1)) == 0 );

//
// Functions for the each type of curve
//
// The parameter lists of the per-curve functions below mirror the dispatch function-pointer
// types declared above.
//

//--------------------------------------------------------
//--------- Short Weierstrass ----------------------------
//--------------------------------------------------------

extern const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionShortWeierstrass;

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassSetZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassSetDistinguished(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

UINT32
SYMCRYPT_CALL
SymCryptShortWeierstrassIsEqual(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

UINT32
SYMCRYPT_CALL
SymCryptShortWeierstrassIsZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

UINT32
SYMCRYPT_CALL
SymCryptShortWeierstrassOnCurve(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassAdd(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassAddDiffNonZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassDouble(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassNegate(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Inout_ PSYMCRYPT_ECPOINT   poSrc,
            UINT32              mask,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

// Same parameter list as SymCryptShortWeierstrassDouble.
// NOTE(review): the "Am3" suffix presumably refers to curves with coefficient a = -3 - confirm.
VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassDoubleSpecializedAm3(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

//--------------------------------------------------------
//--------- Twisted Edwards ------------------------------
//--------------------------------------------------------

extern const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionTwistedEdwards;

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve );

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsSetDistinguished(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsAdd(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsAddDiffNonZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsDouble(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

UINT32
SYMCRYPT_CALL
SymCryptTwistedEdwardsIsEqual(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

UINT32
SYMCRYPT_CALL
SymCryptTwistedEdwardsOnCurve(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

UINT32
SYMCRYPT_CALL
SymCryptTwistedEdwardsIsZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsSetZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsNegate(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Inout_ PSYMCRYPT_ECPOINT   poSrc,
            UINT32              mask,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

//--------------------------------------------------------
//--------- Montgomery -----------------------------------
//--------------------------------------------------------

extern const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionMontgomery;

VOID
SYMCRYPT_CALL
SymCryptMontgomeryFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve );

VOID
SYMCRYPT_CALL
SymCryptMontgomerySetDistinguished(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

UINT32
SYMCRYPT_CALL
SymCryptMontgomeryIsEqual(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

UINT32
SYMCRYPT_CALL
SymCryptMontgomeryIsZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMontgomeryPointScalarMul(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_INT      piScalar,
    _In_opt_
            PCSYMCRYPT_ECPOINT  poSrc,
            UINT32              flags,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

//--------------------------------------------------------
//--------- Generic multiplication-related functions -----
//--------------------------------------------------------

VOID
SYMCRYPT_CALL
SymCryptOfflinePrecomputation(
    _In_    PSYMCRYPT_ECURVE pCurve,
    _Out_writes_bytes_( cbScratch )
            PBYTE           pbScratch,
            SIZE_T          cbScratch );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEcpointScalarMulFixedWindow(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_INT      piScalar,
    _In_opt_
            PCSYMCRYPT_ECPOINT  poSrc,
            UINT32              flags,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEcpointMultiScalarMulWnafWithInterleaving(
    _In_                            PCSYMCRYPT_ECURVE       pCurve,
    _In_reads_( nPoints )           PCSYMCRYPT_INT *        piSrcScalarArray,
    _In_reads_( nPoints )           PCSYMCRYPT_ECPOINT *    poSrcEcpointArray,
                                    UINT32                  nPoints,
                                    UINT32                  flags,
    _Out_                           PSYMCRYPT_ECPOINT       poDst,
    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
                                    SIZE_T                  cbScratch );

VOID
SYMCRYPT_CALL
SymCryptEcpointGenericSetRandom(
    _In_                            PCSYMCRYPT_ECURVE   pCurve,
    _Out_                           PSYMCRYPT_INT       piScalar,
    _Out_                           PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch ) PBYTE               pbScratch,
                                    SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptEcurveFillScratchSpaces(
    _Inout_ PSYMCRYPT_ECURVE    pCurve);
//--------------------------------------------------------
//--------------------------------------------------------

// Table with the number of field elements for each point format (in ecpoint.c)
extern const UINT32 SymCryptEcpointFormatNumberofElements[4];

UINT32
SYMCRYPT_CALL
SymCryptSizeofEcpointEx(
    UINT32 cbModElement,
    UINT32 numOfCoordinates );


//
// Trial division context: create / use / free.
//
PCSYMCRYPT_TRIALDIVISION_CONTEXT
SYMCRYPT_CALL
SymCryptFdefCreateTrialDivisionContext( UINT32 nDigits );

UINT32
SYMCRYPT_CALL
SymCryptFdefIntFindSmallDivisor(
    _In_                            PCSYMCRYPT_TRIALDIVISION_CONTEXT    pContext,
    _In_                            PCSYMCRYPT_INT                      piSrc,
    _Out_writes_bytes_( cbScratch ) PBYTE                               pbScratch,
                                    SIZE_T                              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptFdefFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext );

// NOTE(review): unlike its neighbours this prototype carries no SYMCRYPT_CALL - confirm that this
// matches the definition.
UINT64
SymCryptInverseMod2e64( UINT64 m );


//--------------------------------------------------------
//--------------------------------------------------------

// Helper function for wiping the Ec key's private state (e.g. for use in layers such as composites)
VOID
SYMCRYPT_CALL
SymCryptEckeyWipePrivateState(
    _Inout_ PSYMCRYPT_ECKEY pkEckey );

// Recoding algorithms
VOID
SYMCRYPT_CALL
SymCryptFixedWindowRecoding(
            UINT32          W,
    _Inout_ PSYMCRYPT_INT   piK,
    _Inout_ PSYMCRYPT_INT   piTmp,
    _Out_writes_( nRecodedDigits )
            PUINT32         absofKIs,
    _Out_writes_( nRecodedDigits )
            PUINT32         sigofKIs,
            UINT32          nRecodedDigits );

VOID
SYMCRYPT_CALL
SymCryptWidthNafRecoding(
            UINT32          W,
    _Inout_ PSYMCRYPT_INT   piK,
    _Out_writes_( nRecodedDigits )
            PUINT32         absofKIs,
    _Out_writes_( nRecodedDigits )
            PUINT32         sigofKIs,
            UINT32          nRecodedDigits );

VOID
SYMCRYPT_CALL
SymCryptPositiveWidthNafRecoding(
            UINT32          W,
    _In_    PCSYMCRYPT_INT  piK,
            UINT32          nBitsExp,
    _Out_writes_( nRecodedDigits )
            PUINT32         absofKIs,
            UINT32          nRecodedDigits );

// M-LWE: Module Learning-With-Errors (ML-KEM, ML-DSA)
//
// ML-KEM (also known as Kyber) and ML-DSA (also known as Dilithium) are Post-Quantum algorithms
// based on the Learning-With-Errors problem over Module Lattices (or the hardness of the M-LWE
// problem).
//
// A Module is a Vector Space over a Ring. That is, elements of the vector spaces are elements in
// the underlying ring.
+// We refer to Module as MLWE in the below types to avoid naming confusion with Module as in +// "FIPS module". Though technically components acting on MLWE types could be used outside of the +// MLWE problem, these types are SymCrypt-internal, and are only currently intended for use in +// these MLWE-based algorithms. +// +// In ML-KEM and ML-DSA, Polynomial Rings are used. That is, a ring defined over polynomials. +// For both schemes, the polynomial ring is defined modulo the polynomial (X^256 + 1). This means +// there is a representative of each polynomial ring element with 256 coefficients +// (c_255*X^255 + c_254*X^254 + ... + c_0). The coefficients themselves are modulo a small prime +// in both schemes. For ML-KEM the small prime is 3329 (12-bits), and for ML-DSA the small prime +// is 8380417 (23-bits). +// Additionally, for both schemes there is a Number Theoretic Transform (NTT) which maps polynomial +// ring elements to a corresponding ring for efficient multiplication. +// The in-memory representation of a polynomial ring element uses the same struct regardless of +// whether it is in standard form, or the NTT form. For brevity we tend to refer to polynomial +// ring elements as PolyElements. 
//
// Number of coefficients of a polynomial ring element.
#define SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS (256)

// MLWE internal function definitions are in their own headers
#include "sc_lib_mlkem.h"
#include "sc_lib_mldsa.h"

//
// Common Composite Definitions
//

// Identifiers for the elliptic curves that can be obtained through SymCryptGetCachedEcurve.
// SYMCRYPT_CACHED_ECURVE_ID_COUNT is the number of identifiers (the enumerators start at 0).
typedef enum {
    SYMCRYPT_CACHED_ECURVE_ID_NIST_P256 = 0,
    SYMCRYPT_CACHED_ECURVE_ID_NIST_P384,
    SYMCRYPT_CACHED_ECURVE_ID_CURVE_25519,
    SYMCRYPT_CACHED_ECURVE_ID_COUNT
} SYMCRYPT_CACHED_ECURVE_ID, *PSYMCRYPT_CACHED_ECURVE_ID;

PCSYMCRYPT_ECURVE
SYMCRYPT_CALL
SymCryptGetCachedEcurve(
    SYMCRYPT_CACHED_ECURVE_ID curveId );

// Encoded EC public key sizes, per curve.
// NOTE(review): units are presumably bytes - confirm.
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PUBLIC_KEY_P256        (65)
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PUBLIC_KEY_P384        (97)
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PUBLIC_KEY_CURVE_25519 (32)

// P384 has the largest of the three encoded public key sizes above.
#define SYMCRYPT_COMPOSITE_SIZEOF_MAX_ENCODED_EC_PUBLIC_KEY         SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PUBLIC_KEY_P384

// Encoded EC private key sizes, per curve.
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PRIVATE_KEY_P256        (51)
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PRIVATE_KEY_P384        (64)
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PRIVATE_KEY_CURVE_25519 (32)

UINT32
SYMCRYPT_CALL
SymCryptCompositeGetSizeOfEncodedEcSk(
    SYMCRYPT_CACHED_ECURVE_ID curveId );

UINT32
SYMCRYPT_CALL
SymCryptCompositeGetSizeOfEncodedEcPk(
    SYMCRYPT_CACHED_ECURVE_ID curveId );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEckeyGetValueCompositeEncodingSk(
    _In_                        PCSYMCRYPT_ECKEY            pEckey,
                                SYMCRYPT_CACHED_ECURVE_ID   curveId,
    _Out_writes_bytes_( cbDst ) PBYTE                       pbDst,
                                SIZE_T                      cbDst );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEckeyGetValueCompositeEncodingPk(
    _In_                        PCSYMCRYPT_ECKEY            pEckey,
                                SYMCRYPT_CACHED_ECURVE_ID   curveId,
    _Out_writes_bytes_( cbDst ) PBYTE                       pbDst,
                                SIZE_T                      cbDst );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEckeySetValueCompositeEncodingPk(
    _In_                        SYMCRYPT_CACHED_ECURVE_ID   curveId,
    _In_reads_bytes_( cbSrc )   PCBYTE                      pbSrc,
                                SIZE_T                      cbSrc,
                                UINT32                      flags,
    _Inout_                     PSYMCRYPT_ECKEY             pEckey );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEckeySetValueCompositeEncodingSk(
                                SYMCRYPT_CACHED_ECURVE_ID   curveId,
    _In_reads_bytes_( cbSrc )   PCBYTE                      pbSrc,
                                SIZE_T                      cbSrc,
                                UINT32                      flags,
    _Inout_                     PSYMCRYPT_ECKEY             pEckey );

//
// Composite ML-KEM definitions
//

// Internal description of one Composite ML-KEM parameter set: the public parameter identifier,
// the EC curve and ML-KEM parameters it combines, a label buffer (pbLabel, cbLabel), and the
// sizes and formats used for its encodings.
typedef struct _SYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS {
    SYMCRYPT_COMPOSITE_MLKEM_PARAMS     params;
    SYMCRYPT_CACHED_ECURVE_ID           ecurveId;
    SYMCRYPT_MLKEM_PARAMS               mlKemParams;
    PCBYTE                              pbLabel;
    SIZE_T                              cbLabel;
    SIZE_T                              cbCiphertext;
    SYMCRYPT_NUMBER_FORMAT              numFormat;
    SYMCRYPT_ECPOINT_FORMAT             ecPointFormat;
    SIZE_T                              cbExpandedSeed;
    SIZE_T                              cbEncodedPrivateKey;
    SIZE_T                              cbEncodedPublicKey;
} SYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS, *PSYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS;
typedef const SYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS *PCSYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS;

// Composite ML-KEM key: pointers to an ML-KEM key and an EC key plus an optional private seed.
typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_COMPOSITE_MLKEMKEY {
    PCSYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS pParams; // pointer to internal params for Composite ML-KEM being used
    PSYMCRYPT_MLKEMKEY  pkMlKemkey;
    PSYMCRYPT_ECKEY     pkEcKey;    // all composite keys with the same elliptic curve type
                                    // share the same lazily allocated curve object. This
                                    // avoids the overhead of setting up a new curve object per key.

    // NOTE(review): hasPrivateSeed presumably indicates whether privateSeed holds a valid seed - confirm.
    BOOLEAN             hasPrivateSeed;
    BYTE                privateSeed[SYMCRYPT_COMPOSITE_MLKEM_IRTF_PRIVATE_SEED_SIZE];

    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_COMPOSITE_MLKEMKEY, *PSYMCRYPT_COMPOSITE_MLKEMKEY;
typedef const SYMCRYPT_COMPOSITE_MLKEMKEY *PCSYMCRYPT_COMPOSITE_MLKEMKEY;

// Rejection sampling for generating an EC scalar from an IRTF Composite ML-KEM seed
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptCompositeMlKemGetRandomScalarForEcKeyEx(
                                    SYMCRYPT_CACHED_ECURVE_ID   ecurveId,
                                    SYMCRYPT_NUMBER_FORMAT      numFormat,
    _In_reads_bytes_( cbSeed )      PCBYTE                      pbSeed,
                                    SIZE_T                      cbSeed,
    _Out_writes_bytes_( cbScalar )  PBYTE                       pbScalar,
                                    SIZE_T                      cbScalar );

//
// XMSS
//

//
// ADRS structure definitions as specified in RFC 8391
//
// All address fields are stored as BYTE arrays.
// NOTE(review): the en32/en64 prefixes presumably mean "encoded 32-/64-bit integer" - confirm.
//
typedef enum _XMSS_ADRS_TYPE
{
    XMSS_ADRS_TYPE_OTS = 0,
    XMSS_ADRS_TYPE_LTREE = 1,
    XMSS_ADRS_TYPE_HASH_TREE = 2,
} XMSS_ADRS_TYPE;

typedef struct _XMSS_OTS_ADDRESS
{
    BYTE en32Leaf[4];
    BYTE en32Chain[4];
    BYTE en32Hash[4];
} XMSS_OTS_ADDRESS, *PXMSS_OTS_ADDRESS;

typedef struct _XMSS_LTREE_ADDRESS
{
    BYTE en32Leaf[4];
    BYTE en32Height[4];
    BYTE en32Index[4];
} XMSS_LTREE_ADDRESS, * PXMSS_LTREE_ADDRESS;

typedef struct _XMSS_HASHTREE_ADDRESS
{
    BYTE padding[4];
    BYTE en32Height[4];
    BYTE en32Index[4];
} XMSS_HASHTREE_ADDRESS, * PXMSS_HASHTREE_ADDRESS;

// Full address: layer, tree and type fields, a 12-byte type-specific part (union u), and the
// key-and-mask field. The members are all BYTE arrays and add up to 4 + 8 + 4 + 12 + 4 = 32 bytes.
typedef struct _XMSS_ADRS
{
    BYTE en32Layer[4];
    BYTE en64Tree[8];
    BYTE en32Type[4];

    union {
        XMSS_OTS_ADDRESS        ots;
        XMSS_LTREE_ADDRESS      ltree;
        XMSS_HASHTREE_ADDRESS   hashtree;
    } u;

    BYTE en32KeyAndMask[4];

} XMSS_ADRS, *PXMSS_ADRS;


// XMSS key object. The public part (Root, Seed) precedes the magic field; the private part
// (Idx, SkXmss, SkPrf) follows it.
typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_XMSS_KEY
{
    UINT32 version;

    SYMCRYPT_XMSS_PARAMS params;

    SYMCRYPT_XMSSKEY_TYPE keyType;

    // Public key
    BYTE Root[SYMCRYPT_HASH_MAX_RESULT_SIZE];
    BYTE Seed[SYMCRYPT_HASH_MAX_RESULT_SIZE];

    SYMCRYPT_MAGIC_FIELD

    // Private key
    SYMCRYPT_ALIGN_AT(16) UINT64 Idx;   // Aligning on 16-bytes to suppress clang warning
                                        // when atomic increment is performed on it.
    BYTE SkXmss[SYMCRYPT_HASH_MAX_RESULT_SIZE];
    BYTE SkPrf[SYMCRYPT_HASH_MAX_RESULT_SIZE];

} SYMCRYPT_XMSS_KEY;

typedef SYMCRYPT_XMSS_KEY* PSYMCRYPT_XMSS_KEY;


SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptXmssComputePublicRoot(
    _In_                            PCSYMCRYPT_XMSS_PARAMS  pParams,
    _In_reads_bytes_( cbSeed )      PCBYTE                  pbSeed,
                                    SIZE_T                  cbSeed,
    _In_reads_bytes_( cbSkXmss )    PCBYTE                  pbSkXmss,
                                    SIZE_T                  cbSkXmss,
    _Out_writes_bytes_( cbRoot )    PBYTE                   pbRoot,
                                    SIZE_T                  cbRoot );
//
// Compute public root value from SEED and SK_XMSS
//

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptXmsskeyVerifyRoot(
    _In_    PCSYMCRYPT_XMSS_KEY pKey );
//
// Verifies that the public root matches the private key by recomputing it
//

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptXmssVerifyInternal(
    _Inout_                         PSYMCRYPT_XMSS_KEY  pKey,
    _In_reads_bytes_( cbMessage )   PCBYTE              pbMessage,
                                    SIZE_T              cbMessage,
                                    UINT32              flags,
    _In_reads_bytes_( cbSignature ) PCBYTE              pbSignature,
                                    SIZE_T              cbSignature );
//
// The function that actually does the signature verification. This one doesn't
// run the self-tests so that it can be called from the self-test function.
+// + + +VOID +SYMCRYPT_CALL +SymCryptHbsGetWinternitzLengths( + UINT32 n, // data size in bytes + UINT32 w, // digit length in bits (Winternitz coefficient) + _Out_ PUINT32 puLen1, // number of w-bit digits in n + _Out_ PUINT32 puLen2 // number of w-bit digits to store the checksum len1 * (2^w - 1) + ); + +typedef struct _SYMCRYPT_TREEHASH_NODE +{ + UINT32 index; + UINT32 height; + BYTE value[SYMCRYPT_ANYSIZE_ARRAY]; +} SYMCRYPT_TREEHASH_NODE, * PSYMCRYPT_TREEHASH_NODE; + +#define SYMCRYPT_SIZEOF_TREEHASH_NODE(cbValue) (sizeof(SYMCRYPT_TREEHASH_NODE) - 1 + (cbValue)) + +#define SYMCRYPT_TREEHASH_NODE_GET(aNodes, cbValue, i) ((PSYMCRYPT_TREEHASH_NODE)((PBYTE)(aNodes) + (i) * SYMCRYPT_SIZEOF_TREEHASH_NODE(cbValue))) + + +typedef struct _SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT +{ + PCSYMCRYPT_XMSS_PARAMS pParams; + PCBYTE pbSeed; + XMSS_ADRS adrs; + +} SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT, * PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT; + + +typedef +VOID +(SYMCRYPT_CALL *PSYMCRYPT_INCREMENTAL_TREEHASH_FUNC)( + _In_ PSYMCRYPT_TREEHASH_NODE pNodeLeft, + _In_ PSYMCRYPT_TREEHASH_NODE pNodeRight, + _Out_ PSYMCRYPT_TREEHASH_NODE pNodeOut, + _Inout_ PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pContext ); + + +typedef struct _SYMCRYPT_INCREMENTAL_TREEHASH +{ + UINT32 cbNode; // node size; height + hash result + UINT32 nSize; // current size of the stack + UINT32 nCapacity; // maximum items + UINT32 nLastLeafIndex; + PSYMCRYPT_INCREMENTAL_TREEHASH_FUNC funcCompressNodes; + PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pContext; + + SYMCRYPT_TREEHASH_NODE arrNodes[SYMCRYPT_ANYSIZE_ARRAY]; + +} SYMCRYPT_INCREMENTAL_TREEHASH, *PSYMCRYPT_INCREMENTAL_TREEHASH; + + +PSYMCRYPT_INCREMENTAL_TREEHASH +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashInit( + UINT32 nLeaves, + PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 cbHashResult, + PSYMCRYPT_INCREMENTAL_TREEHASH_FUNC funcCompressNodes, + PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pContext); + +PSYMCRYPT_TREEHASH_NODE 
+SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashGetNode( + _In_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + SIZE_T index ); + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashAllocNode( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + UINT32 nLeafIndex ); + +VOID +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashGetTopNodes( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + _Out_ PSYMCRYPT_TREEHASH_NODE *ppNodeLeft, + _Out_ PSYMCRYPT_TREEHASH_NODE *ppNodeRight ); + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashProcessCommon( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + BOOLEAN fFinal ); + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashProcess( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash); + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashFinalize( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash); + +UINT32 +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashStackDepth( + UINT32 nLeaves); + +SIZE_T +SYMCRYPT_CALL +SymCryptHbsSizeofScratchBytesForIncrementalTreehash( + UINT32 cbNode, + UINT32 nLeaves); + +UINT32 +SYMCRYPT_CALL +SymCryptHbsGetDigit( + UINT32 width, + _In_ PCBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 index); + +// +// LMS +// +#define SYMCRYPT_IS_VALID_WINTERNITZ_WIDTH(w) ( ((w) == 1) || ((w) == 2) || ((w) == 4) || ((w) == 8) ) +#define SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE 16 +#define SYMCRYPT_LMS_MAX_N 32 +#define SYMCRYPT_LMS_MAX_P 265 +#define SYMCRYPT_LMS_MAX_H 25 +#define SYMCRYPT_LMS_MAX_CUSTOM_TREE_HEIGHT 31 +#define SYMCRYPT_LMS_CHECKSUM_SIZE 16 + +// LmsAlgId || LmsOtsAlgId || I || RootNode +#define SYMCRYPT_LMS_PUB_KEY_SIZE(cbHashOutput) (8 + SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE + cbHashOutput) + +// LmsAlgId || LmsOtsAlgId || I || RootNode || NextUnusedLeaf || Seed +#define SYMCRYPT_LMS_PRIV_KEY_SIZE(cbHashOutput) (SYMCRYPT_LMS_PUB_KEY_SIZE(cbHashOutput) + sizeof(UINT32) + cbHashOutput) + 
//==========================================================================
//   LMS internal structures
//==========================================================================
// LMS key object. abPublicRoot and abSeed are sized for the largest supported hash output
// (SYMCRYPT_LMS_MAX_N bytes).
typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_LMS_KEY{
    SIZE_T  cbSize;
    SYMCRYPT_LMS_PARAMS params;

    // Leaf number of the next LM-OTS private key that has not yet been used
    UINT64  nNextUnusedLeaf;

    // The key type, can be: SYMCRYPT_LMSKEY_TYPE_PUBLIC, or SYMCRYPT_LMSKEY_TYPE_PRIVATE
    UINT32  keyType;

    // Key identifier
    BYTE    abId[SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE];

    // Public key root
    BYTE    abPublicRoot[SYMCRYPT_LMS_MAX_N];

    // Private key seed
    BYTE    abSeed[SYMCRYPT_LMS_MAX_N];

    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_LMS_KEY;
typedef SYMCRYPT_LMS_KEY* PSYMCRYPT_LMS_KEY;
typedef const SYMCRYPT_LMS_KEY* PCSYMCRYPT_LMS_KEY;

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptLmsVerifyInternal(
    _In_                            PCSYMCRYPT_LMS_KEY  pKey,
    _In_reads_bytes_(cbMessage)     PCBYTE              pbMessage,
                                    SIZE_T              cbMessage,
                                    UINT32              flags,
    _In_reads_bytes_(cbSignature)   PCBYTE              pbSignature,
                                    SIZE_T              cbSignature);
//
// This function carries out the actual LMS verification process. It's essential to prevent an infinite
// recursive call in SymCryptLmsVerifySelftest.
//


// Atomics.
//
// We define all our SymCrypt atomics below. Different compilers/environments provide different
// intrinsics for atomic operations.
//
// The SymCrypt atomics take the form SYMCRYPT_ATOMIC_<Operation><Bitsize>_<Return>_<Ordering>
//
// <Operation> is the atomic operation (i.e. LOAD, OR, XOR, AND, ADD, INC, etc.)
// <Bitsize> indicates the bitsize of the values that the atomic operation operates on. Pointers to
// values which atomics operate on must be aligned to the size of the value.
// <Return> takes the value PRE or POST, indicating whether the return value of the atomic is the
// value of the destination before (PRE) or after (POST) the operation was performed. Not used when
// operation is LOAD!
// <Ordering> specifies the memory ordering of the atomic operation in relation to other loads/stores
// and can take one of the following values:
//      RELAXED corresponds to relaxed memory ordering in C++11
//      SEQ_CST corresponds to sequentially consistent memory ordering in C++11
//      ACQUIRE corresponds to acquire memory ordering in C++11
//      RELEASE corresponds to release memory ordering in C++11
//

// Windows builds: the atomics are mapped onto the compiler intrinsics from <intrin.h>, with one
// set of definitions per CPU architecture.
#if SYMCRYPT_PLATFORM_WINDOWS
#include <intrin.h>

#if SYMCRYPT_CPU_ARM64
// 64b loads are naturally atomic on Arm64
#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest)               SYMCRYPT_FORCE_READ64(_dest)
#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val)       _InterlockedOr_nf( (volatile LONG *)(_dest), (LONG)(_val) )
#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val)      _InterlockedExchangeAdd_nf( (volatile LONG *)(_dest), (LONG)(_val) )
#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val)     _InterlockedAdd64_nf( (volatile LONG64 *)(_dest), (LONG64)(_val) )

#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val)     _InterlockedAdd( (volatile LONG *)(_dest), (LONG)(_val) )

// Pointer load/store go through the 64-bit __ldar64 / __stlr64 intrinsics.
#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest)              ((PVOID)__ldar64( (volatile UINT64 *)(_dest) ))
#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val)       __stlr64( (volatile UINT64 *)(_dest), (UINT64)(_val) )

// For ARM/ARM64, MSVC does not have a dedicated acquire-release CAS intrinsic.
+#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \
+    _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) )
+
+#elif SYMCRYPT_CPU_ARM
+#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) _InterlockedOr64_nf( (volatile LONG64 *)(_dest), 0 )
+#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) _InterlockedOr_nf( (volatile LONG *)(_dest), (LONG)(_val) )
+#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) _InterlockedExchangeAdd_nf( (volatile LONG *)(_dest), (LONG)(_val) )
+#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) _InterlockedAdd64_nf( (volatile LONG64 *)(_dest), (LONG64)(_val) )
+
+#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) _InterlockedAdd( (volatile LONG *)(_dest), (LONG)(_val) )
+
+#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)_InterlockedOr_acq( (volatile LONG *)(_dest), 0 ))
+#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) _InterlockedExchangePointer_rel( (volatile PVOID *)(_dest), (PVOID)(_val) )
+
+#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \
+    _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) )
+
+#elif SYMCRYPT_CPU_AMD64
+// For MSVC on AMD64, there are no _nf atomic intrinsics
+// 64b loads are naturally atomic on AMD64
+#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) SYMCRYPT_FORCE_READ64(_dest)
+#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) _InterlockedOr( (volatile LONG *)(_dest), (LONG)(_val) )
+#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) _InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) )
+// The ExchangeAdd intrinsics return the pre-add value, so _val is added again to obtain the POST value
+#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) (_InterlockedExchangeAdd64( (volatile LONG64 *)(_dest), (LONG64)(_val) ) + (LONG64)(_val))
+
+#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) (_InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) ) + (LONG)(_val))
+
+// Volatile load / store are sufficient for acquire-release semantics on AMD64
+#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)SYMCRYPT_FORCE_READ64(_dest))
+#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) SYMCRYPT_FORCE_WRITE64(_dest, ((UINT64)(_val)))
+
+#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \
+    _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) )
+
+#elif SYMCRYPT_CPU_X86
+// For MSVC on x86, there is no 64b atomic load intrinsic - use expected to fail CAS, attempting to set from 0 to 0
+#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) _InterlockedCompareExchange64( (volatile LONG64 *)(_dest), 0, 0 )
+// For MSVC on x86, there are no _nf atomic intrinsics
+#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) _InterlockedOr( (volatile LONG *)(_dest), (LONG)(_val) )
+#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) _InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) )
+// For MSVC on x86, there is no 64b atomic add intrinsic
+// We could use InterlockedAdd64 function from windows.h if we are using MSVC for Windows, but
+// to remove dependency we just define our own inline function using _InterlockedCompareExchange64
+//
+// Atomically adds value to *destination and returns the post-add value. If *destination changes
+// between the plain read and the compare-exchange, the exchange fails and the loop retries.
+static
+FORCEINLINE
+LONG64
+SymCryptInlineInterlockedAdd64( volatile LONG64* destination, LONG64 value )
+{
+    LONG64 preValue;
+    do {
+        preValue = *destination;
+    } while (_InterlockedCompareExchange64(destination, preValue + value, preValue) != preValue);
+
+    return preValue + value;
+}
+#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) SymCryptInlineInterlockedAdd64( (volatile LONG64 *)(_dest), (LONG64)(_val) )
+
+#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) (_InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) ) + (LONG)(_val))
+
+// Volatile load / store are sufficient for acquire-release semantics on x86
+#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)SYMCRYPT_FORCE_READ32(_dest))
+#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) SYMCRYPT_FORCE_WRITE32(_dest, ((UINT32)(_val)))
+
+#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \
+    _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) )
+
+#else
+
+// Fallback intended to generically work across all supported platforms for cases where
+// we do not make decisions based on CPU architecture, such as no ASM builds. For the most
+// part the same as x86 except in cases where the underlying definition relies on pointer size.
+
+#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) _InterlockedCompareExchange64( (volatile LONG64 *)(_dest), 0, 0 )
+#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) _InterlockedOr( (volatile LONG *)(_dest), (LONG)(_val) )
+#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) _InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) )
+
+// Atomically adds value to *destination and returns the post-add value. If *destination changes
+// between the plain read and the compare-exchange, the exchange fails and the loop retries.
+// Declared static, like the x86 copy above, because this definition lives in a header.
+static
+FORCEINLINE
+LONG64
+SymCryptInlineInterlockedAdd64( volatile LONG64* destination, LONG64 value )
+{
+    LONG64 preValue;
+    do {
+        preValue = *destination;
+    } while (_InterlockedCompareExchange64(destination, preValue + value, preValue) != preValue);
+
+    return preValue + value;
+}
+#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) SymCryptInlineInterlockedAdd64( (volatile LONG64 *)(_dest), (LONG64)(_val) )
+
+#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) (_InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) ) + (LONG)(_val))
+
+// Pointer-sized load: 64-bit intrinsic on 64-bit Windows, 32-bit intrinsic otherwise
+#if defined(_WIN64)
+#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)_InterlockedOr64( (volatile LONG64 *)(_dest), 0 ))
+#else
+#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)_InterlockedOr( (volatile LONG *)(_dest), 0 ))
+#endif
+
+#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) _InterlockedExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_val) )
+
+#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \
+    _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) )
+
+#endif
+
+#elif SYMCRYPT_GNUC
+#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) __atomic_load_n( (volatile uint64_t *)(_dest), __ATOMIC_RELAXED )
+#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) __atomic_fetch_or( (volatile uint32_t *)(_dest), (uint32_t)(_val), __ATOMIC_RELAXED )
+#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) __atomic_fetch_add( (volatile uint32_t *)(_dest), (uint32_t)(_val), __ATOMIC_RELAXED )
+#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) __atomic_add_fetch( (volatile uint64_t *)(_dest), (uint64_t)(_val), __ATOMIC_RELAXED )
+
+// The _SEQ_CST suffix promises sequentially consistent ordering, so request exactly that
+#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) __atomic_add_fetch( (volatile uint32_t *)(_dest), (uint32_t)(_val), __ATOMIC_SEQ_CST )
+
+#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) __atomic_load_n( (volatile void* *)(_dest), __ATOMIC_ACQUIRE )
+#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) __atomic_store_n( (volatile void* *)(_dest), (void*)(_val), __ATOMIC_RELEASE )
+
+// Strong compare-and-swap on a pointer-sized location.
+// Returns the value observed in *dest: equal to the caller's expected value when the swap
+// happened, otherwise the contents of *dest that caused the comparison to fail.
+static
+FORCEINLINE
+void*
+SymCryptAtomicCasPtrAcqRel(
+    void** dest,
+    void* desired,
+    void* expected)
+{
+    __atomic_compare_exchange_n(
+        dest, // ptr
+        &expected,
+        desired,
+        FALSE, // weak (set to FALSE => strong)
+        __ATOMIC_ACQ_REL, // success_memorder
+        __ATOMIC_ACQUIRE ); // failure_memorder
+    return expected;
+}
+
+#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \
+    SymCryptAtomicCasPtrAcqRel( (void **)(_dest), (void *)(_exchange), (void *)(_comp) )
+
+#endif
+
+// Inline CAS-128 functions
+
+// BOOLEAN
+// SymCryptAtomicCas128Relaxed(
+// _Inout_updates_(2) PUINT64 destination,
+// _Inout_updates_(2) PUINT64 expectedValue,
+// _In_reads_(2) PCUINT64 desiredValue);
+// Performs Compare-and-Swap on a 128b memory location.
+// Atomically reads destination, compares with expectedValue, and:
+// if they are equal, writes desiredValue to destination, and returns TRUE
+// if they are not equal, writes the value read from destination to expectedValue, and returns FALSE
+//
+// Remarks:
+// On success, the value of expectedValue is not guaranteed.
+// Only destination is guaranteed to be read and written atomically, expectedValue should be a buffer
+// which is only owned by the calling thread.
+// destination must be aligned to 16 bytes
+//
+
+#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64
+
+#if SYMCRYPT_PLATFORM_WINDOWS
+
+#if SYMCRYPT_CPU_ARM64
+#define SYMCRYPT_MSVC_CAS128_NF _InterlockedCompareExchange128_nf
+#elif SYMCRYPT_CPU_AMD64
+#define SYMCRYPT_MSVC_CAS128_NF _InterlockedCompareExchange128
+#endif
+
+static
+FORCEINLINE
+BOOLEAN
+SymCryptAtomicCas128Relaxed(
+    _Inout_updates_(2) PUINT64 destination,
+    _Inout_updates_(2) PUINT64 expectedValue,
+    _In_reads_(2) PCUINT64 desiredValue)
+{
+    // The intrinsic takes the high 64 bits of the exchange value before the low 64 bits
+    return SYMCRYPT_MSVC_CAS128_NF(
+        (volatile LONG64 *)destination,
+        (LONG64)desiredValue[1],
+        (LONG64)desiredValue[0],
+        (LONG64 *) expectedValue );
+}
+
+#elif SYMCRYPT_GNUC
+
+static
+FORCEINLINE
+BOOLEAN
+SymCryptAtomicCas128Relaxed(
+    _Inout_updates_(2) PUINT64 destination,
+    _Inout_updates_(2) PUINT64 expectedValue,
+    _In_reads_(2) PCUINT64 desiredValue)
+{
+#if SYMCRYPT_CPU_AMD64
+    // To avoid dynamically linking libatomic in OpenEnclave, use inline assembly for cmpxchg16b
+    // on AMD64. We always need to perform CPU feature detection before we hit this function.
+    BOOLEAN result;
+    __asm__ __volatile__
+    (
+        "lock cmpxchg16b %1\n\t"
+        "sete %0"
+        : "=r" (result)
+        , "+m" (*destination)
+        , "+d" (expectedValue[1])
+        , "+a" (expectedValue[0])
+        : "c" (desiredValue[1])
+        , "b" (desiredValue[0])
+        : "cc"
+    );
+    return result;
+#elif SYMCRYPT_CPU_ARM64
+    // clang inlines this but GCC dynamically links to libatomic
+    // For now, just let the compiler decide, and for ARM64 modules, always allow linking to libatomic
+    // We may want to break out into inline asm for LDXP/STXP implementation (v8.0) vs. CASP
+    // implementation (v8.1) in future
+    return __atomic_compare_exchange(
+        (__int128 *)destination, // ptr
+        (__int128 *)expectedValue, // expected
+        (__int128 *)desiredValue, // desired
+        FALSE, // weak (set to FALSE => strong)
+        __ATOMIC_RELAXED, // success_memorder
+        __ATOMIC_RELAXED); // failure_memorder
+#endif
+}
+
+#endif
+
+#endif
+
+// Returns the number of trailing (least significant) zero bits in value; 32 when value is 0.
+static
+FORCEINLINE
+UINT32
+SymCryptCountTrailingZeros32( UINT32 value )
+{
+    unsigned long index = 0;
+    if( value == 0 )
+    {
+        return 32;
+    }
+
+#if SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM)
+    _BitScanForward(&index, value);
+#elif SYMCRYPT_GNUC
+    index = __builtin_ctz(value);
+#else
+    while( (value & 1) == 0 )
+    {
+        index++;
+        value >>= 1;
+    }
+#endif
+
+    return (UINT32)index;
+}
+
+// Returns the number of trailing (least significant) zero bits in value; 64 when value is 0.
+static
+FORCEINLINE
+UINT32
+SymCryptCountTrailingZeros64( UINT64 value )
+{
+    unsigned long index = 0;
+    if( value == 0 )
+    {
+        return 64;
+    }
+
+#if SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64)
+    _BitScanForward64(&index, value);
+#elif SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM)
+    // No 64-bit scan here: scan the low half, or the high half (+32) when the low half is zero
+    if( ((UINT32)value) == 0 )
+    {
+        _BitScanForward(&index, (UINT32)(value>>32));
+        index += 32;
+    } else {
+        _BitScanForward(&index, (UINT32)value);
+    }
+
+#elif SYMCRYPT_GNUC
+    index = __builtin_ctzll(value);
+#else
+    while( (value & 1) == 0 )
+    {
+        index++;
+        value >>= 1;
+    }
+#endif
+
+    return (UINT32)index;
+}
+
+// Returns the number of leading (most significant) zero bits in value; 32 when value is 0.
+static
+FORCEINLINE
+UINT32
+SymCryptCountLeadingZeros32( UINT32 value )
+{
+    unsigned long zeros = 0;
+
+    if(value == 0)
+    {
+        return 32;
+    }
+
+#if SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM)
+    // _BitScanReverse yields the index of the highest set bit; convert it to a leading-zero count
+    _BitScanReverse(&zeros, value);
+    zeros = 31 - zeros;
+#elif SYMCRYPT_GNUC
+    zeros = __builtin_clz(value);
+#else
+    while( (value & 0x80000000) == 0 )
+    {
+        zeros++;
+        value <<= 1;
+    }
+#endif
+
+    return (UINT32)zeros;
+}
+
+// Returns the number of leading (most significant) zero bits in value; 64 when value is 0.
+static
+FORCEINLINE
+UINT32
+SymCryptCountLeadingZeros64( UINT64 value )
+{
+    unsigned long zeros = 0;
+
+    if(value == 0)
+    {
+        return 64;
+    }
+
+#if SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64)
+    _BitScanReverse64(&zeros, value);
+    zeros = 63 - zeros;
+#elif SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM)
+    // No 64-bit scan here: scan the high half, or the low half (32 extra zeros) when it is zero
+    if( (value >> 32) == 0 )
+    {
+        _BitScanReverse(&zeros, (UINT32)value);
+        zeros = 63 - zeros;
+    } else {
+        _BitScanReverse(&zeros, (UINT32)(value >> 32));
+        zeros = 31 - zeros;
+    }
+#elif SYMCRYPT_GNUC
+    zeros = __builtin_clzll(value);
+#else
+    while( (value & 0x8000000000000000) == 0 )
+    {
+        zeros++;
+        value <<= 1;
+    }
+#endif
+
+    return (UINT32)zeros;
+}
diff --git a/libs/symcrypt/lib/sc_lib_mldsa.h b/libs/symcrypt/lib/sc_lib_mldsa.h
new file mode 100644
index 00000000000..54dafbabeaa
--- /dev/null
+++ b/libs/symcrypt/lib/sc_lib_mldsa.h
@@ -0,0 +1,1081 @@
+//
+// sc_lib_mldsa.h
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+// Internal ML-DSA definitions for the symcrypt library.
+// Always intended to be included as part of sc_lib.h
+//
+
+//
+// Modulus for ML-DSA
+//
+#define SYMCRYPT_MLDSA_Q (8380417)
+
+//
+// Montgomery multiplier for ML-DSA, log 2 (i.e. R = 2^32)
+//
+#define SYMCRYPT_MLDSA_R_LOG2 (32)
+
+//
+// Size of the root seed xi used in key generation
+//
+#define SYMCRYPT_MLDSA_ROOT_SEED_SIZE (32)
+
+//
+// Size of the public seed rho
+//
+#define SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE (32)
+
+//
+// Size of public key hash (tr) = SHAKE256 result size = 64 bytes
+//
+#define SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE SYMCRYPT_SHAKE256_RESULT_SIZE
+
+//
+// Size of private signing seed K
+//
+#define SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE (32)
+
+//
+// Size of the private vector seed rho prime
+//
+#define SYMCRYPT_MLDSA_PRIVATE_VECTOR_SEED_SIZE (64)
+
+//
+// Size of random value used in signing (rnd in FIPS 204)
+//
+#define SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE (32)
+
+//
+// Length of hash algorithm OIDs in bytes. Currently all supported hash algorithms have 11-byte
+// OIDs, but this is not guaranteed to be the case as more algorithms are added in the future.
+// If the OID length becomes variable, functions which use this value will need to be changed.
+//
+#define SYMCRYPT_MLDSA_SUPPORTED_HASH_OID_SIZE (11)
+
+//
+// Flag for Sign and Verify with External Mu
+//
+#define SYMCRYPT_FLAG_MLDSA_EXTERNALMU (0x1)
+
+typedef struct _SYMCRYPT_MLDSA_POLYELEMENT {
+    // PolyElements just store the coefficients without any header.
+    UINT32 coeffs[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS];
+} SYMCRYPT_MLDSA_POLYELEMENT, * PSYMCRYPT_MLDSA_POLYELEMENT;
+typedef const SYMCRYPT_MLDSA_POLYELEMENT* PCSYMCRYPT_MLDSA_POLYELEMENT;
+
+// Maximum number of rows and columns in A matrix for ML-DSA
+#define SYMCRYPT_MLDSA_VECTOR_MAX_LENGTH (8)
+#define SYMCRYPT_MLDSA_MATRIX_MAX_NROWS (8)
+#define SYMCRYPT_MLDSA_MATRIX_MAX_NCOLS (7)
+
+typedef _Struct_size_bytes_( cbTotalSize ) struct _SYMCRYPT_MLDSA_VECTOR {
+    _Field_range_( 1, SYMCRYPT_MLDSA_VECTOR_MAX_LENGTH )
+    UINT8 nElems; // Number of PolyElements in the vector
+    UINT32 cbTotalSize; // Total size of the Vector
+
+    // Followed by:
+    // nElems PolyElements
+} SYMCRYPT_MLDSA_VECTOR, * PSYMCRYPT_MLDSA_VECTOR;
+typedef const SYMCRYPT_MLDSA_VECTOR* PCSYMCRYPT_MLDSA_VECTOR;
+
+typedef _Struct_size_bytes_( cbTotalSize ) struct _SYMCRYPT_MLDSA_MATRIX {
+    _Field_range_( 1, SYMCRYPT_MLDSA_MATRIX_MAX_NROWS )
+    UINT8 nRows; // k in FIPS-204
+    _Field_range_( 1, SYMCRYPT_MLDSA_MATRIX_MAX_NCOLS )
+    UINT8 nCols; // l in FIPS-204
+    UINT32 cbTotalSize; // Total size of the Matrix
+
+    // Followed by:
+    // nRows*nCols PolyElements in row-major order
+} SYMCRYPT_MLDSA_MATRIX, * PSYMCRYPT_MLDSA_MATRIX;
+typedef const SYMCRYPT_MLDSA_MATRIX* PCSYMCRYPT_MLDSA_MATRIX;
+
+typedef struct _SYMCRYPT_MLDSA_INTERNAL_PARAMS {
+    UINT32 params; // parameter set of ML-DSA being used - takes a value from SYMCRYPT_MLDSA_PARAMS
+
+    UINT32 cbPolyElement; // size in bytes of one polynomial ring element
+    UINT32 cbRowVector; // size in bytes of one row vector (k elements)
+    UINT32 cbColVector; // size in bytes of one column vector (l elements)
+    UINT32 cbMatrix; // size in bytes of one matrix
+
+    UINT8 nRows; // Number of rows in the A matrix (k in FIPS-204)
+    UINT8 nCols; // Number of columns in the A matrix (l in FIPS-204)
+
+    UINT8 privateKeyRange; // Coefficient range of s1, s2 private key vectors (eta in FIPS-204)
+    UINT8 encodedCoefficientBitLength; // Bit length of encoded private key coefficients
+
+    UINT8 nChallengeNonZeroCoeffs; // Number of non-zero coefficients in the challenge polynomial (tau in FIPS-204)
+    UINT8 nHintNonZeroCoeffs; // Max number of non-zero coefficients in the hint polynomial (omega in FIPS-204)
+    UINT8 maskCoefficientRangeLog2; // Coefficient range of mask polynomial y (log_2(gamma_1) in FIPS-204)
+    UINT8 commitmentModulus; // Modulus for commitment values in UseHint and MakeHint (q-1)/(2*gamma_2)
+    UINT32 decomposeR1Factor; // Multiplication factor for R1 in SymCryptMlDsaDecompose - see function comments
+    UINT32 commitmentRoundingRange; // Rounding range for commitment value (gamma_2 in FIPS-204)
+    UINT32 w1EncodeCoefficientBitLength; // Bit length of coefficients for w1 encoding: bitlen( (q - 1) / (2 * gamma_2) - 1 )
+
+    UINT32 cbCommitmentHash; // Size of the commitment hash (lambda / 4 in FIPS 204)
+    UINT32 cbEncodedPrivateKey; // Size of the encoded private key
+    UINT32 cbEncodedPublicKey; // Size of the encoded public key
+    UINT32 cbEncodedSignature; // Size of the encoded signature
+} SYMCRYPT_MLDSA_INTERNAL_PARAMS, * PSYMCRYPT_MLDSA_INTERNAL_PARAMS;
+typedef const SYMCRYPT_MLDSA_INTERNAL_PARAMS* PCSYMCRYPT_MLDSA_INTERNAL_PARAMS;
+
+typedef _Struct_size_bytes_( cbTotalSize ) struct _SYMCRYPT_MLDSAKEY {
+    UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in (not currently used)
+                           // Also tracks which per-key selftests have been performed on this key
+                           // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_MLDSAKEY_*, and
+                           // SYMCRYPT_SELFTEST_KEY_* values
+
+    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams;
+
+    UINT32 cbTotalSize; // Total in-memory size of the ML-DSA key (this header and the following structs)
+
+    BOOLEAN hasRootSeed; // True if the key has the seed used in key generation (xi)
+    BOOLEAN hasPrivateKey; // True if the key has private vectors s1, s2, t0
+
+    // Seeds
+    _When_( hasRootSeed, _Field_size_bytes_(SYMCRYPT_MLDSA_ROOT_SEED_SIZE) )
+    _When_( !hasRootSeed, _Field_size_bytes_part_(SYMCRYPT_MLDSA_ROOT_SEED_SIZE, 0) )
+    BYTE rootSeed[SYMCRYPT_MLDSA_ROOT_SEED_SIZE]; // Root seed used in key generation (xi) - only available for keys generated by SymCrypt, or imported from a seed
+
+    _When_( hasPrivateKey, _Field_size_bytes_(SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE) )
+    _When_( !hasPrivateKey, _Field_size_bytes_part_(SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE, 0) )
+    BYTE privateSigningSeed[SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE]; // Private seed used in signing (K)
+
+    BYTE publicSeed[SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE]; // Public seed from which A can be derived (rho)
+    BYTE publicKeyHash[SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE]; // SHAKE-256 hash of the public key
+
+    //
+    // ML-DSA matrix/vector components: A * s1 + s2 = t
+    //
+    // t is separated into two components, t0 and t1, using Power2Round. t0 is private and is used
+    // during signing; t1 is public and is used during verification. All components are stored in
+    // NTT form so that we do not need to convert them during signing or verification.
+    //
+
+    // Public components - always valid
+    PSYMCRYPT_MLDSA_MATRIX pmA; // Public matrix A - size nRows x nCols
+    PSYMCRYPT_MLDSA_VECTOR pvt1; // Public component of t vector from Power2Round (row vector)
+
+    // Private components - only valid when hasPrivateKey is TRUE
+    PSYMCRYPT_MLDSA_VECTOR pvs1; // Private vector s1 (column vector)
+    PSYMCRYPT_MLDSA_VECTOR pvs2; // Private vector s2 (row vector)
+    PSYMCRYPT_MLDSA_VECTOR pvt0; // Private component of t vector from Power2Round (row vector)
+
+    SYMCRYPT_MAGIC_FIELD
+    // Followed by:
+    // A
+    // t1
+    // s1
+    // s2
+    // t0
+} SYMCRYPT_MLDSAKEY, * PSYMCRYPT_MLDSAKEY;
+typedef const SYMCRYPT_MLDSAKEY* PCSYMCRYPT_MLDSAKEY;
+
+typedef _Struct_size_bytes_(cbTotalSize) struct _SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES
+{
+    UINT32 cbTotalSize; // Total in-memory size of this structure
+    UINT32 nRowVectors; // Number of row vectors
+    UINT32 nColVectors; // Number of column vectors
+    UINT32 nPolyElements; // Number of PolyElements
+    UINT32 cbScratch; // Size of scratch buffer
+
+
+    SYMCRYPT_SHAKE256_STATE shake256State;
+
+    _Field_size_( nRowVectors )
+    PSYMCRYPT_MLDSA_VECTOR* pvRowVectors; // Array of pointers to row vectors
+    _Field_size_( nColVectors )
+    PSYMCRYPT_MLDSA_VECTOR* pvColVectors; // Array of pointers to column vectors
+    _Field_size_( nPolyElements)
+    PSYMCRYPT_MLDSA_POLYELEMENT* pePolyElements; // Array of pointers to PolyElements
+
+    _Field_size_bytes_( cbScratch )
+    PBYTE pbScratch;
+
+    SYMCRYPT_MAGIC_FIELD
+    // Followed by:
+    // pvRowVectors[0..nRowVectors-1]
+    // pvColVectors[0..nColVectors-1]
+    // pePolyElements[0..nPolyElements-1]
+    // nRowVectors * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( nRows ) buffer for row vectors
+    // nColVectors * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( nCols ) buffer for column vectors
+    // nPolyElements * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT buffer for PolyElements
+    // cbScratch bytes of scratch space
+} SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES, * PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES;
+
+#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT ( sizeof( SYMCRYPT_MLDSA_POLYELEMENT ) )
+#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( _nElems ) ( sizeof( SYMCRYPT_MLDSA_VECTOR ) + ( (_nElems) * sizeof( SYMCRYPT_MLDSA_POLYELEMENT ) ) )
+#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX( _nRows, _nCols ) ( sizeof( SYMCRYPT_MLDSA_MATRIX ) + ( (_nRows) * (_nCols) * sizeof( SYMCRYPT_MLDSA_POLYELEMENT ) ) )
+#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_KEY( _nRows, _nCols ) ( sizeof( SYMCRYPT_MLDSAKEY) + \
+    SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX( _nRows, _nCols ) + \
+    SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( _nCols ) + \
+    (SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( _nRows ) * 3u) )
+
+#define SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT_OFFSET( _row ) ( sizeof( SYMCRYPT_MLDSA_VECTOR ) + ((_row) * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT) )
+#define SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( _row, _pVector ) ((PSYMCRYPT_MLDSA_POLYELEMENT) ( ((PBYTE) (_pVector)) + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT_OFFSET( _row ) ))
+#define SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT_OFFSET( _row, _col, _pMatrix ) ( sizeof( SYMCRYPT_MLDSA_MATRIX ) + (((_row) * (_pMatrix)->nCols + (_col)) * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT) )
+#define SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT( _row, _col, _pMatrix) ((PSYMCRYPT_MLDSA_POLYELEMENT) ( ((PBYTE) (_pMatrix)) + SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT_OFFSET( _row, _col, _pMatrix ) ))
+
+#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( _pVector, _nBitsPerCoeff ) ( ((_pVector)->nElems * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS * (_nBitsPerCoeff) ) / 8u )
+
+// For packing signed coefficients into the minimum possible number of bits for encoding, ML-DSA
+// converts them to (signed upper bound - x) for each coefficient x. For example, when encoding
+// s1 and s2 which have coefficients in the range [-eta, eta] with ML-DSA-65 (eta = 4), 1 is encoded
+// as (4 - 1) = 3, 0 is encoded as (4 - 0) = 4, -1 is encoded as (4 - (-1)) = 5, etc. Conveniently,
+// this also works in reverse to decode the coefficients.
+#define SYMCRYPT_INTERNAL_MLDSA_SHORT_COEFFICIENT_ENCODE_DECODE( _val, _bound) ( (_bound) - (_val) )
+
+//////////////////////////////////////////////////////////////////////////
+// Internal implementations of public APIs
+//////////////////////////////////////////////////////////////////////////
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsaKeyGenerateEx(
+    _Inout_ PSYMCRYPT_MLDSAKEY pkMlDsakey,
+    _In_reads_( cbRootSeed ) PCBYTE pbRootSeed,
+    SIZE_T cbRootSeed,
+    UINT32 flags );
+//
+// Implements SymCryptMlDsakeyGenerate. Takes a seed from the caller so that keys can be generated
+// deterministically for testing.
+//
+// Parameters:
+// - (pbRootSeed, cbRootSeed): The seed used to generate the key (xi in FIPS 204)
+//
+// See SymCryptMlDsakeyGenerate for additional documentation.
+//
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlDsaSignEx(
+    _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey,
+    _In_reads_( cbInput ) PCBYTE pbInput,
+    SIZE_T cbInput,
+    _In_reads_opt_( cbContext ) PCBYTE pbContext,
+    _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext,
+    _In_reads_opt_( cbHashOid ) PCBYTE pbHashOid,
+    SIZE_T cbHashOid,
+    _In_reads_( cbRandom ) PCBYTE pbRandom,
+    SIZE_T cbRandom,
+    UINT32 flags,
+    _Out_writes_( cbSignature ) PBYTE pbSignature,
+    SIZE_T cbSignature );
+//
+// Implements SymCryptMlDsaSign, SymCryptExternalMuMlDsaSign, and SymCryptHashMlDsaSign.
+// Takes the random value from the caller so that signing can be done deterministically for testing.
+//
+// Parameters:
+// - (pbInput, cbInput): The message to be signed. For SymCryptMlDsaSign, this is the full message.
+// For SymCryptHashMlDsaSign, this is the hash of the message.
+// - (pbContext, cbContext): An optional context string which will be prepended to the message. +// - (pbHashOid, cbHashOid): The DER-encoded OID of the hash algorithm used to hash the message, +// when using SymCryptHashMlDsaSign. Must be NULL for SymCryptMlDsaSign. +// - (pbRandom, cbRandom): The random value used in the signing process (rnd in FIPS 204). +// - flags: 0 or SYMCRYPT_FLAG_MLDSA_EXTERNALMU. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaVerifyEx( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _In_reads_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + _In_reads_opt_( cbHashOid ) PCBYTE pbHashOid, + SIZE_T cbHashOid, + _In_reads_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ); +// +// Implements SymCryptMlDsaVerify, SymCryptExternalMuMlDsaVerify, and SymCryptHashMlDsaVerify. +// +// Parameters: +// - (pbInput, cbInput): The message to be verified. For SymCryptMlDsaVerify, this is the full +// message. For SymCryptHashMlDsaVerify, this is the hash of the message. +// - (pbContext, cbContext): An optional context string which will be prepended to the message. +// - (pbHashOid, cbHashOid): The DER-encoded OID of the hash algorithm used to hash the message, +// when using SymCryptHashMlDsaVerify. Must be NULL for SymCryptMlDsaVerify. +// - (pbSignature, cbSignature): The signature to be verified. +// - flags: 0 or SYMCRYPT_FLAG_MLDSA_EXTERNALMU. +// + +_Success_( TRUE ) +PSYMCRYPT_MLDSAKEY +SYMCRYPT_CALL +SymCryptMlDsakeyInitialize( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams, + _Out_writes_bytes_(cbKey) PBYTE pbKey, + UINT32 cbKey ); +// +// Initializes a SYMCRYPT_MLDSAKEY structure in the given buffer. The buffer size (cbKey) must +// be exactly equal to the size of the key structure, which can be calculated using +// SYMCRYPT_INTERNAL_MLDSA_SIZEOF_KEY. 
+// +// Parameters: +// - pInternalParams: Parameter set to use for the key. +// - (pbKey, cbKey): Buffer for the key structure. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsakeyComputeT( + _In_ PCSYMCRYPT_MLDSA_MATRIX pmA, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvs1, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvs2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvt0, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvt1, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvTmp, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peTmp ); +// +// Helper function for computing the t vector in ML-DSA: A * s1 + s2 = t. Used by key generation +// and private key import. All inputs must be in NTT form. The outputs t0 and t1 are NOT returned +// in NTT form; it is the caller's responsibility to convert them when appropriate. +// +// Parameters: +// - pmA: Public matrix A +// - pvs1: Private vector s1. +// - pvs2: Private vector s2. +// - pvt0: Private component of t vector from Power2Round. +// - pvt1: Public component of t vector from Power2Round. +// - pvTmp: Temporary vector for intermediate computations. +// - peTmp: Temporary PolyElement for intermediate computations. +// + +////////////////////////////////////////////////////////////////////////// +// Montgomery reduction and multiplication +////////////////////////////////////////////////////////////////////////// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaMontReduce( UINT64 a ); +// +// Montgomery reduction +// res = a * R^-1 mod Q. +// +// Note that this divides out a factor of R. +// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaMontMul( UINT32 a, UINT32 b ); +// +// Montgomery multiplication +// res = (a * b) / R mod Q +// +// Equivalent to SymCryptMlDsaMontReduce( (UINT64) a * b ) +// As above, this divides out a factor of R, which can be compensated for in either input, +// or taken into account in the output. 
+// + +////////////////////////////////////////////////////////////////////////// +// 32-bit modular arithmetic +////////////////////////////////////////////////////////////////////////// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaModAdd( UINT32 a, UINT32 b ); +// +// res := a + b mod Q +// +// Requirements: a < Q, b < Q +// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaModSub( UINT32 a, UINT32 b ); +// +// res := a - b mod Q +// +// Requirements: a < Q, b < Q +// + +////////////////////////////////////////////////////////////////////////// +// Polynomial operations +////////////////////////////////////////////////////////////////////////// + +_Success_( TRUE ) +PSYMCRYPT_MLDSA_POLYELEMENT +SYMCRYPT_CALL +SymCryptMlDsaPolyElementCreate( + _Inout_updates_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer ); +// +// Initializes a SYMCRYPT_MLDSA_POLYELEMENT in the given buffer. +// cbBuffer must be equal to SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementSetZero( + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// Sets all coefficients to zero +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementNTT( + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peSrc ); +// +// ML-DSA Polynomial Ring Element NTT: +// peSrc = NTT(peSrc) per FIPS 204 +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementINTT( + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peSrc ); +// +// ML-DSA Polynomial Ring Element inverse NTT: +// peSrc = InverseNTT(peSrc) per FIPS 204 +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementMulR( + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peSrc ); +// +// ML-DSA Polynomial multiplication by the Montgomery multiplier R: +// peSrc = (peSrc * R) mod Q +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementMontMul( + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// ML-DSA Polynomial Montgomery multiplication: +// peDst = (peSrc1 * peSrc2) ./ R +// where: +// * is polynomial 
multiplication given sources in NTT form +// ./ is coefficient-wise division and R is the Montgomery multiplier +// +// Requirements: +// - peSrc1 and peSrc2 must be PolyElements in ML-DSA NTT form +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementAdd( + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// ML-DSA Polynomial Ring Element addition +// peDst = peSrc1 + peSrc2 +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementSub( + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// ML-DSA Polynomial Ring Element subtraction +// peDst = peSrc1 - peSrc2 +// + +////////////////////////////////////////////////////////////////////////// +// Vector operations +////////////////////////////////////////////////////////////////////////// + +_Success_( TRUE ) +PSYMCRYPT_MLDSA_VECTOR +SYMCRYPT_CALL +SymCryptMlDsaVectorCreate( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT8 nElems ); +// +// Initializes a vector of nElems PolyElements in the given buffer. +// cbBuffer must be equal to SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( nElems ). +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorCopy( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// pvDst = pvSrc. Vectors must be the same size. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorSetZero( + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// Sets all elements of the vector to zero. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorAdd( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// pvDst = pvSrc1 + pvSrc2 +// +// Requirements: pvSrc1, pvSrc2, and pvDst must all have the same number of elements. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorSub( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// pvDst = pvSrc1 - pvSrc2 +// +// Requirements: pvSrc1, pvSrc2, and pvDst must all have the same number of elements. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorPolyElementMontMul( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// ML-DSA Vector-PolyElement Montgomery Multiplication: +// pvDst[i] = (pvSrc1[i] * peSrc2) ./ R +// +// where: +// * is polynomial multiplication given sources in NTT form +// ./ is coefficient-wise division and R is Montgomery multiplier +// +// Requirements: +// - peSrc2, and all elements of pvSrc1 must be in ML-DSA NTT form +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorNTT( + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvSrc ); +// +// ML-DSA Vector NTT: +// pvSrc[i] = NTT(pvSrc[i]) for each element in pvSrc +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorINTT( + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvSrc ); +// +// ML-DSA Vector inverse NTT: +// pvSrc[i] = INTT(pvSrc[i]) for each element in pvSrc +// + + +////////////////////////////////////////////////////////////////////////// +// Matrix operations +////////////////////////////////////////////////////////////////////////// + +PSYMCRYPT_MLDSA_MATRIX +SYMCRYPT_CALL +SymCryptMlDsaMatrixCreate( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT8 nRows, + UINT8 nCols ); +// +// Initializes a matrix of nRows * nCols PolyElements in the given buffer. +// cbBuffer must be equal to SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX( nRows, nCols ). 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaMatrixVectorMontMul( + _In_ PCSYMCRYPT_MLDSA_MATRIX pmSrc1, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peTmp ); +// +// ML-DSA Matrix-Vector Montgomery Multiplication: +// pvDst = (pmSrc1 * pvSrc2) ./ R +// +// where: +// * is matrix-vector multiplication of polynomials in NTT form +// ./ is coefficient-wise division and R is Montgomery multiplier +// + + +////////////////////////////////////////////////////////////////////////// +// Sampling and rejection +////////////////////////////////////////////////////////////////////////// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaRejNttPoly( + _In_reads_( cbRejNttPolySeed ) PCBYTE pbRejNttPolySeed, + SIZE_T cbRejNttPolySeed, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// RejNTTPoly from FIPS 204 +// Used by SymCryptMlDsaExpandA to generate a polynomial in the public matrix A from the expanded +// public seed. The output polynomial is in NTT form with coefficients modulo Q. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandA( + _In_reads_( cbPublicSeed ) PCBYTE pbPublicSeed, + SIZE_T cbPublicSeed, + _Inout_ PSYMCRYPT_MLDSA_MATRIX pmA ); +// +// ExpandA from FIPS 204 +// Expands the public seed into the public matrix A. +// \hat{A}[i, j] = RejNttPoly(seed || j || i) for each index (i, j) in A +// + +INT8 +SYMCRYPT_CALL +SymCryptMlDsaCoeffFromHalfByte( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_range_( 0, 15 ) UINT8 halfByte ); +// +// CoeffFromHalfByte from FIPS 204 +// Converts a nibble (range [0, 15]) to a coefficient in the range [-eta, eta] +// If the nibble is outside of the valid private key range ([0, 14] for eta = 2, [0, 8] for eta = 4), +// returns INT8_MIN. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaRejBoundedPoly( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbRejBoundedPolySeed ) PCBYTE pbRejBoundedPolySeed, + SIZE_T cbRejBoundedPolySeed, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// RejBoundedPoly from FIPS 204 +// Used by SymCryptMlDsaExpandS to generate polynomials in the private vectors s1 and s2 from the +// expanded private vector seed. Coefficients in the output polynomial are modulo Q. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandS( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbPrivateVectorSeed ) PCBYTE pbPrivateVectorSeed, + SIZE_T cbPrivateVectorSeed, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvs1, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvs2 ); +// +// ExpandS from FIPS 204 +// s1 = RejBoundedPoly(seed || i) for each index i in s1 (column vector) +// s2 = RejBoundedPoly(seed || i) for each index i in s2 (row vector) +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaSampleInBall( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbCommitmentHash ) PCBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peChallenge ); +// +// SampleInBall from FIPS 204 +// Samples a polynomial c in R_q with coefficients in {-1, 0, 1} and Hamming weight tau. +// As with all polynomials, coefficients are represented as unsigned integers modulo Q. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandMask( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _Inout_ PSYMCRYPT_SHAKE256_STATE pShakeState, + _In_reads_( cbPrivateRandom ) PCBYTE pbPrivateRandom, + SIZE_T cbPrivateRandom, + _In_ UINT16 counter, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvMask ); +// +// ExpandMask from FIPS 204 +// Samples a polynomial vector y in R^l such that each polynomial y[r] has coefficients between +// (-gamma_1 + 1, gamma_1) modulo Q, where gamma_1 == 2^(maskCoefficientRangeLog2) . The output +// vector is returned in NTT form. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaMakeHint( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvWMinusCs2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvWMinusCs2PlusCt0, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst, + _Out_ UINT32* nBitsSet ); +// +// MakeHint from FIPS 204 +// Computes the hint vector. Each coefficient of the polynomials in the vector is a single bit +// indicating whether adding ct0 to (w - cs2) alters the high bits of the corresponding coefficient. +// We define our inputs differently from FIPS 204 to reduce computations: +// +// In FIPS 204, MakeHint is defined as: +// [[r1 != v1]] where r1 = HighBits(r), v1 = HighBits(r + z) +// +// ML-DSA.Sign_internal calls MakeHint with inputs: +// z = -ct0, r = w - cs2 + ct0 +// +// We can simplify this to: +// r1 = HighBits(w - cs2 + ct0), v1 = HighBits(w - cs2) +// +// Note that this function modifies the inputs in place for efficiency. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaUseHint( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvHint, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvCommitment ); +// +// UseHint from FIPS 204 +// Uses the hint vector to recalculate the original commitment vector from the approximated +// commitment vector by setting the high bits of the coefficients that were dropped in the +// approximation. On input, pvCommitment is the approximated commitment vector. On output, it is +// the recalculated original commitment vector. 
+// +// TODO osgvsowi/55435592 Consider decoding the hint just-in-time to avoid allocating an +// entire vector for it +// + +////////////////////////////////////////////////////////////////////////// +// Encoding/decoding +////////////////////////////////////////////////////////////////////////// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementEncode( + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Out_writes_( nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PBYTE pbDst ); +// +// Encode a polynomial with coefficients in the range [0, 2^nBitsPerCoefficient] into a tightly +// packed byte array. +// +// Signed coefficients are encoded as described in the comment for +// SYMCRYPT_INTERNAL_MLDSA_SHORT_COEFFICIENT_ENCODE_DECODE. For these coefficients, the +// signedCoefficientBound parameter indicates the upper bound of the coefficients when they are +// positive, and is used to convert them from their internal representation modulo Q to the +// encoded representation. +// +// For polynomials whose coefficients are always positive and do not need any special encoding +// (e.g. t1), signedCoefficientBound must be 0. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaPolyElementDecode( + _In_reads_bytes_( nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// From a byte array that was previously encoded as described in SymCryptMlDsaPolyElementEncode, +// decode a polynomial with coefficients in the range [0, 2^nBitsPerCoefficient]. +// +// See comments on SymCryptMlDsaPolyElementEncode for information about how coefficients are +// encoded and decoded. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorEncode( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Out_writes_( pvSrc->nElems * nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PBYTE pbDst ); +// +// Encodes a vector of polynomials into a tightly packed byte array. +// pbDst := SymCryptMlDsaPolyElementEncode(i) for each polynomial i in pvSrc +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaVectorDecode( + _In_reads_bytes_( pvDst->nElems * nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// Decodes a vector of encoded polynomials from a byte array. +// pvDst[i] := SymCryptMlDsaPolyElementDecode(i) for each encoded polynomial i in pbSrc +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaPkEncode( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// pkEncode(key) = rho || SimpleBitPack(t1) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaPkDecode( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 flags, + _Inout_ PSYMCRYPT_MLDSAKEY pkMlDsakey ); +// +// Decodes a public key from a byte array. The encoded public key only contains rho and t1. +// We recalculate the A matrix from rho. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSkEncode( + _In_ PCSYMCRYPT_MLDSAKEY pKey, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// skEncode(key) = rho || K || H(pkEncode(key)) || BitPack(s1) || BitPack(s2) || BitPack(t0) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSkDecode( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 flags, + _Inout_ PSYMCRYPT_MLDSAKEY pKey ); +// +// Decodes a private key from a byte array. The encoded private key contains rho, K, s1, s2 and t0. +// We recalculate the A matrix from rho, t1 by recalculating A * s1 + s2 = t. 
This function also +// validates that the recalculated public key hash and t0 match the encoded values. If they do +// not, it returns SYMCRYPT_INVALID_BLOB. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaSigEncode( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbCommitmentHash ) PBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvResponse, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvHint, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// SigEncode from FIPS 204 +// Encodes a signature into a tightly packed byte array. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSigDecode( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbSig ) PCBYTE pbSig, + SIZE_T cbSig, + _Out_writes_( cbCommitmentHash) PBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvResponse, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvHint ); +// +// SigDecode from FIPS 204 +// Decodes a signature from a tightly packed byte array, producing the commitment hash, response +// vector, and hint vector. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaHintBitPack( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + _Out_writes_bytes_( pParams->nHintNonZeroCoeffs + pvSrc->nElems ) + PBYTE pbDst ); +// +// HintBitPack from FIPS 204 +// Packs the hint vector into a byte array. The first nHintNonZeroCoeffs bytes are the indices +// of non-zero coefficients in the vector, and the last nElems bytes contain the number of +// non-zero coefficients in polynomials 0..i of the vector. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaHintBitUnpack( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_bytes_( pParams->nHintNonZeroCoeffs + pvDst->nElems ) + PCBYTE pbSrc, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// HintBitUnpack from FIPS 204 +// Unpacks the hint vector from a byte array where each byte indicates the index of a non-zero +// coefficient in the corresponding polynomial. 
See comment on SymCryptMlDsaHintBitPack for more +// details about encoding. +// + +////////////////////////////////////////////////////////////////////////// +// Auxiliary functions +////////////////////////////////////////////////////////////////////////// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaGetInternalParamsFromParams( + SYMCRYPT_MLDSA_PARAMS params, + _Out_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS* pInternalParams ); +// +// Get the internal parameter structure corresponding to the given parameter set enum. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHashMlDsaValidateHashAlgAndGetOid( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + SYMCRYPT_PQDSA_HASH_ID hashAlg, + SIZE_T cbHash, + _Out_ PCSYMCRYPT_OID* ppOid ); +// +// Validates that the given hash algorithm meets the required collision strength for the ML-DSA +// parameter set, as defined in FIPS 204. Also validates that cbHash matches the expected length +// for the hash algorithm, or for XOFs, is >= the required collision strength. +// See comments on the definition of SymCryptHashMlDsaSign +// + +INT32 +SYMCRYPT_CALL +SymCryptMlDsaModPlusMinus( UINT32 r, UINT32 modulus ); +// +// Helper function which implements the mod+- operation from FIPS 204. +// In FIPS 204, r0 := r mod+- 2^d where mod+- returns the unique element in (-(2^d/2), 2^d/2] +// which is congruent to r modulo 2^d. Importantly, this means that r0 may be negative. +// To use consistent data structures throughout our implementation and simplify modular +// arithmetic, we do not use negative numbers. Instead, we always represent negative values as +// UINT32s modulo Q. +// +// Requirements: r < modulus +// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaPolyElementInfinityNorm( _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc ); +// +// Returns the infinity norm of the given polynomial element as defined in FIPS 204. +// The infinity norm is the maximum absolute value of w mod+- Q for each coefficient w in the +// polynomial. 
+// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaVectorInfinityNorm( _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc ); +// +// Returns the infinity norm of the given vector as defined in FIPS 204. +// = max(InfinityNorm(pvSrc[i])) for each polynomial in pvSrc +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaDecompose( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_range_(0, SYMCRYPT_MLDSA_Q - 1) UINT32 r, + _Out_opt_ UINT32 *puR1, + _Out_opt_ UINT32 *puR0 ); +// +// Decompose from FIPS 204 +// Decomposes r into (r1, r0) such that r1*2*gamma_2 + r0 is congruent to r modulo q +// See note above in SymCryptMlDsaModPlusMinus for important information about the +// representation of r0. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorHighBits( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// HighBits from FIPS 204 +// For each coefficient r of each polynomial in pvSrc, the corresponding coefficient in pvDst is +// set to *puR1 from Decompose(r). +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorLowBits( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// LowBits from FIPS 204 +// For each coefficient r of each polynomial in pvSrc, the corresponding coefficient in pvDst is +// set to *puR0 from Decompose(r). +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPower2Round( + _In_range_(0, SYMCRYPT_MLDSA_Q - 1) UINT32 r, + _Out_ UINT32 *puR1, + _Out_ UINT32 *puR0 ); +// +// Power2Round from FIPS 204 +// Decomposes r into (r1, r0) such that r1*2^d + r0 is congruent to r modulo q +// See note above in SymCryptMlDsaModPlusMinus for important information about the +// representation of r0. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementPower2Round( + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst1, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst0 ); +// +// (peDst1[i], peDst0[i]) = Power2Round(peSrc[i]) for each coefficient in peSrc +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorPower2Round( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst1, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst0 ); +// +// (pvDst1[i], pvDst0[i]) = Power2Round(pvSrc[i]) for each polynomial in pvSrc +// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaSignedCoefficientModQ( INT32 coefficient ); +// +// Maps a signed short coefficient to a residue modulo Q. +// + +_Success_( return != NULL ) +PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES +SYMCRYPT_CALL +SymCryptMlDsaTemporariesAllocateAndInitialize( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + UINT32 nRowVectors, + UINT32 nColVectors, + UINT32 nPolyElements, + UINT32 cbScratch ); +// +// Allocates and initializes a SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES structure and +// returns a pointer to the caller. Returns NULL if allocation fails. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaTemporariesFree( + _In_ _Post_invalid_ PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemporaries ); +// +// Wipes and frees a SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES structure previously allocated +// by SymCryptMlDsaTemporariesAllocateAndInitialize. +// diff --git a/libs/symcrypt/lib/sc_lib_mlkem.h b/libs/symcrypt/lib/sc_lib_mlkem.h new file mode 100644 index 00000000000..15c2ff91ce5 --- /dev/null +++ b/libs/symcrypt/lib/sc_lib_mlkem.h @@ -0,0 +1,468 @@ +// +// sc_lib_mlkem.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// Internal ML-KEM definitions for the symcrypt library. 
+// Always intended to be included as part of sc_lib.h +// + +//===================================================== +// ML-KEM internal high level types +// + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEM_POLYELEMENT { + // PolyElements just store the coefficients without any header. + UINT16 coeffs[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS]; +} SYMCRYPT_MLKEM_POLYELEMENT; +typedef SYMCRYPT_MLKEM_POLYELEMENT * PSYMCRYPT_MLKEM_POLYELEMENT; +typedef const SYMCRYPT_MLKEM_POLYELEMENT * PCSYMCRYPT_MLKEM_POLYELEMENT; + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR { + // PolyElement Accumulators just store the coefficients without any header. + UINT32 coeffs[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS]; +} SYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR; +typedef SYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR * PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR; + +// Currently maximum size of MLKEM matrices is baked in, they are always square and up to 4x4. +#define SYMCRYPT_MLKEM_MATRIX_MAX_NROWS (4) + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEM_VECTOR { + _Field_range_( 1, SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ) + UINT32 nRows; + UINT32 cbTotalSize; // Total size of the Vector + + // Followed by: + // nRows PolyElements +} SYMCRYPT_MLKEM_VECTOR, *PSYMCRYPT_MLKEM_VECTOR; +typedef const SYMCRYPT_MLKEM_VECTOR * PCSYMCRYPT_MLKEM_VECTOR; + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEM_MATRIX { + _Field_range_( 1, SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ) + UINT32 nRows; + UINT32 cbTotalSize; // Total size of the Matrix + + // Array of pointers to PolyElements in row-major order + PSYMCRYPT_MLKEM_POLYELEMENT apPolyElements[SYMCRYPT_MLKEM_MATRIX_MAX_NROWS * SYMCRYPT_MLKEM_MATRIX_MAX_NROWS]; + // Note: the extra indirection is intentional to make transposing the matrix cheap, + // given that in the MLKEM context the underlying PolyElements are relatively large + // so we don't want to move them around + + // Followed by: + // nRows*nRows PolyElements +} SYMCRYPT_MLKEM_MATRIX, 
*PSYMCRYPT_MLKEM_MATRIX; +typedef const SYMCRYPT_MLKEM_MATRIX * PCSYMCRYPT_MLKEM_MATRIX; + +// +// MLKEMKEY type +// + +#define SYMCRYPT_MLKEMKEY_MAX_SIZEOF_ENCODED_T (1536) + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MLKEM_INTERNAL_PARAMS { + UINT32 params; // parameter set of ML-KEM being used, takes a value from SYMCRYPT_MLKEM_PARAMS + + UINT32 cbPolyElement; // size of one polynomial ring element + UINT32 cbVector; // size of one vector + UINT32 cbMatrix; // size of one matrix + + UINT8 nRows; // corresponds to k from FIPS 203; the number of rows and columns in the matrix A, + // and the number of rows in column vectors s and t + UINT8 nEta1; // corresponds to eta_1 from FIPS 203; number of coinflips used in generating s and e + // in keypair generation, and r in encapsulation + UINT8 nEta2; // corresponds to eta_2 from FIPS 203; number of coinflips used in generating e_1 and + // e_2 in encapsulation + UINT8 nBitsOfU; // corresponds to d_u from FIPS 203; number of bits that the coefficients of the polynomial + // ring elements of u are compressed to in encapsulation for encoding into ciphertext + UINT8 nBitsOfV; // corresponds to d_v from FIPS 203; number of bits that the coefficients of the polynomial + // ring element v is compressed to in encapsulation for encoding into ciphertext +} SYMCRYPT_MLKEM_INTERNAL_PARAMS, *PSYMCRYPT_MLKEM_INTERNAL_PARAMS; +typedef const SYMCRYPT_MLKEM_INTERNAL_PARAMS * PCSYMCRYPT_MLKEM_INTERNAL_PARAMS; + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEMKEY { + UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in + // Also tracks which per-key selftests have been performed on this key + // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_MLKEMKEY_*, and + // SYMCRYPT_SELFTEST_KEY_* values + + SYMCRYPT_MLKEM_INTERNAL_PARAMS params; + + UINT32 cbTotalSize; // Total in-memory size of the ML-KEM key (this header and the following structs) + + BOOLEAN hasPrivateSeed; // Set to true if key has the private seed (d) 
+ BOOLEAN hasPrivateKey; // Set to true if key has the private key (s and z) + + // seeds + BYTE privateSeed[32]; // private seed (d) from which entire private PKE key can be derived + BYTE privateRandom[32]; // private random (z) used in implicit rejection + + BYTE publicSeed[32]; // public seed (rho) from which A can be derived + + // A o s + e = t + PSYMCRYPT_MLKEM_MATRIX pmAtranspose; // public matrix in NTT form (derived from publicSeed) + PSYMCRYPT_MLKEM_VECTOR pvt; // public vector in NTT form + + PSYMCRYPT_MLKEM_VECTOR pvs; // private vector in NTT form + + // misc fields + BYTE encodedT[SYMCRYPT_MLKEMKEY_MAX_SIZEOF_ENCODED_T]; // byte-encoding of public vector + // may only use a prefix of this buffer + BYTE encapsKeyHash[32]; // Precomputed value of hash of ML-KEM's byte-encoding of encapsulation key + + SYMCRYPT_MAGIC_FIELD + // Followed by: + // Atranspose + // t + // s +} SYMCRYPT_MLKEMKEY; + +//===================================================== +// ML-KEM primitives +// + +#define SYMCRYPT_MLKEM_Q (3329) + +#define SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ( sizeof(SYMCRYPT_MLKEM_POLYELEMENT) ) +#define SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR ( sizeof(SYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR) ) +#define SYMCRYPT_INTERNAL_MLKEM_MAXIMUM_VECTOR_SIZE ( sizeof(SYMCRYPT_MLKEM_VECTOR) + (SYMCRYPT_MLKEM_MATRIX_MAX_NROWS * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT) ) +#define SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT_OFFSET( _row ) ( sizeof(SYMCRYPT_MLKEM_VECTOR) + ((_row) * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT) ) +#define SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( _row, _pVector ) (PSYMCRYPT_MLKEM_POLYELEMENT)( (PBYTE)(_pVector) + SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT_OFFSET(_row) ) + +#define SYMCRYPT_MLKEM_SIZEOF_MAX_CIPHERTEXT (1568UL) +#define SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET (32UL) +#define SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM (32UL) + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT 
_SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES { + BYTE abVectorBuffer0[SYMCRYPT_INTERNAL_MLKEM_MAXIMUM_VECTOR_SIZE]; + BYTE abVectorBuffer1[SYMCRYPT_INTERNAL_MLKEM_MAXIMUM_VECTOR_SIZE]; + BYTE abPolyElementBuffer0[SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT]; + BYTE abPolyElementBuffer1[SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT]; + BYTE abPolyElementAccumulatorBuffer[SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR]; + union { + SYMCRYPT_SHAKE128_STATE shake128State; + SYMCRYPT_SHAKE256_STATE shake256State; + SYMCRYPT_SHA3_256_STATE sha3_256State; + SYMCRYPT_SHA3_512_STATE sha3_512State; + } hashState0; + union { + SYMCRYPT_SHAKE128_STATE shake128State; + SYMCRYPT_SHAKE256_STATE shake256State; + SYMCRYPT_SHA3_256_STATE sha3_256State; + SYMCRYPT_SHA3_512_STATE sha3_512State; + } hashState1; +} SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES; +typedef SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES * PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES; + +// exposed here for KAT testing +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulateInternal( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) + PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) + PBYTE pbCiphertext, + SIZE_T cbCiphertext, + _In_reads_bytes_( SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM ) + PCBYTE pbRandom, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps ); + +PSYMCRYPT_MLKEM_POLYELEMENT +SYMCRYPT_CALL +SymCryptMlKemPolyElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ); + +PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR +SYMCRYPT_CALL +SymCryptMlKemPolyElementAccumulatorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ); + +PSYMCRYPT_MLKEM_VECTOR +SYMCRYPT_CALL +SymCryptMlKemVectorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT32 nRows ); + +PSYMCRYPT_MLKEM_MATRIX +SYMCRYPT_CALL +SymCryptMlKemMatrixCreate( + 
_Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT32 nRows ); + +// +// ML-KEM operations acting on individual polynomial ring elements (PolyElements) +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementMulAndAccumulate( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paDst ); +// +// ML-KEM Polynomial Ring Element multiply and add: +// paDst = paDst + (peSrc1 o peSrc2) +// where: +// o is polynomial multiplication given sources in NTT form +// +// Requirements: +// - peSrc1 and peSrc2 must be PolyElements in ML-KEM's NTT form +// - paDst must be in NTT form +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemMontgomeryReduceAndAddPolyElementAccumulatorToPolyElement( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paSrc, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// Montgomery reduce and add a Polynomial Ring Element Accumulator to a Polynomial Ring +// Element, and wipe the accumulator: +// peDst = peDst + (paSrc ./ R) +// paSrc = 0 +// where: +// ./ is coefficient-wise division and R is Montgomery multiplier +// +// - One of the following conditions must be true: +// - paSrc must be pre-multiplied coefficient-wise by R for addition with a canonical +// representation of peDst +// - peDst must be coefficient-wise multiplied by the same constant factor as the +// result of (paSrc ./ R) for the addition to make sense +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementMulR( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// ML-KEM Polynomial Ring Element multiply each coefficient by Montgomery multiplier R +// peDst = peSrc .* R +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementAdd( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// ML-KEM Polynomial Ring Element addition +// peDst = peSrc1 + peSrc2 +// + +VOID 
+SYMCRYPT_CALL +SymCryptMlKemPolyElementSub( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// ML-KEM Polynomial Ring Element subtract: +// peDst = peSrc1 - peSrc2 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTT( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc ); +// +// ML-KEM Polynomial Ring Element NTT: +// peSrc = NTT(peSrc) per FIPS 203 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTAndMulR( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc ); +// +// ML-KEM Polynomial Ring Element INTT: +// peSrc = NTTinverse(peSrc) .* R +// where .* is coefficient-wise multiplication and R is Montgomery multiplier +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementCompressAndEncode( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 nBitsPerCoefficient, + _Out_writes_bytes_(nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8)) + PBYTE pbDst ); +// +// ML-KEM Polynomial Ring Element Compress and Encode. +// +// Each coefficient in the ring element is Compressed to nBitsPerCoefficient using +// rounding logic specified in FIPS 203, and the coefficients are encoded +// (packed together densely as 256 contiguous bitfields) into the pbDst buffer. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemPolyElementDecodeAndDecompress( + _In_reads_bytes_(nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8)) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// ML-KEM Polynomial Ring Element Decode and Decompress. +// +// The pbSrc buffer is interpreted as an encoded ring element, with each coefficient +// being represented by nBitsPerCoefficient. The resulting ring element is written to +// peDst. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSampleNTTFromShake128( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// Generates an ML-KEM Polynomial Ring Element in NTT form by extracting bytes from +// pre-instantiated SHAKE128 state. +// +// NOTE: we pass the SHAKE state to this function because we do not know up front +// how many bytes need to be extracted. +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSampleCBDFromBytes( + _In_reads_bytes_(eta*2*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) + 1) + PCBYTE pbSrc, + _In_range_(2,3) UINT32 eta, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// Generates an ML-KEM Polynomial Ring Element in centered binomial distribution +// from input byte array. +// Each coefficient is generated using 2*eta bits. +// + + +// +// ML-KEM operations acting on Linear Algebra objects +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemMatrixTranspose( + _Inout_ PSYMCRYPT_MLKEM_MATRIX pmSrc ); +// +// pmSrc = transpose(pmSrc) +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemMatrixVectorMontMulAndAdd( + _In_ PCSYMCRYPT_MLKEM_MATRIX pmSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvDst, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp ); +// +// pvDst = ((pmSrc1 o pvSrc2) ./ R) + pvDst +// +// Remarks: +// - paTmp is used internally for temporary storage, it is wiped before and after use +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorMontDotProduct( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peDst, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp ); +// +// peDst = (pvSrc1 o pvSrc2) ./ R +// +// Remarks: +// - paTmp is used internally for temporary storage, it is wiped before and after use +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorSetZero( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ); +// +// pvSrc = 0 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorMulR( + _In_ 
PCSYMCRYPT_MLKEM_VECTOR pvSrc, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ); +// +// pvDst = pvSrc .* R +// + + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorAdd( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ); +// +// pvDst = pvSrc1 + pvSrc2 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorSub( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ); +// +// pvDst = pvSrc1 - pvSrc2 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorNTT( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ); +// +// pvSrc = NTT(pvSrc) per FIPS 203 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorINTTAndMulR( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ); +// +// pvSrc = NTTinverse(pvSrc) .* R +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorCompressAndEncode( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc, + UINT32 nBitsPerCoefficient, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst ); +// +// See ML-KEM Polynomial Ring Element Compress and Encode +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemVectorDecodeAndDecompress( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 nBitsPerCoefficient, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ); +// +// See ML-KEM Polynomial Ring Element Decode and Decompress +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyWipePrivateState( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ); +// +// Wipes the ML-KEM key's private state. +// diff --git a/libs/symcrypt/lib/scsTools.c b/libs/symcrypt/lib/scsTools.c new file mode 100644 index 00000000000..3787d36adf6 --- /dev/null +++ b/libs/symcrypt/lib/scsTools.c @@ -0,0 +1,367 @@ +// +// scsTools.c Support tools for writing side-channel safe code +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// This code needs to process data in words, and we'd like to use 32-bit words on 32-bit +// architectures and 64-bit words on 64-bit architectures. 
So we use NATIVE_UINT & friends. +// + +// Buffer limits for SymCryptScsRotateBuffer +#define MIN_BUFFER_SIZE (32) + +// +// Masking functions +// Masking functions can be more efficient if the inputs are restricted to values that can +// be represented in the signed data types. +// This is why we have some functions that take 31-bit inputs. +// + +// 31-bit inputs (each argument must be < 2^31, as asserted below) + +UINT32 +SYMCRYPT_CALL +SymCryptMask32IsNonzeroU31( UINT32 v ) +{ + SYMCRYPT_ASSERT( v < (1UL<<31) ); + return (-(INT32) v) >> 31; +} + +UINT32 +SYMCRYPT_CALL +SymCryptMask32IsZeroU31( UINT32 v ) +{ + return ~SymCryptMask32IsNonzeroU31( v ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptMask32NeqU31( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < (1UL<<31) ); + SYMCRYPT_ASSERT( b < (1UL<<31) ); + + return SymCryptMask32IsNonzeroU31( a ^ b ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptMask32LtU31( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < (1UL<<31) ); + SYMCRYPT_ASSERT( b < (1UL<<31) ); + + // Casting to INT32 is defined as a and b are < 2^31 + return ((INT32) a - (INT32) b) >> 31; +} + + +// 32-bit inputs (no range restriction; the comparison goes through a 64-bit intermediate) + +UINT32 +SYMCRYPT_CALL +SymCryptMask32EqU32( UINT32 a, UINT32 b ) +{ + return ~(UINT32) ( (-(INT64)(a^b)) >> 32); +} + + +// Other helper functions: SymCryptRoundUpPow2Sizet returns the smallest power of 2 that is >= v (1 when v is 0 or 1) +SIZE_T +SYMCRYPT_CALL +SymCryptRoundUpPow2Sizet( SIZE_T v ) +{ + SIZE_T res; + + SYMCRYPT_ASSERT( v <= (SIZE_T_MAX / 2) + 1); + // If v is very large, then the result res might overflow. + // As SIZE_T is an unsigned type, the overflow is defined to + // be modulo 2^n for some n, and therefore we'll get res==0 + // which will terminate the loop. 
+ + res = 1; + while( res < v ) + { + res += res; + + // Catch any overflows; should never happen but break to avoid infinite loop + if( res == 0 ) + { + break; + } + } + + return res; +} + + +// +// Copy data +// + +VOID +SYMCRYPT_CALL +SymCryptScsCopy( + _In_reads_( cbDst ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +// Copy cbSrc bytes of pbSrc into pbDst without revealing cbSrc +// through side channels. +// - pbSrc/cbSrc: buffer to copy data from +// - pbDst/cbDst: buffer that receives the data +// Equivalent to: +// n = min( cbSrc, cbDst ) +// pbDst[ 0.. n-1 ] = pbSrc[ 0 .. n - 1 ] +// cbSrc is protected from side-channels; cbDst is public. +// Note that pbSrc must be cbDst bytes long, not cbSrc bytes. +{ + UINT32 i; + + SYMCRYPT_ASSERT( cbSrc <= (1UL << 31) && cbDst <= (1UL << 31) ); + + // Loop over the destination buffer and update each byte with the source data (if appropriate) + // pbSrc[ i ] is read for every i < cbDst; the mask keeps the source byte only when i < cbSrc. NOTE(review): cbSrc == 2^31 satisfies the assert above but not the b < 2^31 assert inside SymCryptMask32LtU31 - confirm the intended bound + for( i = 0; i < cbDst; i++ ) + { + pbDst[ i ] ^= (pbSrc[ i ] ^ pbDst[ i ]) & SymCryptMask32LtU31( i, (UINT32) cbSrc ); + } +} + + +// +// Buffer rotation +// To recover a message from an encoding with variable data position we have to do a copy from a +// variable memory location. But our memory access pattern cannot depend on the secret location. +// This code rotates a given buffer by a variable # bytes without revealing the shift amount. +// +// For efficiency we do this using NATIVE_UINT values so that we get the best performance on each platform. +// +// The first step is to rotate the array between 0 and NATIVE_BYTES-1 bytes to get the proper word alignment. +// After that we only have to rotate the words. +// We do this using a sequence of swaps. 
+// Notation: +// W[i] array of words, 0 <= i < n where n is the # words, a power of 2. +// s Rotation amount (to the left). The value in W[s] at the start should appear in W[0] at the end. 
+// +// We use masked swaps as they seem to be more efficient than masked multiplexers. +// We can split this problem down recursively +// +// Function Rotate( W, n, s) +// - Rotate W[0..n/2-1] by s mod n/2 +// - Rotate W[n/2..n-1] by s mod n/2 +// for i in 0..n/2-1: +// swap W[i] and W[i+n/2] if (i+s) % n >= n/2 +// +// After the two half-sized rotates, each word is in the right position modulo n/2, so all that needs to be +// done is to possibly swap (W[i],W[i+n/2]) pairs. +// Let W' be the array after the half-sized rotates. We have +// W'[i] = W[ (i + s) % (n/2) ] for i in 0..n/2 +// In the final array W'' we should have W''[i] = W[ (i+s)%n ] +// So W''[i] = W'[i] when (i+s) % n = (i+s) % n/2 which is equivalent to (i+s)%n < n/2. +// +// We turn this into a non-recursive algorithm. +// First we do rotations on 2 words, +// then the fixups to make it 4-word rotations, +// then on to 8-words, etc. +// At each level we compute the masks for the swaps once, and re-use them for each copy +// As a further optimization, we merge the 1st and 2nd pass into one to reduce the # read/writes +// +// We avoid using / and % throughout to avoid any time-dependent instructions. 
+// Preconditions (asserted below): cbBuffer is a power of 2 and >= MIN_BUFFER_SIZE; lshift < cbBuffer. +// + +VOID +SYMCRYPT_CALL +SymCryptScsRotateBuffer( + _Inout_updates_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + SIZE_T lshift ) +{ + NATIVE_UINT * pBuf; + UINT32 n; + UINT32 a; + UINT32 b; + UINT32 i; + UINT32 j; + UINT32 blockSize; + UINT32 blockSizeLog; + UINT32 blockSizeLimit; + + NATIVE_UINT V; + NATIVE_UINT T; + NATIVE_UINT A; + NATIVE_UINT B; + NATIVE_UINT C; + NATIVE_UINT D; + NATIVE_UINT M; + NATIVE_UINT M0; + NATIVE_UINT M1; + + NATIVE_UINT Mask[ 16 ]; // Size must be a power of 2 + + SYMCRYPT_ASSERT( (cbBuffer & (cbBuffer - 1)) == 0 && cbBuffer >= MIN_BUFFER_SIZE ); + SYMCRYPT_ASSERT( lshift < cbBuffer ); + + pBuf = (NATIVE_UINT *) pbBuffer; + n = (UINT32)cbBuffer / NATIVE_BYTES; + + // First a rotate left by lshift % NATIVE_BYTES + // This is more complex because shifting by NATIVE_BITS is not a defined operation, and behavior is different + // on different CPUs. + + // Compute the shift amounts & mask + // M = 0 if lshift % NATIVE_BYTES == 0, -1 otherwise + a = 8 * (lshift & (NATIVE_BYTES-1)); // Core shift + M = (-(NATIVE_INT)a) >> (NATIVE_BITS - 1); // mask + b = (NATIVE_BITS - a) & (UINT32) M; // complementary shift, or 0 if it would be equal to NATIVE_BITS + + i = n; + V = pBuf[0]; + do{ + // Loop invariant: i > 0 && V = pBuf[i % n] from before any changes; + i--; + T = pBuf[i]; + pBuf[i] = T >> a | ((V << b) & M); + V = T; + } while( i > 0 ); + + // Now that the rotation is word-aligned, we can start our word rotation + lshift >>= NATIVE_BYTES_LOG2; // convert to # words to rotate. 
+ + // We know we have at least 4 words, so we start with a pass to do 4-word rotations + SYMCRYPT_ASSERT( n >= 4 ); + + M = -(NATIVE_INT)(lshift & 1); + M0 = -(NATIVE_INT)( ((lshift + 0) >> 1) & 1 ); // s + 0 mod 4 >= 2 + M1 = -(NATIVE_INT)( ((lshift + 1) >> 1) & 1 ); // s + 1 mod 4 >= 2 + + for( i=0; i<n; i+=4 ) + { + A = pBuf[i]; + B = pBuf[i+1]; + C = pBuf[i+2]; + D = pBuf[i+3]; + + T = (A ^ B) & M; + A ^= T; + B ^= T; + + T = (C ^ D) & M; + C ^= T; + D ^= T; + + T = (A ^ C) & M0; + A ^= T; + C ^= T; + + T = (B ^ D) & M1; + B ^= T; + D ^= T; + + pBuf[i ] = A; + pBuf[i+1] = B; + pBuf[i+2] = C; + pBuf[i+3] = D; + } + + // Do the swaps using the mask array + blockSize = 4; // size of rotated blocks + blockSizeLog = 2; + + // + // Using the mask array is beneficial as long as the array is used twice or more + // Each swap loop processes 2 * blockSize of data, so the block size should never + // be larger than n/4 + blockSizeLimit = SYMCRYPT_MIN( SYMCRYPT_ARRAY_SIZE( Mask ), n/4 ); + while( blockSize <= blockSizeLimit ) + { + // Compute the masks for this level + for( i=0; i<blockSize; i++ ) + { + Mask[i] =-(NATIVE_INT)( ((i + lshift) >> blockSizeLog) & 1); + } + + // Now swap the elements of pairs of blocks according to the masks + for( i=0; i < n; i += 2 * blockSize ) + { + for( j=0; j < blockSize; j++ ) + { + A = pBuf[ i + j ]; + B = pBuf[ i + j + blockSize ]; + T = (A ^ B) & Mask[j]; + A ^= T; + B ^= T; + pBuf[ i + j ] = A; + pBuf[ i + j + blockSize ] = B; + } + } + blockSize *= 2; + blockSizeLog += 1; + } + + // Do the rest without using a mask array, either because we are only + // going to use each mask value once, or because we don't have a large-enough + // array + while( blockSize < n ) + { + // Now swap the elements of pairs of blocks according to the masks + for( i=0; i < n; i += 2 * blockSize ) + { + for( j=0; j < blockSize; j++ ) + { + M = -(NATIVE_INT)( ((j + lshift) >> blockSizeLog) & 1); + A = pBuf[ i + j ]; + B = pBuf[ i + j + blockSize ]; + T = (A 
^ B) & M; + A ^= T; + B ^= T; + pBuf[ i + j ] = A; + pBuf[ i + j + blockSize ] = B; + } + } + blockSize *= 2; + blockSizeLog += 1; + } + +} + + +// +// Map values in a side-channel safe way, typically used for mapping error codes. +// +// (pcMap, nMap) point to an array of nMap entries of type SYMCRYPT_UINT32_MAP; +// each entry specifies a single mapping. If u32Input matches the +// 'from' field, the return value will be the 'to' field value. +// If u32Input is not equal to any 'from' field values, the return value is u32Default. +// Both u32Input and the return value are treated as secrets w.r.t. side channels. +// +// If multiple map entries have the same 'from' field value, then the return value +// is one of the several 'to' field values; which one is not defined. +// +// This function is particularly useful when mapping error codes in situations where +// the actual error cannot be revealed through side channels. +// + +UINT32 +SYMCRYPT_CALL +SymCryptMapUint32( + UINT32 u32Input, + UINT32 u32Default, + _In_reads_(nMap) PCSYMCRYPT_UINT32_MAP pcMap, + SIZE_T nMap) +{ + UINT32 mask; + UINT32 u32Output = u32Default; + + for (SIZE_T i = 0; i < nMap; ++i) + { + mask = SymCryptMask32EqU32(u32Input, pcMap[i].from); + u32Output ^= (u32Output ^ pcMap[i].to) & mask; + } + + return u32Output; +} diff --git a/libs/symcrypt/lib/selftest.c b/libs/symcrypt/lib/selftest.c new file mode 100644 index 00000000000..4921b343a59 --- /dev/null +++ b/libs/symcrypt/lib/selftest.c @@ -0,0 +1,17 @@ +// +// selftest.c +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +const BYTE SymCryptTestMsg3 [ 3] = { 'a', 'b', 'c' }; + +const BYTE SymCryptTestKey32[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +}; + +const BYTE SymCryptTestMsg16[16] = { + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff +}; diff --git a/libs/symcrypt/lib/session.c b/libs/symcrypt/lib/session.c new file mode 100644 index 00000000000..71f59549d9b --- /dev/null +++ b/libs/symcrypt/lib/session.c @@ -0,0 +1,377 @@ +// +// session.c code for Session API implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// Initializes pSession for sending: message number starts at 0, the encrypt flag is set and no mutex is used; any nonzero flags value is rejected. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionSenderInit( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT32 senderId, + UINT32 flags ) +{ + // Make sure we only specify the correct flags + if (flags != 0) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + pSession->replayState.messageNumber = 0; + pSession->senderId = senderId; + pSession->flags = SYMCRYPT_FLAG_SESSION_ENCRYPT; + pSession->pMutex = NULL; + + return SYMCRYPT_NO_ERROR; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionReceiverInit( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT32 senderId, + UINT32 flags ) +{ + PVOID pMutex = NULL; + + // Make sure we only specify the correct flags + if (flags != 0) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + +#if SYMCRYPT_CPU_AMD64 + if ( !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_CMPXCHG16B ) ) + { + pMutex = SymCryptCallbackAllocateMutexFastInproc(); + if( pMutex == NULL ) + { + return SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + } + } +#elif SYMCRYPT_CPU_ARM64 // Arm64 always has support for CAS128 - so never need a lock +#else // 32b and generic platforms will always need to use a lock + pMutex = 
SymCryptCallbackAllocateMutexFastInproc(); + if( pMutex == NULL ) + { + return SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + } +#endif + 
pSession->pMutex = pMutex; + + // This represents that the message numbers 1-64 inclusive have not yet been successfully used in decryption + pSession->replayState.replayMask = 0; + pSession->replayState.messageNumber = 64; + + pSession->senderId = senderId; + pSession->flags = 0; + + return SYMCRYPT_NO_ERROR; +} + +// Frees the session mutex (if one was allocated) and wipes the whole session structure. +VOID +SYMCRYPT_CALL +SymCryptSessionDestroy(_Inout_ PSYMCRYPT_SESSION pSession ) +{ + if ( pSession->pMutex != NULL ) + { + SymCryptCallbackFreeMutexFastInproc(pSession->pMutex); + } + SymCryptWipeKnownSize(pSession, sizeof(*pSession)); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionGcmEncrypt( + _Inout_ PSYMCRYPT_SESSION pSession, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag, + _Out_opt_ PUINT64 pu64MessageNumber ) +{ + BYTE nonce[12]; + UINT64 messageNumber; + + if ( (pSession->flags & SYMCRYPT_FLAG_SESSION_ENCRYPT) != SYMCRYPT_FLAG_SESSION_ENCRYPT ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + messageNumber = SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(&pSession->replayState.messageNumber, 1); + + // We do not allow messageNumber to go above some maximum value (currently 2^64 - 2^32) + if ( messageNumber > SYMCRYPT_SESSION_MAX_MESSAGE_NUMBER ) + { + // Decrement the session messageNumber on the error path so that this session will continue + // to only generate errors + SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(&pSession->replayState.messageNumber, -1ll); + return SYMCRYPT_INVALID_ARGUMENT; + } + + SYMCRYPT_STORE_MSBFIRST32(&nonce[0], pSession->senderId); + SYMCRYPT_STORE_MSBFIRST64(&nonce[4], messageNumber); + + SymCryptGcmEncrypt( + pExpandedKey, + nonce, + sizeof(nonce), + pbAuthData, + cbAuthData, + pbSrc, + pbDst, + cbData, + pbTag, + cbTag); + + if( pu64MessageNumber != 
NULL ) + { + *pu64MessageNumber = messageNumber; + } + + return 
SYMCRYPT_NO_ERROR; +} + +// Convenience function used in SymCryptSessionDecryptUpdateState* +// +// Given an observedState check whether messageNumber represents a replay +// If it does, return SYMCRYPT_SESSION_REPLAY_FAILURE +// Otherwise, set desiredState to the observedState updated to represent messageNumber has been seen +// and return SYMCRYPT_NO_ERROR +FORCEINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionDecryptComputeDesiredReplayState( + _In_ PCSYMCRYPT_SESSION_REPLAY_STATE observedState, + _Out_ PSYMCRYPT_SESSION_REPLAY_STATE desiredState, + UINT64 messageNumber ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT64 messageMask; + UINT64 shiftAmount; + UINT64 shiftedMask; + + if ( messageNumber > observedState->messageNumber ) + { + // The observed message number is behind messageNumber that we want to mark successful + // Shift replayMask appropriately to preserve previously seen message numbers + shiftedMask = 0; + shiftAmount = messageNumber - observedState->messageNumber; + if( shiftAmount < 64 ) + { + shiftedMask = observedState->replayMask << shiftAmount; + } + // Mark messageNumber as seen in the replayMask + desiredState->replayMask = shiftedMask | 1; + desiredState->messageNumber = messageNumber; + } + else if ( messageNumber <= observedState->messageNumber - 64 ) + { + // The observed message number is too far ahead of messageNumber + // We cannot hope to succeed + scError = SYMCRYPT_SESSION_REPLAY_FAILURE; + goto cleanup; + } + else + { + // The observed message number is ahead of or equal to messageNumber + // Check if messageNumber has already been used + messageMask = 1ull << (observedState->messageNumber - messageNumber); // shiftAmount is in [0, 63] + if ((messageMask & observedState->replayMask) == messageMask) + { + scError = SYMCRYPT_SESSION_REPLAY_FAILURE; + goto cleanup; + } + // This is first time we have seen messageNumber - set the replayMask bit appropriately + desiredState->replayMask = observedState->replayMask | 
messageMask; + desiredState->messageNumber = observedState->messageNumber; + } + +cleanup: + return scError; +} + +#if SYMCRYPT_USE_CAS128 + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionDecryptUpdateStateCAS128( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_SESSION_REPLAY_STATE expectedState; + SYMCRYPT_SESSION_REPLAY_STATE desiredState; + + // Non-atomic read of pSession's replayState. We can use this initial value as a good guess of + // the expected state, but we cannot fail based on it (as replayMask and messageNumber may have + // been read from different writes to the replayState) + expectedState = pSession->replayState; + + // Compute desiredState based on non-atomic read + // If it looks like this may be a replay, ensure we fail first CAS so we recompute desiredState + // from an atomic read in the loop below + if ( SymCryptSessionDecryptComputeDesiredReplayState(&expectedState, &desiredState, messageNumber) != SYMCRYPT_NO_ERROR ) + { + // pSession->replayState.messageNumber can never take the value 0 as it starts at 64 and is + // monotonic increasing + expectedState.messageNumber = 0; + } + + while( scError == SYMCRYPT_NO_ERROR ) + { + if ( SymCryptAtomicCas128Relaxed((PUINT64)&pSession->replayState, (PUINT64)&expectedState, (PUINT64)&desiredState) ) + { + // We succeeded in updating pSession->replayState and are done + break; + } + + // Compute new desiredState based on atomic read from CAS failure + // We may now correctly fall out of loop if a replay is detected + scError = SymCryptSessionDecryptComputeDesiredReplayState(&expectedState, &desiredState, messageNumber); + } + + return scError; +} + +#endif + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionDecryptUpdateStateLock( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_SESSION_REPLAY_STATE desiredState; + + if ( pSession->pMutex == NULL ) + { + 
return SYMCRYPT_INVALID_ARGUMENT; + } + + // Check whether we are definitely too late to proceed before attempting to acquire mutex + // Do not need atomic read of full replayState here, but do need atomic 64b read of + // pSession->replayState.messageNumber + if ( messageNumber <= (UINT64) SYMCRYPT_ATOMIC_LOAD64_RELAXED(&pSession->replayState.messageNumber) - 64 ) + { + return SYMCRYPT_SESSION_REPLAY_FAILURE; + } + + SymCryptCallbackAcquireMutexFastInproc(pSession->pMutex); + ////// + // !!! Do not return until we have called SymCryptCallbackReleaseMutexFastInproc !!! + ////// + + scError = SymCryptSessionDecryptComputeDesiredReplayState(&pSession->replayState, &desiredState, messageNumber); + if ( scError == SYMCRYPT_NO_ERROR ) + { + pSession->replayState = desiredState; + } + + SymCryptCallbackReleaseMutexFastInproc(pSession->pMutex); + + return scError; +} + +// Dispatches to the CAS128-based or the mutex-based replay-state update, selected per platform as shown below. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionDecryptUpdateState( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + +#if SYMCRYPT_CPU_AMD64 + if ( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_CMPXCHG16B ) ) + { + scError = SymCryptSessionDecryptUpdateStateCAS128( pSession, messageNumber ); + } + else + { + scError = SymCryptSessionDecryptUpdateStateLock( pSession, messageNumber ); + } +#elif SYMCRYPT_CPU_ARM64 // Arm64 always has support for CAS128 (possibly via LDXP + STXP) + scError = SymCryptSessionDecryptUpdateStateCAS128( pSession, messageNumber ); +#else // 32b and generic platforms will always need to use a lock + scError = SymCryptSessionDecryptUpdateStateLock( pSession, messageNumber ); +#endif + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionGcmDecrypt( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_opt_( cbAuthData ) PCBYTE 
pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE 
pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + BYTE nonce[12]; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if ( (pSession->flags & SYMCRYPT_FLAG_SESSION_ENCRYPT) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check for messageNumbers which are too high or not valid + if ( (messageNumber > SYMCRYPT_SESSION_MAX_MESSAGE_NUMBER) || (messageNumber == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check whether we are definitely too late to proceed before doing any decryption work + // Do not need atomic read of full replayState here, but do need atomic 64b read of + // pSession->replayState.messageNumber + if ( messageNumber <= (UINT64) SYMCRYPT_ATOMIC_LOAD64_RELAXED(&pSession->replayState.messageNumber) - 64 ) + { + scError = SYMCRYPT_SESSION_REPLAY_FAILURE; + goto cleanup; + } + + SYMCRYPT_STORE_MSBFIRST32(&nonce[0], pSession->senderId); + SYMCRYPT_STORE_MSBFIRST64(&nonce[4], messageNumber); + + scError = SymCryptGcmDecrypt( + pExpandedKey, + nonce, + sizeof(nonce), + pbAuthData, + cbAuthData, + pbSrc, + pbDst, + cbData, + pbTag, + cbTag); + + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; // wipes pbDst twice, but we don't care about performance in the error case + } + + scError = SymCryptSessionDecryptUpdateState(pSession, messageNumber); + +cleanup: + if ( scError != SYMCRYPT_NO_ERROR ) + { + SymCryptWipe( pbDst, cbData ); + } + + return scError; +} diff --git a/libs/symcrypt/lib/sha1.c b/libs/symcrypt/lib/sha1.c new file mode 100644 index 00000000000..90711a9bfb3 --- /dev/null +++ b/libs/symcrypt/lib/sha1.c @@ -0,0 +1,472 @@ +// +// Sha1.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This revised implementation is based on the older one in RSA32LIB by +// Scott Field and Dan Shumow. It is not based on any 3rd party code. 
+// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + +const SYMCRYPT_HASH SymCryptSha1Algorithm_default = { + &SymCryptSha1Init, + &SymCryptSha1Append, + &SymCryptSha1Result, + &SymCryptSha1AppendBlocks, + &SymCryptSha1StateCopy, + sizeof( SYMCRYPT_SHA1_STATE ), + SYMCRYPT_SHA1_RESULT_SIZE, + SYMCRYPT_SHA1_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA1_STATE, chain ), + SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA1_STATE, chain ), +}; + +const PCSYMCRYPT_HASH SymCryptSha1Algorithm = &SymCryptSha1Algorithm_default; + + +// +// The round constants used by SHA-1 +// +static const UINT32 Sha1K[4] = { + 0x5a827999UL, 0x6ed9eba1UL, 0x8f1bbcdcUL, 0xca62c1d6UL, +}; + +// +// Initial state +// +static const UINT32 sha1InitialState[5] = { + 0x67452301UL, + 0xefcdab89UL, + 0x98badcfeUL, + 0x10325476UL, + 0xc3d2e1f0UL, +}; + +// +// SymCryptSha1 +// +#define ALG SHA1 +#define Alg Sha1 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + + + +// +// SymCryptSha1Init +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha1Init( _Out_ PSYMCRYPT_SHA1_STATE pState ) +{ + SYMCRYPT_SET_MAGIC( pState ); + + pState->dataLengthL = 0; + pState->dataLengthH = 0; + pState->bytesInBuffer = 0; + + memcpy( &pState->chain.H[0], &sha1InitialState[0], sizeof( sha1InitialState ) ); + + // + // There is no need to initialize the buffer part of the state as that will be + // filled before it is used. 
+ // +} + + +// +// SymCryptSha1Append +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha1Append( + _Inout_ PSYMCRYPT_SHA1_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHashAppendInternal( SymCryptSha1Algorithm, (PSYMCRYPT_COMMON_HASH_STATE)pState, pbData, cbData ); +} + + +// +// SymCryptSha1Result +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha1Result( + _Inout_ PSYMCRYPT_SHA1_STATE pState, + _Out_writes_( SYMCRYPT_SHA1_RESULT_SIZE ) PBYTE pbResult ) +{ + UINT32 bytesInBuffer; + SIZE_T tmp; + + // + // SHA-1 uses almost the MD4 padding, except that the length in the padding is stored + // MSBFirst, rather than LSBFirst. + // As SHA-256 has a dedicated (fast) padding anyway, there is no gain to create a + // common padding routine for SHA-1 as it wouldn't be shared by anyone right now. + // + SYMCRYPT_CHECK_MAGIC( pState ); + + bytesInBuffer = (UINT32)(pState->bytesInBuffer); + + // + // The buffer is never completely full, so we can always put the first + // padding byte in. + // + pState->buffer[bytesInBuffer++] = 0x80; + + if( bytesInBuffer > 64-8 ) { + // + // No room for the rest of the padding. 
Pad with zeroes & process block + // bytesInBuffer is at most 64, so we do not have an integer underflow + // + memset( &pState->buffer[bytesInBuffer], 0, 64-bytesInBuffer ); + SymCryptSha1AppendBlocks( &pState->chain, pState->buffer, 64, &tmp ); + bytesInBuffer = 0; + } + + // + // Set rest of padding + // At this point bytesInBuffer <= 64-8, so we don't have an underflow + // We wipe to the end of the buffer as it is 16-aligned, + // and it is faster to wipe to an aligned point + // + memset( &pState->buffer[bytesInBuffer], 0, 64-bytesInBuffer ); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[64-8], pState->dataLengthL * 8 ); + + // + // Process the final block + // + SymCryptSha1AppendBlocks( &pState->chain, pState->buffer, 64, &tmp ); + + // + // Write the output in the correct byte order + // + SymCryptUint32ToMsbFirst( &pState->chain.H[0], pbResult, 5 ); + + // + // Wipe & re-initialize + // We have to wipe the whole state because the Init call + // might be optimized away by a smart compiler. + // + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SymCryptSha1Init( pState ); +} + + +// +// For documentation on these functions see FIPS 180-2 +// +// CH, MAJ and PARITY are the functions Ch, Maj, and Parity from the standard. +// +//#define CH( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z))) +//#define MAJ( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define MAJ( x, y, z ) ((((x) | (y)) & (z) ) | ((x) & (y))) +#define CH( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z)) + +#define PARITY( x, y, z ) ((x) ^ (y) ^ (z) ) + + +// +// The values a-e are stored in an array called ae. +// NOTE(review): SymCryptSha1AppendBlocks in this file keeps a-e in the scalar variables A..E and rotates the macro arguments instead; the ae array wording looks historical. +// We have unrolled the code completely. This makes both the indices into +// the ae array constant, and it makes the message addressing constant. 
+// + +// +// Initial round macro +// +// r is the round number +// ae[(r+0)%5] = e; +// ae[(r+1)%5] = d; +// ae[(r+2)%5] = c; +// ae[(r+3)%5] = b; +// ae[(r+4)%5] = a; +// After that incrementing the round number will automatically map a->b, b->c, etc. +// + +// +// The core round routine (excluding the message schedule) +// +// In more readable form this macro does the following: +// e = ROL(a,5) + F(b,c,d) + e + K[r/20] + W[round] +// b = ROL( b, 30 ) +// + +#define CROUND( a, b, c, d, e, r, F ) {\ + W[r%16] = Wt; \ + e += ROL32( a, 5 ) + F(b, c, d) + Sha1K[r/20] + Wt;\ + b = ROR32( b, 2 );\ +} + +#define IROUND( a, b, c, d, e, r, F ) { \ + Wt = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] ); \ + CROUND( a, b, c, d, e, r, F ); \ +} + +// +// Subsequent rounds. +// This is the same as the IROUND except that it adds the message schedule, +// and takes the message word from the intermediate +// +#define FROUND( a, b, c, d, e, r, F ) { \ + Wt = ROL32( W[(r+13)%16] ^ W[(r+8)%16] ^ W[(r+2)%16] ^ W[r%16], 1 );\ + CROUND( a, b, c, d, e, r, F ); \ +} + +VOID +SYMCRYPT_CALL +SymCryptSha1AppendBlocks( + _Inout_ SYMCRYPT_SHA1_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + + SYMCRYPT_ALIGN UINT32 W[16]; + UINT32 A, B, C, D, E; + UINT32 Wt; + + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + + while( cbData >= 64 ) + { + // + // initial rounds 1 to 16 + // + + IROUND( A, B, C, D, E, 0, CH ); + IROUND( E, A, B, C, D, 1, CH ); + IROUND( D, E, A, B, C, 2, CH ); + IROUND( C, D, E, A, B, 3, CH ); + IROUND( B, C, D, E, A, 4, CH ); + IROUND( A, B, C, D, E, 5, CH ); + IROUND( E, A, B, C, D, 6, CH ); + IROUND( D, E, A, B, C, 7, CH ); + IROUND( C, D, E, A, B, 8, CH ); + IROUND( B, C, D, E, A, 9, CH ); + IROUND( A, B, C, D, E, 10, CH ); + IROUND( E, A, B, C, D, 11, CH ); + IROUND( D, E, A, B, C, 12, CH ); + IROUND( C, D, E, A, B, 13, CH ); + IROUND( B, C, D, E, A, 14, CH 
); + IROUND( A, B, C, D, E, 15, CH ); + + // + // Full rounds (including msg expansion) from here on + // + FROUND( E, A, B, C, D, 16, CH ); + FROUND( D, E, A, B, C, 17, CH ); + FROUND( C, D, E, A, B, 18, CH ); + FROUND( B, C, D, E, A, 19, CH ); + + + FROUND( A, B, C, D, E, 20, PARITY ); + FROUND( E, A, B, C, D, 21, PARITY ); + FROUND( D, E, A, B, C, 22, PARITY ); + FROUND( C, D, E, A, B, 23, PARITY ); + FROUND( B, C, D, E, A, 24, PARITY ); + FROUND( A, B, C, D, E, 25, PARITY ); + FROUND( E, A, B, C, D, 26, PARITY ); + FROUND( D, E, A, B, C, 27, PARITY ); + FROUND( C, D, E, A, B, 28, PARITY ); + FROUND( B, C, D, E, A, 29, PARITY ); + FROUND( A, B, C, D, E, 30, PARITY ); + FROUND( E, A, B, C, D, 31, PARITY ); + FROUND( D, E, A, B, C, 32, PARITY ); + FROUND( C, D, E, A, B, 33, PARITY ); + FROUND( B, C, D, E, A, 34, PARITY ); + FROUND( A, B, C, D, E, 35, PARITY ); + FROUND( E, A, B, C, D, 36, PARITY ); + FROUND( D, E, A, B, C, 37, PARITY ); + FROUND( C, D, E, A, B, 38, PARITY ); + FROUND( B, C, D, E, A, 39, PARITY ); + + + FROUND( A, B, C, D, E, 40, MAJ ); + FROUND( E, A, B, C, D, 41, MAJ ); + FROUND( D, E, A, B, C, 42, MAJ ); + FROUND( C, D, E, A, B, 43, MAJ ); + FROUND( B, C, D, E, A, 44, MAJ ); + FROUND( A, B, C, D, E, 45, MAJ ); + FROUND( E, A, B, C, D, 46, MAJ ); + FROUND( D, E, A, B, C, 47, MAJ ); + FROUND( C, D, E, A, B, 48, MAJ ); + FROUND( B, C, D, E, A, 49, MAJ ); + FROUND( A, B, C, D, E, 50, MAJ ); + FROUND( E, A, B, C, D, 51, MAJ ); + FROUND( D, E, A, B, C, 52, MAJ ); + FROUND( C, D, E, A, B, 53, MAJ ); + FROUND( B, C, D, E, A, 54, MAJ ); + FROUND( A, B, C, D, E, 55, MAJ ); + FROUND( E, A, B, C, D, 56, MAJ ); + FROUND( D, E, A, B, C, 57, MAJ ); + FROUND( C, D, E, A, B, 58, MAJ ); + FROUND( B, C, D, E, A, 59, MAJ ); + + FROUND( A, B, C, D, E, 60, PARITY ); + FROUND( E, A, B, C, D, 61, PARITY ); + FROUND( D, E, A, B, C, 62, PARITY ); + FROUND( C, D, E, A, B, 63, PARITY ); + FROUND( B, C, D, E, A, 64, PARITY ); + FROUND( A, B, C, D, E, 65, PARITY ); + FROUND( 
E, A, B, C, D, 66, PARITY ); + FROUND( D, E, A, B, C, 67, PARITY ); + FROUND( C, D, E, A, B, 68, PARITY ); + FROUND( B, C, D, E, A, 69, PARITY ); + FROUND( A, B, C, D, E, 70, PARITY ); + FROUND( E, A, B, C, D, 71, PARITY ); + FROUND( D, E, A, B, C, 72, PARITY ); + FROUND( C, D, E, A, B, 73, PARITY ); + FROUND( B, C, D, E, A, 74, PARITY ); + FROUND( A, B, C, D, E, 75, PARITY ); + FROUND( E, A, B, C, D, 76, PARITY ); + FROUND( D, E, A, B, C, 77, PARITY ); + FROUND( C, D, E, A, B, 78, PARITY ); + FROUND( B, C, D, E, A, 79, PARITY ); + + + pChain->H[0] = A = A + pChain->H[0]; + pChain->H[1] = B = B + pChain->H[1]; + pChain->H[2] = C = C + pChain->H[2]; + pChain->H[3] = D = D + pChain->H[3]; + pChain->H[4] = E = E + pChain->H[4]; + + pbData += 64; + cbData -= 64; + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipeKnownSize( W, sizeof( W ) ); + SYMCRYPT_FORCE_WRITE32( &A, 0 ); + SYMCRYPT_FORCE_WRITE32( &B, 0 ); + SYMCRYPT_FORCE_WRITE32( &C, 0 ); + SYMCRYPT_FORCE_WRITE32( &D, 0 ); + SYMCRYPT_FORCE_WRITE32( &E, 0 ); + SYMCRYPT_FORCE_WRITE32( &Wt, 0 ); +} + + +VOID +SYMCRYPT_CALL +SymCryptSha1StateExport( + _In_ PCSYMCRYPT_SHA1_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA1_STATE_EXPORT_SIZE ) PBYTE pbBlob ) +{ + SYMCRYPT_ALIGN SYMCRYPT_SHA1_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA1_STATE_EXPORT_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pState ); + + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage + + blob.header.magic = SYMCRYPT_BLOB_MAGIC; + blob.header.size = SYMCRYPT_SHA1_STATE_EXPORT_SIZE; + blob.header.type = SymCryptBlobTypeSha1State; + + // + // Copy the relevant data. Buffer will be 0-padded. 
+ // + + SymCryptUint32ToMsbFirst( &pState->chain.H[0], &blob.chain[0], 5 ); + blob.dataLength = pState->dataLengthL; + memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLength & 0x3f ); + + SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer ); + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] ); + + memcpy( pbBlob, &blob, sizeof( blob ) ); + +//cleanup: + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); + return; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha1StateImport( + _Out_ PSYMCRYPT_SHA1_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA1_STATE_EXPORT_SIZE) PCBYTE pbBlob ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_ALIGN SYMCRYPT_SHA1_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + BYTE checksum[8]; + + C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA1_STATE_EXPORT_SIZE ); + memcpy( &blob, pbBlob, sizeof( blob ) ); + + if( blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_SHA1_STATE_EXPORT_SIZE || + blob.header.type != SymCryptBlobTypeSha1State ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum ); + if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMsbFirstToUint32( &blob.chain[0], &pState->chain.H[0], 5 ); + pState->dataLengthL = blob.dataLength; + pState->dataLengthH = 0; + pState->bytesInBuffer = blob.dataLength & 0x3f; + memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer ); + + SYMCRYPT_SET_MAGIC( pState ); + +cleanup: + SymCryptWipeKnownSize( &blob, sizeof(blob) ); + return scError; +} + + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha1KATAnswer[ 20 ] = { + 0xa9, 0x99, 0x3e, 0x36, + 0x47, 
0x06, 0x81, 0x6a, + 0xba, 0x3e, 0x25, 0x71, + 0x78, 0x50, 0xc2, 0x6c, + 0x9c, 0xd0, 0xd8, 0x9d + } ; + +VOID +SYMCRYPT_CALL +SymCryptSha1Selftest(void) +{ + BYTE result[SYMCRYPT_SHA1_RESULT_SIZE]; + + SymCryptSha1( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, sha1KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'SHA1' ); + } +} diff --git a/libs/symcrypt/lib/sha256-xmm.c b/libs/symcrypt/lib/sha256-xmm.c new file mode 100644 index 00000000000..c181c4824a2 --- /dev/null +++ b/libs/symcrypt/lib/sha256-xmm.c @@ -0,0 +1,354 @@ +#include "precomp.h" + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3") +#endif + +extern SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptSha256K[64]; + + +// Endianness transformation for 4 32-bit values in an XMM register +const SYMCRYPT_ALIGN_AT(16) UINT32 BYTE_REVERSE_32[4] = { + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, +}; + +// Shuffle 32-bit words in an XMM register: W3 W2 W1 W0 -> 0 0 W2 W0 +// Used by the SSSE3 assembly implementation +const SYMCRYPT_ALIGN_AT(16) UINT32 XMM_PACKLOW[4] = { + 0x03020100, 0x0b0a0908, 0x80808080, 0x80808080, +}; + +// Shuffle 32-bit words in an XMM register: W3 W2 W1 W0 -> W2 W0 0 0 +// Used by the SSSE3 assembly implementation +const SYMCRYPT_ALIGN_AT(16) UINT32 XMM_PACKHIGH[4] = { + 0x80808080, 0x80808080, 0x03020100, 0x0b0a0908, +}; + + +#if SYMCRYPT_MS_VC && !defined(__clang__) +#define RORX_U32 _rorx_u32 +#define RORX_U64 _rorx_u64 +#else +// TODO: implement _rorx functions for clang +#define RORX_U32 ROR32 +#define RORX_U64 ROR64 +#endif // SYMCRYPT_MS_VC + + +// +// For documentation on these functions see FIPS 180-2 +// +// MAJ and CH are the functions Maj and Ch from the standard. +// CSIGMA0 and CSIGMA1 are the capital sigma functions. 
// LSIGMA0 and LSIGMA1 are the lowercase sigma functions.
//
// The canonical definitions of the MAJ and CH functions are:
//#define MAJ( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
//#define CH( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z)))
// The definitions below produce the same values with fewer operations.
//

// Bitwise majority: each result bit is set when at least two of the three input bits are set.
#define MAJ( x, y, z )  (((x) & ((y) | (z))) | ((y) & (z)))

// Bitwise choice: each result bit comes from y where x is 1 and from z where x is 0.
#define CH( x, y, z )   ((z) ^ ((x) & ((y) ^ (z))))

// Lowercase sigma functions (message schedule): two rotations and one plain shift.
#define LSIGMA0( x )    (((x) >>  3) ^ ROR32((x),  7) ^ ROR32((x), 18))
#define LSIGMA1( x )    (((x) >> 10) ^ ROR32((x), 17) ^ ROR32((x), 19))

// Capital sigma functions (round function): three rotations.
#define CSIGMA0( x )    (RORX_U32((x), 2) ^ RORX_U32((x), 13) ^ RORX_U32((x), 22))
#define CSIGMA1( x )    (RORX_U32((x), 6) ^ RORX_U32((x), 11) ^ RORX_U32((x), 25))


// Lowercase sigma functions applied to four 32-bit lanes at once.
// Each rotate-right by n is assembled from a right shift by n and a left shift by (32 - n);
// the two halves never overlap, so XOR-ing them together yields the rotation.
#define LSIGMA0XMM( x ) \
    _mm_xor_si128( \
        _mm_xor_si128( \
            _mm_xor_si128( _mm_srli_epi32((x),  7), _mm_slli_epi32((x), 25) ), \
            _mm_xor_si128( _mm_srli_epi32((x), 18), _mm_slli_epi32((x), 14) ) ), \
        _mm_srli_epi32((x), 3) )

#define LSIGMA1XMM( x ) \
    _mm_xor_si128( \
        _mm_xor_si128( \
            _mm_xor_si128( _mm_srli_epi32((x), 17), _mm_slli_epi32((x), 15) ), \
            _mm_xor_si128( _mm_srli_epi32((x), 19), _mm_slli_epi32((x), 13) ) ), \
        _mm_srli_epi32((x), 10) )



// Initial loading of message words and endianness transformation.
// bl : The number of blocks to load, 1 <= bl <= 4.
//
// When bl < 4, the high order lanes of the XMM registers corresponding to the missing blocks are unused.
//
// Expands in a scope that provides Wx (the message schedule union), pbData (the input pointer)
// and kBYTE_REVERSE_32 (the byte-reversal shuffle mask already loaded into a register).
//
// Block i is read as four unaligned 16-byte quarters which are byte-swapped and stored to
// Wx.xmm[i], Wx.xmm[i + 4], Wx.xmm[i + 8] and Wx.xmm[i + 12], i.e. register (4 * q + i) holds
// quarter q of block i. The input is only read, so the load pointer is const-qualified.
// The block count is parenthesized so that any expression may be passed for it.
//
#define SHA256_MSG_LOAD_4BLOCKS(bl) { \
    for(SIZE_T i = 0; i < (bl); i++) \
    { \
        Wx.xmm[i +  0] = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE +  0]), kBYTE_REVERSE_32); \
        Wx.xmm[i +  4] = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 16]), kBYTE_REVERSE_32); \
        Wx.xmm[i +  8] = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 32]), kBYTE_REVERSE_32); \
        Wx.xmm[i + 12] = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 48]), kBYTE_REVERSE_32); \
    } \
}

// Shuffles the initially loaded message words from multiple blocks
// so that each XMM register contains message words with the same index
// within a block (e.g. Wx.xmm[0] contains the first words of each block).
//
// We have to use this macro four times to transform the message blocks of 64-bytes.
// ind=0 processes the first quarter (16-bytes), ind=1 does the second quarter and so on.
//
// This is a 4x4 transpose of 32-bit words: the 32-bit unpacks interleave pairs of rows,
// and the 64-bit unpacks then gather one column into each output register. Afterwards
// lane b of Wx.xmm[4 * ind + k] holds word (4 * ind + k) of block b.
//
#define SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(ind) { \
    __m128i t1, t2, t3, t4; \
    t1 = _mm_unpacklo_epi32(Wx.xmm[4 * (ind) + 0], Wx.xmm[4 * (ind) + 1]); \
    t2 = _mm_unpacklo_epi32(Wx.xmm[4 * (ind) + 2], Wx.xmm[4 * (ind) + 3]); \
    t3 = _mm_unpackhi_epi32(Wx.xmm[4 * (ind) + 0], Wx.xmm[4 * (ind) + 1]); \
    t4 = _mm_unpackhi_epi32(Wx.xmm[4 * (ind) + 2], Wx.xmm[4 * (ind) + 3]); \
    Wx.xmm[4 * (ind) + 0] = _mm_unpacklo_epi64(t1, t2); \
    Wx.xmm[4 * (ind) + 1] = _mm_unpackhi_epi64(t1, t2); \
    Wx.xmm[4 * (ind) + 2] = _mm_unpacklo_epi64(t3, t4); \
    Wx.xmm[4 * (ind) + 3] = _mm_unpackhi_epi64(t3, t4); \
}

// Transposes all four quarters, i.e. the first 16 message words of every loaded block.
#define SHA256_MSG_TRANSPOSE_4BLOCKS() { \
    SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(0); \
    SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(1); \
    SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(2); \
    SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(3); \
}

// One round message schedule, updates the rth message word.
// ( 16 <= r < 64 )
// Also adds the constants for round (r-16).
//
// Word r is computed from words r-16, r-15, r-7 and r-2, for all lanes at once.
// The round constant is folded into word r-16 only after word r has been computed:
// word r is the last expansion step that reads word r-16, so from that point on the
// slot can hold (message word + constant) as consumed by the round function.
#define SHA256_MSG_EXPAND_4BLOCKS_1ROUND(r) { \
    Wx.xmm[r] = _mm_add_epi32(_mm_add_epi32(_mm_add_epi32(Wx.xmm[r - 16], Wx.xmm[r - 7]), \
        LSIGMA0XMM(Wx.xmm[r - 15])), LSIGMA1XMM(Wx.xmm[r - 2])); \
    Wx.xmm[r - 16] = _mm_add_epi32(Wx.xmm[r - 16], _mm_set1_epi32(SymCryptSha256K[r - 16])); \
}

// Four rounds of message schedule. Generates message words for rounds r, r+1, r+2, r+3.
// The argument is parenthesized here because the one-round macro uses it unparenthesized.
#define SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS(r) { \
    SHA256_MSG_EXPAND_4BLOCKS_1ROUND((r) + 0); SHA256_MSG_EXPAND_4BLOCKS_1ROUND((r) + 1); \
    SHA256_MSG_EXPAND_4BLOCKS_1ROUND((r) + 2); SHA256_MSG_EXPAND_4BLOCKS_1ROUND((r) + 3); \
}
// Sixteen rounds of message schedule. Generates message words for rounds r, ..., r+15.
#define SHA256_MSG_EXPAND_4BLOCKS_16ROUNDS(r) { \
    SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 0); SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 4); \
    SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 8); SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 12); \
}

// Core round function using message words from Wx array.
// Wx contains -interleaved- expanded message words from b blocks.
// i.e. Message words for round r for each block, followed by the message words for round (r+1) for each block.
+// +// r16 : round number mod 16 +// rb : base round number so that (rb+r16) gives the actual round number +// b : message block index, b = 0..3 +#define CROUND_4BLOCKS(r16, rb, b) { \ + Wt = Wx.ul4[(rb)+(r16)][b]; \ + ah[ r16 &7] += CSIGMA1(ah[(r16+3)&7]) + CH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + Wt;\ + ah[(r16+4)&7] += ah[r16 &7];\ + ah[ r16 &7] += CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]);\ +} + +// +// Core round function +// +// r16 : round number mod 16 +// r : round number, r = 0..63 +// +#define CROUND( r16, r ) {;\ + ah[ r16 &7] += CSIGMA1(ah[(r16+3)&7]) + CH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + SymCryptSha256K[r] + Wt;\ + ah[(r16+4)&7] += ah[r16 &7];\ + ah[ r16 &7] += CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]);\ +} + +// +// Initial round that reads the message. +// r is the round number 0..15 +// +#define IROUND( r ) {\ + Wt = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] );\ + Wx.ul[r] = Wt; \ + CROUND(r,r);\ +} + +// +// Subsequent rounds. +// r16 is the round number mod 16. rb is the round number minus r16. +// +#define FROUND(r16, rb) { \ + Wt = LSIGMA1( Wx.ul[(r16-2) & 15] ) + Wx.ul[(r16-7) & 15] + \ + LSIGMA0( Wx.ul[(r16-15) & 15]) + Wx.ul[r16 & 15]; \ + Wx.ul[r16] = Wt; \ + CROUND( r16, r16+rb ); \ +} + + + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_xmm_4blocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + + SYMCRYPT_ALIGN union { UINT32 ul[16]; UINT32 ul4[64][4]; __m128i xmm[64]; } Wx; + SYMCRYPT_ALIGN UINT32 ah[8]; + UINT32 Wt; + SIZE_T uWipeSize = (cbData >= (3 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE)) ? 
(64 * 4 * sizeof(UINT32)) : (16 * sizeof(UINT32)); + + const __m128i kBYTE_REVERSE_32 = _mm_load_si128((const __m128i*)BYTE_REVERSE_32); + + while (cbData >= (3 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE)) + { + // If we have 4 or more blocks then process 4, else process whatever is left. + SIZE_T numBlocks = (cbData >= 4 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE) ? 4 : (cbData / SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + + SHA256_MSG_LOAD_4BLOCKS(numBlocks); + SHA256_MSG_TRANSPOSE_4BLOCKS(); + + for (int j = 16; j < 64; j += 16) + { + SHA256_MSG_EXPAND_4BLOCKS_16ROUNDS(j); + } + + // Constants up to r=48 were added during message expansion. Add the remaining ones here. + for (int i = 48; i < 64; i++) + { + Wx.xmm[i] = _mm_add_epi32(Wx.xmm[i], _mm_set1_epi32(SymCryptSha256K[i])); + } + + for (SIZE_T bl = 0; bl < numBlocks; bl++) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + for (int iterCount = 0; iterCount < (64/8); iterCount++) + { + const int roundBase = iterCount*8; + CROUND_4BLOCKS( 0, roundBase, bl); + CROUND_4BLOCKS( 1, roundBase, bl); + CROUND_4BLOCKS( 2, roundBase, bl); + CROUND_4BLOCKS( 3, roundBase, bl); + CROUND_4BLOCKS( 4, roundBase, bl); + CROUND_4BLOCKS( 5, roundBase, bl); + CROUND_4BLOCKS( 6, roundBase, bl); + CROUND_4BLOCKS( 7, roundBase, bl); + //CROUND_4BLOCKS( 8, roundBase, bl); + //CROUND_4BLOCKS( 9, roundBase, bl); + //CROUND_4BLOCKS(10, roundBase, bl); + //CROUND_4BLOCKS(11, roundBase, bl); + //CROUND_4BLOCKS(12, roundBase, bl); + //CROUND_4BLOCKS(13, roundBase, bl); + //CROUND_4BLOCKS(14, roundBase, bl); + //CROUND_4BLOCKS(15, roundBase, bl); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + 
pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + } + + pbData += (numBlocks * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + cbData -= (numBlocks * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + } + + + while (cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + // + // initial rounds 1 to 16 + // + + IROUND(0); + IROUND(1); + IROUND(2); + IROUND(3); + IROUND(4); + IROUND(5); + IROUND(6); + IROUND(7); + IROUND(8); + IROUND(9); + IROUND(10); + IROUND(11); + IROUND(12); + IROUND(13); + IROUND(14); + IROUND(15); + + + // + // rounds 16 to 64. + // + for (int iterCount = 1; iterCount < (64/16); iterCount++) + { + const int roundBase = iterCount*16; + FROUND(0, roundBase); + FROUND(1, roundBase); + FROUND(2, roundBase); + FROUND(3, roundBase); + FROUND(4, roundBase); + FROUND(5, roundBase); + FROUND(6, roundBase); + FROUND(7, roundBase); + FROUND(8, roundBase); + FROUND(9, roundBase); + FROUND(10, roundBase); + FROUND(11, roundBase); + FROUND(12, roundBase); + FROUND(13, roundBase); + FROUND(14, roundBase); + FROUND(15, roundBase); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + + pbData += SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipe(&Wx, uWipeSize); + SymCryptWipeKnownSize(ah, sizeof(ah)); +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/sha256-ymm.c b/libs/symcrypt/lib/sha256-ymm.c new 
file mode 100644 index 00000000000..78bde9e2f5b
--- /dev/null
+++ b/libs/symcrypt/lib/sha256-ymm.c
@@ -0,0 +1,441 @@
#include "precomp.h"


#if SYMCRYPT_CPU_AMD64

// Every function in this file is compiled with AVX2 code generation enabled; the
// matching pop directives are at the end of the file.
#ifdef __clang__
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#else
#pragma GCC push_options
#pragma GCC target("avx2")
#endif

// SHA-256 round constants, defined in sha256.c.
extern SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptSha256K[64];

// Endianness transformation for 8 32-bit values in a YMM register
// Byte-shuffle control mask: within every 4-byte group the source bytes are selected in
// reverse order. The 16-byte pattern is repeated once for each 128-bit half of the register.
const SYMCRYPT_ALIGN_AT(32) UINT32 BYTE_REVERSE_32X2[8] = {
    0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
    0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
};

// Rotation primitives used by the capital sigma macros: the _rorx intrinsics when building
// with MSVC (and not clang), the generic ROR32/ROR64 macros everywhere else.
#if SYMCRYPT_MS_VC && !defined(__clang__)
#define RORX_U32 _rorx_u32
#define RORX_U64 _rorx_u64
#else
// TODO: implement _rorx functions for clang
#define RORX_U32 ROR32
#define RORX_U64 ROR64
#endif // SYMCRYPT_MS_VC



//
// For documentation on these functions see FIPS 180-2
//
// MAJ and CH are the functions Maj and Ch from the standard.
// CSIGMA0 and CSIGMA1 are the capital sigma functions.
// LSIGMA0 and LSIGMA1 are the lowercase sigma functions.
//
// The canonical definitions of the MAJ and CH functions are:
//#define MAJ( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
//#define CH( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z)))
// We use optimized versions defined below
//

#define MAJ( x, y, z ) ((((z) | (y)) & (x) ) | ((z) & (y)))
#define CH( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z))

#define CSIGMA0(x) (RORX_U32(x, 2) ^ RORX_U32(x, 13) ^ RORX_U32(x, 22))
#define CSIGMA1(x) (RORX_U32(x, 6) ^ RORX_U32(x, 11) ^ RORX_U32(x, 25))

#define LSIGMA0( x ) (ROR32((x), 7) ^ ROR32((x), 18) ^ ((x)>> 3))
#define LSIGMA1( x ) (ROR32((x), 17) ^ ROR32((x), 19) ^ ((x)>>10))

// Lowercase sigma functions applied to eight 32-bit lanes at once. Each rotate-right by n
// is assembled from a right shift by n and a left shift by (32 - n).
#define LSIGMA0YMM( x ) \
    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
        _mm256_slli_epi32(x,25) , _mm256_srli_epi32(x, 7) ),\
        _mm256_slli_epi32(x,14) ), _mm256_srli_epi32(x, 18) ),\
        _mm256_srli_epi32(x, 3) )

#define LSIGMA1YMM( x ) \
    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
        _mm256_slli_epi32(x,15) , _mm256_srli_epi32(x, 17) ),\
        _mm256_slli_epi32(x,13) ), _mm256_srli_epi32(x, 19) ),\
        _mm256_srli_epi32(x,10) )



// Initial loading of message words and endianness transformation.
// _bl : The number of blocks to load, 1 <= _bl <= 8.
//
// When _bl < 8, the high order lanes of the YMM registers corresponding to the missing blocks are unused.
//
// Expands in a scope that provides Wx (the message schedule union) and pbData (the input
// pointer). Block i is read as two unaligned 32-byte halves which are byte-swapped and
// stored to Wx.ymm[i] and Wx.ymm[i + 8].
//
// The shuffle mask is loaded once before the loop, the loop index has the same unsigned
// type as the block count passed by the caller, and the input is read through a
// const-qualified pointer.
//
#define SHA256_MSG_LOAD_8BLOCKS(_bl) { \
    const __m256i kByteReverse32x2 = _mm256_load_si256((const __m256i*)BYTE_REVERSE_32X2); \
    for (SIZE_T i = 0; i < (_bl); i++) \
    { \
        Wx.ymm[i + 0] = _mm256_shuffle_epi8(_mm256_loadu_si256((const __m256i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE +  0]), kByteReverse32x2); \
        Wx.ymm[i + 8] = _mm256_shuffle_epi8(_mm256_loadu_si256((const __m256i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 32]), kByteReverse32x2); \
    }\
}

// Shuffles the initially loaded message words from multiple blocks
// so that each YMM register contains message words with the same index
// within a block (e.g.
// Wx.ymm[0] contains the first words of each block).
//
// We have to use this macro twice to transform the message blocks of 64-bytes.
// ind=0 processes the first halves (32-bytes) of message blocks and ind=1 does the second halves.
//
// The transpose of the eight registers Wx.ymm[8 * ind .. 8 * ind + 7] is done in three stages:
//   s1..s8 : interleave 32-bit words of neighbouring register pairs,
//   u1..u8 : interleave 64-bit pairs of those results,
//   stores : combine 128-bit halves; selector 0x20 takes the low halves of both
//            operands, selector 0x31 takes the high halves.
//
#define SHA256_MSG_TRANSPOSE_HALF_8BLOCKS(ind) { \
    __m256i s1, s2, s3, s4, s5, s6, s7, s8; \
    __m256i u1, u2, u3, u4, u5, u6, u7, u8; \
    s1 = _mm256_unpacklo_epi32(Wx.ymm[8 * (ind) + 0], Wx.ymm[8 * (ind) + 1]); \
    s2 = _mm256_unpacklo_epi32(Wx.ymm[8 * (ind) + 2], Wx.ymm[8 * (ind) + 3]); \
    s3 = _mm256_unpacklo_epi32(Wx.ymm[8 * (ind) + 4], Wx.ymm[8 * (ind) + 5]); \
    s4 = _mm256_unpacklo_epi32(Wx.ymm[8 * (ind) + 6], Wx.ymm[8 * (ind) + 7]); \
    s5 = _mm256_unpackhi_epi32(Wx.ymm[8 * (ind) + 0], Wx.ymm[8 * (ind) + 1]); \
    s6 = _mm256_unpackhi_epi32(Wx.ymm[8 * (ind) + 2], Wx.ymm[8 * (ind) + 3]); \
    s7 = _mm256_unpackhi_epi32(Wx.ymm[8 * (ind) + 4], Wx.ymm[8 * (ind) + 5]); \
    s8 = _mm256_unpackhi_epi32(Wx.ymm[8 * (ind) + 6], Wx.ymm[8 * (ind) + 7]); \
    u1 = _mm256_unpacklo_epi64(s1, s2); \
    u2 = _mm256_unpacklo_epi64(s3, s4); \
    u3 = _mm256_unpacklo_epi64(s5, s6); \
    u4 = _mm256_unpacklo_epi64(s7, s8); \
    u5 = _mm256_unpackhi_epi64(s1, s2); \
    u6 = _mm256_unpackhi_epi64(s3, s4); \
    u7 = _mm256_unpackhi_epi64(s5, s6); \
    u8 = _mm256_unpackhi_epi64(s7, s8); \
    Wx.ymm[8 * (ind) + 0] = _mm256_permute2x128_si256(u1, u2, 0x20); \
    Wx.ymm[8 * (ind) + 1] = _mm256_permute2x128_si256(u5, u6, 0x20); \
    Wx.ymm[8 * (ind) + 2] = _mm256_permute2x128_si256(u3, u4, 0x20); \
    Wx.ymm[8 * (ind) + 3] = _mm256_permute2x128_si256(u7, u8, 0x20); \
    Wx.ymm[8 * (ind) + 4] = _mm256_permute2x128_si256(u1, u2, 0x31); \
    Wx.ymm[8 * (ind) + 5] = _mm256_permute2x128_si256(u5, u6, 0x31); \
    Wx.ymm[8 * (ind) + 6] = _mm256_permute2x128_si256(u3, u4, 0x31); \
    Wx.ymm[8 * (ind) + 7] = _mm256_permute2x128_si256(u7, u8, 0x31); \
}

// Transposes both halves, i.e. the first 16 message words of every loaded block.
#define SHA256_MSG_TRANSPOSE_8BLOCKS() { \
    SHA256_MSG_TRANSPOSE_HALF_8BLOCKS(0); \
    SHA256_MSG_TRANSPOSE_HALF_8BLOCKS(1); \
}

//
// One round of message expansion, generates message word at index r ( 16 <= r < 64 ).
//
// Additionally adds the constant to the (r-16)^th message word. We cannot add the constants to
// the message words with indices greater than (r-16) since they will be used in the message expansion.
// Constants for the last 16 words are added after message expansion is completed.
//
// r is used unparenthesized in the index expressions; the multi-round wrappers below
// always pass it wrapped in parentheses.
//
#define SHA256_MSG_EXPAND_8BLOCKS_1ROUND(r) { \
    Wx.ymm[r] = _mm256_add_epi32(_mm256_add_epi32(_mm256_add_epi32(Wx.ymm[r - 16], Wx.ymm[r - 7]), LSIGMA0YMM(Wx.ymm[r - 15])), LSIGMA1YMM(Wx.ymm[r - 2])); \
    Wx.ymm[r - 16] = _mm256_add_epi32(Wx.ymm[r - 16], _mm256_set1_epi32(SymCryptSha256K[r - 16])); \
}

// Four rounds of message schedule. Generates message words for rounds r, r+1, r+2, r+3.
#define SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS(r) { \
    SHA256_MSG_EXPAND_8BLOCKS_1ROUND((r) + 0); SHA256_MSG_EXPAND_8BLOCKS_1ROUND((r) + 1); SHA256_MSG_EXPAND_8BLOCKS_1ROUND((r) + 2); SHA256_MSG_EXPAND_8BLOCKS_1ROUND((r) + 3); \
}

// Sixteen rounds of message schedule. Generates message words for rounds r, ..., r+15.
#define SHA256_MSG_EXPAND_8BLOCKS_16ROUNDS(r) { \
    SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS((r) + 0); SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS((r) + 4); SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS((r) + 8); SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS((r) + 12); \
}


// Core round function without the constant addition. Uses rorx versions of CSIGMA functions.
//
// r16 : round number mod 16.
// rb : base round number so that (rb + r16) gives the actual round number. rb = 0, 16, 32, 48.
// bl : message block index, bl = 0..7.
// Reads ah[] and Wx from the expansion scope. The round constant is not added here, so it
// must already have been folded into Wx.ul8[rb + r16][bl] by the caller.
// r16 is used unparenthesized, so pass a plain constant or identifier.
#define CROUND_8BLOCKS(r16, rb, bl) { \
    UINT32 T2 = CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]); \
    UINT32 T1 = CSIGMA1(ah[(r16+3)&7]) + CH (ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + Wx.ul8[(rb) + (r16)][bl];\
    ah[(r16+4)&7] += T1 + ah[ r16 &7]; \
    ah[ r16 &7] += T1 + T2; \
}

//
// Core round function for single message block processing
// r16 : round number mod 16
// r : round number, r = 0..63
//
// Consumes the message word currently held in Wt and adds the round constant itself.
//
#define CROUND( r16, r ) { \
    ah[ r16 &7] += CSIGMA1(ah[(r16+3)&7]) + CH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + SymCryptSha256K[r] + Wt;\
    ah[(r16+4)&7] += ah[r16 &7];\
    ah[ r16 &7] += CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]);\
}



//
// Initial round that reads the message.
// r is the round number 0..15
//
// The big-endian message word is kept in Wx.ul[r] for the later message expansion.
//
#define IROUND( r ) { \
    Wt = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] );\
    Wx.ul[r] = Wt; \
    CROUND(r,r);\
}

//
// Subsequent rounds.
// r16 is the round number mod 16. rb is the round number minus r16.
//
// Wx.ul[] is used as a 16-entry circular buffer: the new word overwrites the word from
// 16 rounds earlier.
//
#define FROUND(r16, rb) { \
    Wt = LSIGMA1( Wx.ul[(r16-2) & 15] ) + Wx.ul[(r16-7) & 15] + \
         LSIGMA0( Wx.ul[(r16-15) & 15]) + Wx.ul[r16 & 15]; \
    Wx.ul[r16] = Wt; \
    CROUND( r16, r16+rb ); \
}

// Constant addition and round processing for rounds = 48..63, must be called twice.
// This macro is not used at the moment but kept here for completeness. The implementation using
// this macro turns out to be slower compared to the existing one.
+#define SHA256_8BLOCKS_FINAL_ROUNDS_8X(rnd) { \ + Wx.ymm[rnd + 0] = _mm256_add_epi32(Wx.ymm[rnd + 0], _mm256_set1_epi32(SymCryptSha256K[rnd + 0])); \ + Wx.ymm[rnd + 1] = _mm256_add_epi32(Wx.ymm[rnd + 1], _mm256_set1_epi32(SymCryptSha256K[rnd + 1])); \ + Wx.ymm[rnd + 2] = _mm256_add_epi32(Wx.ymm[rnd + 2], _mm256_set1_epi32(SymCryptSha256K[rnd + 2])); \ + Wx.ymm[rnd + 3] = _mm256_add_epi32(Wx.ymm[rnd + 3], _mm256_set1_epi32(SymCryptSha256K[rnd + 3])); \ + CROUND_8BLOCKS(0, rnd, 0); \ + CROUND_8BLOCKS(1, rnd, 0); \ + CROUND_8BLOCKS(2, rnd, 0); \ + CROUND_8BLOCKS(3, rnd, 0); \ + Wx.ymm[rnd + 4] = _mm256_add_epi32(Wx.ymm[rnd + 4], _mm256_set1_epi32(SymCryptSha256K[rnd + 4])); \ + Wx.ymm[rnd + 5] = _mm256_add_epi32(Wx.ymm[rnd + 5], _mm256_set1_epi32(SymCryptSha256K[rnd + 5])); \ + Wx.ymm[rnd + 6] = _mm256_add_epi32(Wx.ymm[rnd + 6], _mm256_set1_epi32(SymCryptSha256K[rnd + 6])); \ + Wx.ymm[rnd + 7] = _mm256_add_epi32(Wx.ymm[rnd + 7], _mm256_set1_epi32(SymCryptSha256K[rnd + 7])); \ + CROUND_8BLOCKS(4, rnd, 0); \ + CROUND_8BLOCKS(5, rnd, 0); \ + CROUND_8BLOCKS(6, rnd, 0); \ + CROUND_8BLOCKS(7, rnd, 0); \ +} + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ymm_8blocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + + SYMCRYPT_ALIGN_AT(32) union { UINT32 ul[16]; UINT32 ul8[64][8]; __m256i ymm[64]; } Wx; + SYMCRYPT_ALIGN UINT32 ah[8]; + UINT32 Wt; + UINT32 uWipeSize = (cbData >= (5 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE)) ? (64 * 8 * sizeof(UINT32)) : (16 * sizeof(UINT32)); + + + _mm256_zeroupper(); + + while (cbData >= (5 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE)) + { + // If we have 8 or more blocks then process 8, else process whatever is left. + SIZE_T numBlocks = (cbData >= 8 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE) ? 
8 : (cbData / SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + + SHA256_MSG_LOAD_8BLOCKS(numBlocks); + SHA256_MSG_TRANSPOSE_8BLOCKS(); + + // Process the first block together with message expansion. + // For the last 16 rounds we don't expand the message, instead just + // add the round constants. + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + for (int r = 0; r < 64; r += 8) + { + if (r < 48) + { + SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS(r + 16); + } + else + { + Wx.ymm[r + 0] = _mm256_add_epi32(Wx.ymm[r + 0], _mm256_set1_epi32(SymCryptSha256K[r + 0])); + Wx.ymm[r + 1] = _mm256_add_epi32(Wx.ymm[r + 1], _mm256_set1_epi32(SymCryptSha256K[r + 1])); + Wx.ymm[r + 2] = _mm256_add_epi32(Wx.ymm[r + 2], _mm256_set1_epi32(SymCryptSha256K[r + 2])); + Wx.ymm[r + 3] = _mm256_add_epi32(Wx.ymm[r + 3], _mm256_set1_epi32(SymCryptSha256K[r + 3])); + } + + CROUND_8BLOCKS(0, r, 0); + CROUND_8BLOCKS(1, r, 0); + CROUND_8BLOCKS(2, r, 0); + CROUND_8BLOCKS(3, r, 0); + + if (r < 48) + { + SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS(r + 20); + } + else + { + Wx.ymm[r + 4] = _mm256_add_epi32(Wx.ymm[r + 4], _mm256_set1_epi32(SymCryptSha256K[r + 4])); + Wx.ymm[r + 5] = _mm256_add_epi32(Wx.ymm[r + 5], _mm256_set1_epi32(SymCryptSha256K[r + 5])); + Wx.ymm[r + 6] = _mm256_add_epi32(Wx.ymm[r + 6], _mm256_set1_epi32(SymCryptSha256K[r + 6])); + Wx.ymm[r + 7] = _mm256_add_epi32(Wx.ymm[r + 7], _mm256_set1_epi32(SymCryptSha256K[r + 7])); + } + + CROUND_8BLOCKS(4, r, 0); + CROUND_8BLOCKS(5, r, 0); + CROUND_8BLOCKS(6, r, 0); + CROUND_8BLOCKS(7, r, 0); + } + + // Alternative version where the loop above goes up to round=48 and + // the remaining 16 rounds are processed here. Despite the conditional logic, + // the above version is faster compared to the commented out one. 
+ //SHA256_MS_8BLOCKS_FINAL_ROUNDS_8X(48); + //SHA256_MS_8BLOCKS_FINAL_ROUNDS_8X(56); + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + } + + + for (int bl = 1; bl < numBlocks; bl++) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + for (int iterCount=0; iterCount<(64/16); iterCount++) + { + const int roundBase = iterCount*16; + CROUND_8BLOCKS( 0, roundBase, bl); + CROUND_8BLOCKS( 1, roundBase, bl); + CROUND_8BLOCKS( 2, roundBase, bl); + CROUND_8BLOCKS( 3, roundBase, bl); + CROUND_8BLOCKS( 4, roundBase, bl); + CROUND_8BLOCKS( 5, roundBase, bl); + CROUND_8BLOCKS( 6, roundBase, bl); + CROUND_8BLOCKS( 7, roundBase, bl); + CROUND_8BLOCKS( 8, roundBase, bl); + CROUND_8BLOCKS( 9, roundBase, bl); + CROUND_8BLOCKS(10, roundBase, bl); + CROUND_8BLOCKS(11, roundBase, bl); + CROUND_8BLOCKS(12, roundBase, bl); + CROUND_8BLOCKS(13, roundBase, bl); + CROUND_8BLOCKS(14, roundBase, bl); + CROUND_8BLOCKS(15, roundBase, bl); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + } + + pbData += (numBlocks * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + cbData -= (numBlocks * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + + } + + _mm256_zeroupper(); + + + while (cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] 
= pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + // + // initial rounds 1 to 16 + // + + IROUND(0); + IROUND(1); + IROUND(2); + IROUND(3); + IROUND(4); + IROUND(5); + IROUND(6); + IROUND(7); + IROUND(8); + IROUND(9); + IROUND(10); + IROUND(11); + IROUND(12); + IROUND(13); + IROUND(14); + IROUND(15); + + + // + // rounds 16 to 64. + // + for (int iterCount=1; iterCount<(64/16); iterCount++) + { + const int roundBase = iterCount*16; + FROUND( 0, roundBase); + FROUND( 1, roundBase); + FROUND( 2, roundBase); + FROUND( 3, roundBase); + FROUND( 4, roundBase); + FROUND( 5, roundBase); + FROUND( 6, roundBase); + FROUND( 7, roundBase); + FROUND( 8, roundBase); + FROUND( 9, roundBase); + FROUND(10, roundBase); + FROUND(11, roundBase); + FROUND(12, roundBase); + FROUND(13, roundBase); + FROUND(14, roundBase); + FROUND(15, roundBase); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + + pbData += SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipe(&Wx, uWipeSize); + SymCryptWipeKnownSize(ah, sizeof(ah)); + SYMCRYPT_FORCE_WRITE32(&Wt, 0); +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/sha256.c b/libs/symcrypt/lib/sha256.c new file mode 100644 index 00000000000..975ead6d18c --- /dev/null +++ b/libs/symcrypt/lib/sha256.c @@ -0,0 +1,1884 @@ +// +// Sha256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
//

//
// This module contains the routines to implement SHA2-256 from FIPS 180-2
//
// This revised implementation is based on the older one in RSA32LIB by Scott Field from 2001
//

#include "precomp.h"

//
// See the symcrypt.h file for documentation on what the various functions do.
//

//
// Generic hash descriptors for SHA-224 and SHA-256.
// The initializers are positional. The per-entry notes below are inferred from the values
// supplied here - confirm against the SYMCRYPT_HASH declaration.
// Both descriptors use SymCryptSha256AppendBlocks as their block function.
//
const SYMCRYPT_HASH SymCryptSha224Algorithm_default = {
    &SymCryptSha224Init,                                    // state initialization
    &SymCryptSha224Append,                                  // data append
    &SymCryptSha224Result,                                  // result extraction
    &SymCryptSha256AppendBlocks,                            // block function, shared with SHA-256
    &SymCryptSha224StateCopy,                               // state copy
    sizeof( SYMCRYPT_SHA224_STATE ),                        // size of the state structure
    SYMCRYPT_SHA224_RESULT_SIZE,                            // size of the hash result
    SYMCRYPT_SHA224_INPUT_BLOCK_SIZE,                       // size of an input block
    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA224_STATE, chain ),  // offset of the chaining state within the state
    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA224_STATE, chain ),    // size of the chaining state
};

const SYMCRYPT_HASH SymCryptSha256Algorithm_default = {
    &SymCryptSha256Init,                                    // state initialization
    &SymCryptSha256Append,                                  // data append
    &SymCryptSha256Result,                                  // result extraction
    &SymCryptSha256AppendBlocks,                            // block function
    &SymCryptSha256StateCopy,                               // state copy
    sizeof( SYMCRYPT_SHA256_STATE ),                        // size of the state structure
    SYMCRYPT_SHA256_RESULT_SIZE,                            // size of the hash result
    SYMCRYPT_SHA256_INPUT_BLOCK_SIZE,                       // size of an input block
    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA256_STATE, chain ),  // offset of the chaining state within the state
    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA256_STATE, chain ),    // size of the chaining state
};

// Public handles to the descriptors above.
const PCSYMCRYPT_HASH SymCryptSha224Algorithm = &SymCryptSha224Algorithm_default;
const PCSYMCRYPT_HASH SymCryptSha256Algorithm = &SymCryptSha256Algorithm_default;

//
// SHA-256 uses 64 magic constants of 32 bits each. These are
// referred to as K^{256}_i for i=0...63 by FIPS 180-2.
// This array is also used by the parallel SHA256 implementation
// For performance we align to 256 bytes, which gives optimal cache alignment.
//
SYMCRYPT_ALIGN_AT( 256 ) const UINT32 SymCryptSha256K[64] = {
    0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
    0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
    0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
    0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
    0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
    0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
    0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
    0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
    0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
    0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
    0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
    0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
    0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
    0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
    0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
    0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
};


//
// Initial state
//
// The eight chaining words loaded into chain.H[0..7] by the Init functions below.
//
static const UINT32 sha224InitialState[8] = {
    0xc1059ed8UL,
    0x367cd507UL,
    0x3070dd17UL,
    0xf70e5939UL,
    0xffc00b31UL,
    0x68581511UL,
    0x64f98fa7UL,
    0xbefa4fa4UL,
};

static const UINT32 sha256InitialState[8] = {
    0x6a09e667UL,
    0xbb67ae85UL,
    0x3c6ef372UL,
    0xa54ff53aUL,
    0x510e527fUL,
    0x9b05688cUL,
    0x1f83d9abUL,
    0x5be0cd19UL,
};

//
// SymCryptSha224
//
// hash_pattern.c is included once per algorithm with ALG/Alg selecting the name;
// what it generates is defined in that file, not here.
//
#define ALG SHA224
#define Alg Sha224
#include "hash_pattern.c"
#undef ALG
#undef Alg

//
// SymCryptSha256
//
#define ALG SHA256
#define Alg Sha256
#include "hash_pattern.c"
#undef ALG
#undef Alg



//
// SymCryptSha256Init
//
// Puts *pState into the starting state for a SHA-256 computation: sets the magic,
// zeroes the byte counter and the buffered-byte count, and loads sha256InitialState
// into the chaining value.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha256Init( _Out_ PSYMCRYPT_SHA256_STATE pState )
{
    SYMCRYPT_SET_MAGIC( pState );

    pState->dataLengthL = 0;
    //pState->dataLengthH = 0;      // not used
    pState->bytesInBuffer = 0;

    memcpy( &pState->chain.H[0], &sha256InitialState[0], sizeof( sha256InitialState ) );

    //
    // There is no need to initialize the buffer part of the state as that will be
    // filled before it is used.
    //
}


//
// SymCryptSha224Init
//
// Same as SymCryptSha256Init, except that the chaining value is loaded from
// sha224InitialState.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha224Init( _Out_ PSYMCRYPT_SHA224_STATE pState )
{
    SYMCRYPT_SET_MAGIC( pState );

    pState->dataLengthL = 0;
    //pState->dataLengthH = 0;      // not used
    pState->bytesInBuffer = 0;

    memcpy( &pState->chain.H[0], &sha224InitialState[0], sizeof( sha224InitialState ) );

    //
    // There is no need to initialize the buffer part of the state as that will be
    // filled before it is used.
    //
}


//
// SymCryptSha256Append
//
// Absorbs cbData bytes starting at pbData into the hash state.
// A partially filled internal buffer is topped up and processed first, then all whole
// input blocks are handed to SymCryptSha256AppendBlocks directly from the caller's
// memory, and whatever is left (less than one block) is copied into pState->buffer.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha256Append(
    _Inout_                 PSYMCRYPT_SHA256_STATE  pState,
    _In_reads_( cbData )    PCBYTE                  pbData,
                            SIZE_T                  cbData )
{
    UINT32  bytesInBuffer;
    UINT32  freeInBuffer;
    SIZE_T  tmp;

    SYMCRYPT_CHECK_MAGIC( pState );

    pState->dataLengthL += cbData;      // dataLengthH is not used...

    bytesInBuffer = pState->bytesInBuffer;

    //
    // If previous data in buffer, buffer new input and transform if possible.
    //
    if( bytesInBuffer > 0 )
    {
        SYMCRYPT_ASSERT( SYMCRYPT_SHA256_INPUT_BLOCK_SIZE > bytesInBuffer );

        freeInBuffer = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer;
        if( cbData < freeInBuffer )
        {
            //
            // All the data will fit in the buffer.
            // We don't do anything here.
            // As cbData < inputBlockSize the bulk data processing is skipped,
            // and the data will be copied to the buffer at the end
            // of this code.
        } else {
            //
            // Enough data to fill the whole buffer & process it
            //
            memcpy(&pState->buffer[bytesInBuffer], pbData, freeInBuffer);
            pbData += freeInBuffer;
            cbData -= freeInBuffer;
            SymCryptSha256AppendBlocks( &pState->chain, &pState->buffer[0], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE, &tmp );

            bytesInBuffer = 0;
        }
    }

    //
    // Internal buffer is empty; process all remaining whole blocks in the input
    //
    if( cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE )
    {
        // tmp receives the number of trailing bytes AppendBlocks did not consume.
        SymCryptSha256AppendBlocks( &pState->chain, pbData, cbData, &tmp );
        SYMCRYPT_ASSERT( tmp < SYMCRYPT_SHA256_INPUT_BLOCK_SIZE );
        pbData += cbData - tmp;
        cbData = tmp;
    }

    SYMCRYPT_ASSERT( cbData < SYMCRYPT_SHA256_INPUT_BLOCK_SIZE );

    //
    // buffer remaining input if necessary.
    //
    if( cbData > 0 )
    {
        memcpy( &pState->buffer[bytesInBuffer], pbData, cbData );
        bytesInBuffer += (UINT32) cbData;
    }

    pState->bytesInBuffer = bytesInBuffer;
}


//
// SymCryptSha224Append
//
// Forwards to SymCryptSha256Append with the state pointer cast to the SHA-256 state type.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha224Append(
    _Inout_                 PSYMCRYPT_SHA224_STATE  pState,
    _In_reads_( cbData )    PCBYTE                  pbData,
                            SIZE_T                  cbData )
{
    SymCryptSha256Append( (PSYMCRYPT_SHA256_STATE)pState, pbData, cbData );
}


//
// SymCryptSha256Result
//
// Applies the padding (0x80, zero bytes, 64-bit MSB-first bit length), processes the
// final block(s), writes the eight chaining words MSB-first to pbResult, and then wipes
// the state and sets it up again for a new SHA-256 computation.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha256Result(
    _Inout_                                     PSYMCRYPT_SHA256_STATE  pState,
    _Out_writes_( SYMCRYPT_SHA256_RESULT_SIZE ) PBYTE                   pbResult )
{
    //
    // We don't use the common padding code as that is slower, and SHA-256 is very frequently used in
    // performance-sensitive areas.
    //
    UINT32 bytesInBuffer;
    SIZE_T tmp;

    SYMCRYPT_CHECK_MAGIC( pState );

    bytesInBuffer = pState->bytesInBuffer;

    //
    // The buffer is never completely full, so we can always put the first
    // padding byte in.
    //
    pState->buffer[bytesInBuffer++] = 0x80;

    if( bytesInBuffer > 64-8 ) {
        //
        // No room for the rest of the padding. Pad with zeroes & process block
        // bytesInBuffer is at most 64, so we do not have an integer underflow
        //
        SymCryptWipe( &pState->buffer[bytesInBuffer], 64-bytesInBuffer );
        SymCryptSha256AppendBlocks( &pState->chain, pState->buffer, 64, &tmp );
        bytesInBuffer = 0;
    }

    //
    // Set rest of padding
    // At this point bytesInBuffer <= 64-8, so we don't have an underflow
    // We wipe to the end of the buffer as it is 16-aligned,
    // and it is faster to wipe to an aligned point
    //
    SymCryptWipe( &pState->buffer[bytesInBuffer], 64-bytesInBuffer );
    // The length field counts bits, hence the multiplication of the byte count by 8.
    SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[64-8], pState->dataLengthL * 8 );

    //
    // Process the final block
    //
    SymCryptSha256AppendBlocks( &pState->chain, pState->buffer, 64, &tmp );

    //
    // Write the output in the correct byte order
    //
    SymCryptUint32ToMsbFirst( &pState->chain.H[0], pbResult, 8 );

    //
    // Wipe & re-initialize
    // We have to wipe the whole state because the Init call
    // might be optimized away by a smart compiler.
    //
    SymCryptWipeKnownSize( pState, sizeof( *pState ) );

    memcpy( &pState->chain.H[0], &sha256InitialState[0], sizeof( sha256InitialState ) );
    SYMCRYPT_SET_MAGIC( pState );
}


//
// SymCryptSha224Result
//
// Finishes the computation through SymCryptSha256Result into a local buffer, copies the
// first SYMCRYPT_SHA224_RESULT_SIZE bytes to pbResult, re-initializes the state for
// SHA-224 and wipes the local buffer.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha224Result(
    _Inout_                                     PSYMCRYPT_SHA224_STATE  pState,
    _Out_writes_( SYMCRYPT_SHA224_RESULT_SIZE ) PBYTE                   pbResult )
{
    SYMCRYPT_ALIGN BYTE sha256Result[SYMCRYPT_SHA256_RESULT_SIZE];      // Buffer for SHA-256 output

    //
    // The SHA-224 result is the first 28 bytes of the SHA-256 result of our state
    //
    SymCryptSha256Result( (PSYMCRYPT_SHA256_STATE)pState, sha256Result );
    memcpy( pbResult, sha256Result, SYMCRYPT_SHA224_RESULT_SIZE );

    //
    // The buffer was already wiped by the SymCryptSha256Result function, we
    // just have to re-initialize for SHA-224
    //
    SymCryptSha224Init( pState );

    SymCryptWipeKnownSize( sha256Result, sizeof( sha256Result ) );
}


//
// SymCryptSha256StateExportCore
//
// Serializes the hash state into pbBlob: header (magic, size, caller-supplied type),
// chaining words MSB-first, byte count, the buffered bytes, and a Marvin32 checksum over
// everything before the trailer. Shared by the SHA-256 and SHA-224 export functions,
// which differ only in the type value they pass.
//
VOID
SYMCRYPT_CALL
SymCryptSha256StateExportCore(
    _In_                                                    PCSYMCRYPT_SHA256_STATE pState,
    _Out_writes_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE ) PBYTE                   pbBlob,
    _In_                                                    UINT32                  type )
{
    SYMCRYPT_ALIGN SYMCRYPT_SHA256_STATE_EXPORT_BLOB    blob;           // local copy to have proper alignment.
    C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA256_STATE_EXPORT_SIZE );

    SYMCRYPT_CHECK_MAGIC( pState );

    SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage

    blob.header.magic = SYMCRYPT_BLOB_MAGIC;
    blob.header.size = SYMCRYPT_SHA256_STATE_EXPORT_SIZE;
    blob.header.type = type;

    //
    // Copy the relevant data. Buffer will be 0-padded.
    //

    SymCryptUint32ToMsbFirst( &pState->chain.H[0], &blob.chain[0], 8 );
    blob.dataLength = pState->dataLengthL;
    // The number of buffered bytes is the byte count modulo 64.
    memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLength & 0x3f );

    SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer );
    SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] );

    memcpy( pbBlob, &blob, sizeof( blob ) );

//cleanup:
    SymCryptWipeKnownSize( &blob, sizeof( blob ) );
    return;
}


//
// SymCryptSha256StateExport
//
// Exports the state with blob type SymCryptBlobTypeSha256State.
//
VOID
SYMCRYPT_CALL
SymCryptSha256StateExport(
    _In_                                                    PCSYMCRYPT_SHA256_STATE pState,
    _Out_writes_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE ) PBYTE                   pbBlob)
{
    SymCryptSha256StateExportCore( pState, pbBlob, SymCryptBlobTypeSha256State );
}


//
// SymCryptSha224StateExport
//
// Exports the state with blob type SymCryptBlobTypeSha224State. The blob layout and
// size are those of the SHA-256 export.
//
VOID
SYMCRYPT_CALL
SymCryptSha224StateExport(
    _In_                                                    PCSYMCRYPT_SHA224_STATE pState,
    _Out_writes_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE ) PBYTE                   pbBlob)
{
    SymCryptSha256StateExportCore( (PSYMCRYPT_SHA256_STATE)pState, pbBlob, SymCryptBlobTypeSha224State );
}


//
// SymCryptSha256StateImportCore
//
// Rebuilds a hash state from a blob produced by the export functions.
// Returns SYMCRYPT_INVALID_BLOB when the header magic, size or type does not match, or
// when the Marvin32 checksum does not verify; *pState is only written after both checks
// have passed. Returns SYMCRYPT_NO_ERROR on success.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSha256StateImportCore(
    _Out_                                                   PSYMCRYPT_SHA256_STATE  pState,
    _In_reads_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE)    PCBYTE                  pbBlob,
    _In_                                                    UINT32                  type )
{
    SYMCRYPT_ERROR                      scError = SYMCRYPT_NO_ERROR;
    SYMCRYPT_ALIGN SYMCRYPT_SHA256_STATE_EXPORT_BLOB   blob;                       // local copy to have proper alignment.
    BYTE                                checksum[8];

    C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA256_STATE_EXPORT_SIZE );
    memcpy( &blob, pbBlob, sizeof( blob ) );

    if( blob.header.magic != SYMCRYPT_BLOB_MAGIC ||
        blob.header.size != SYMCRYPT_SHA256_STATE_EXPORT_SIZE ||
        blob.header.type != type )
    {
        scError = SYMCRYPT_INVALID_BLOB;
        goto cleanup;
    }

    SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum );
    if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 )
    {
        scError = SYMCRYPT_INVALID_BLOB;
        goto cleanup;
    }

    SymCryptMsbFirstToUint32( &blob.chain[0], &pState->chain.H[0], 8 );
    pState->dataLengthL = blob.dataLength;
    // bytesInBuffer is not stored in the blob; it is recomputed as the byte count modulo 64.
    pState->bytesInBuffer = blob.dataLength & 0x3f;
    memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer );

    SYMCRYPT_SET_MAGIC( pState );

cleanup:
    SymCryptWipeKnownSize( &blob, sizeof(blob) );
    return scError;
}


//
// SymCryptSha256StateImport
//
// Imports a blob that must carry type SymCryptBlobTypeSha256State.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSha256StateImport(
    _Out_                                                   PSYMCRYPT_SHA256_STATE  pState,
    _In_reads_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE)    PCBYTE                  pbBlob )
{
    return SymCryptSha256StateImportCore( pState, pbBlob, SymCryptBlobTypeSha256State );
}


//
// SymCryptSha224StateImport
//
// Imports a blob that must carry type SymCryptBlobTypeSha224State.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSha224StateImport(
    _Out_                                                   PSYMCRYPT_SHA224_STATE  pState,
    _In_reads_bytes_( SYMCRYPT_SHA224_STATE_EXPORT_SIZE)    PCBYTE                  pbBlob )
{
    return SymCryptSha256StateImportCore( (PSYMCRYPT_SHA256_STATE)pState, pbBlob, SymCryptBlobTypeSha224State );
}



//
// Simple test vector for FIPS module testing
//
// Expected SHA-256 digest of SymCryptTestMsg3 (defined elsewhere; presumably the
// three-byte message "abc" - confirm against its definition).
//

const BYTE SymCryptSha256KATAnswer[ 32 ] = {
    0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea,
    0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23,
    0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c,
    0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad,
    } ;

//
// SymCryptSha256Selftest
//
// Hashes SymCryptTestMsg3, passes the result through SymCryptInjectError, and calls
// SymCryptFatal( 'SH25' ) if it differs from SymCryptSha256KATAnswer.
//
VOID
SYMCRYPT_CALL
SymCryptSha256Selftest(void)
{
    BYTE result[SYMCRYPT_SHA256_RESULT_SIZE];

    SymCryptSha256( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result );

    SymCryptInjectError( result, sizeof( result ) );

    if( memcmp( result, SymCryptSha256KATAnswer, sizeof( result ) ) != 0 ) {
        SymCryptFatal( 'SH25' );
    }
}

//
// Simple test vector for FIPS module testing
//
// Expected SHA-224 digest of SymCryptTestMsg3.
//

const BYTE SymCryptSha224KATAnswer[ 28 ] = {
    0x23, 0x09, 0x7d, 0x22, 0x34, 0x05, 0xd8, 0x22,
    0x86, 0x42, 0xa4, 0x77, 0xbd, 0xa2, 0x55, 0xb3,
    0x2a, 0xad, 0xbc, 0xe4, 0xbd, 0xa0, 0xb3, 0xf7,
    0xe3, 0x6c, 0x9d, 0xa7,
    } ;

//
// SymCryptSha224Selftest
//
// Same procedure as SymCryptSha256Selftest, for SHA-224; the fatal code is 'SH22'.
//
VOID
SYMCRYPT_CALL
SymCryptSha224Selftest(void)
{
    BYTE result[SYMCRYPT_SHA224_RESULT_SIZE];

    SymCryptSha224( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result );

    SymCryptInjectError( result, sizeof( result ) );

    if( memcmp( result, SymCryptSha224KATAnswer, sizeof( result ) ) != 0 ) {
        SymCryptFatal( 'SH22' );
    }
}



//
// Below are multiple implementations of the SymCryptSha256AppendBlocks function,
// with a compile-time switch about which one to use.
// We keep the multiple implementations here for future reference;
// as CPU architectures evolve we might want to switch to one of the
// other implementations.
// All implementations here have been tested, but some lack production hardening.
//

//
// Enable frame pointer omission to free up an extra register on X86.
//
#if SYMCRYPT_CPU_X86 && SYMCRYPT_MS_VC && !defined(__clang__)
#pragma optimize( "y", on )
#endif

//
// For documentation on these functions see FIPS 180-2
//
// MAJ and CH are the functions Maj and Ch from the standard.
// CSIGMA0 and CSIGMA1 are the capital sigma functions.
// LSIGMA0 and LSIGMA1 are the lowercase sigma functions.
//
// The canonical definitions of the MAJ and CH functions are:
//#define MAJ( x, y, z )    (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
//#define CH( x, y, z )  (((x) & (y)) ^ ((~(x)) & (z)))
// We use optimized versions defined below
//
// Note: both macros evaluate their arguments more than once, so arguments must be
// free of side effects.
//
#define MAJ( x, y, z )  ((((z) | (y)) & (x) ) | ((z) & (y)))
#define CH( x, y, z )  ((((z) ^ (y)) & (x)) ^ (z))

//
// The four Sigma functions
//

//
// We have two versions of the rotate-and-xor functions.
// one is just a macro that does the rotations and xors.
// This works well on ARM
// For Intel/AMD we have one where we use the rotated value
// from one intermediate result to derive the next rotated
// value from. This removes one register copy from the
// code stream.
//
// In practice, our compiler doesn't take advantage of the
// reduction in the # operations required, and inserts a
// bunch of extra register copies anyway.
// It actually hurts on AMD64.
//
// This should be re-tuned for every release to get the best overall
// SHA-256 performance.
// At the moment we get an improvement from 19.76 c/B to 19.40 c/B on a Core 2 core.
// We should probably tune this to the Atom CPU.
//
// Each USE_xxx_MULTIROT switch selects, for one sigma function, between the
// chained-rotation inline function (1) and the plain macro (0) defined further down.
//
#if SYMCRYPT_CPU_X86
#define USE_CSIGMA0_MULTIROT 1
#define USE_CSIGMA1_MULTIROT 0
#define USE_LSIGMA0_MULTIROT 0
#define USE_LSIGMA1_MULTIROT 0

#else
//
// On ARM we have no reason to believe this helps at all.
// on AMD64 it slows our code down.
//
#define USE_CSIGMA0_MULTIROT 0
#define USE_CSIGMA1_MULTIROT 0
#define USE_LSIGMA0_MULTIROT 0
#define USE_LSIGMA1_MULTIROT 0
#endif

//
// CSIGMA0( x ) = ROR(x,2) ^ ROR(x,13) ^ ROR(x,22)
// The chained form rotates the running value by 2, then 11 more (13), then 9 more (22).
//
#if USE_CSIGMA0_MULTIROT
FORCEINLINE
UINT32
CSIGMA0( UINT32 x )
{
    UINT32 res;
    x = ROR32( x, 2 );
    res = x;
    x = ROR32( x, 11 );
    res ^= x;
    x = ROR32( x, 9 );
    res ^= x;
    return res;
}
#else
#define CSIGMA0( x ) (ROR32((x),  2) ^ ROR32((x), 13) ^ ROR32((x), 22))
#endif

//
// CSIGMA1( x ) = ROR(x,6) ^ ROR(x,11) ^ ROR(x,25)
// The chained form rotates by 6, then 5 more (11), then 14 more (25).
//
#if USE_CSIGMA1_MULTIROT
FORCEINLINE
UINT32
CSIGMA1( UINT32 x )
{
    UINT32 res;
    x = ROR32( x, 6 );
    res = x;
    x = ROR32( x, 5 );
    res ^= x;
    x = ROR32( x, 14 );
    res ^= x;
    return res;
}
#else
#define CSIGMA1( x ) (ROR32((x),  6) ^ ROR32((x), 11) ^ ROR32((x), 25))
#endif

//
// LSIGMA0( x ) = ROR(x,7) ^ ROR(x,18) ^ (x >> 3)
// The chained form takes the shift first (it needs the unrotated value), then rotates
// by 7 and by 11 more (18).
//
#if USE_LSIGMA0_MULTIROT
FORCEINLINE
UINT32
LSIGMA0( UINT32 x )
{
    UINT32 res;
    res = x >> 3;
    x = ROR32( x, 7 );
    res ^= x;
    x = ROR32( x, 11 );
    res ^= x;
    return res;
}
#else
#define LSIGMA0( x ) (ROR32((x),  7) ^ ROR32((x), 18) ^ ((x)>> 3))
#endif

//
// LSIGMA1( x ) = ROR(x,17) ^ ROR(x,19) ^ (x >> 10)
// The chained form takes the shift first, then rotates by 17 and by 2 more (19).
//
#if USE_LSIGMA1_MULTIROT
FORCEINLINE
UINT32
LSIGMA1( UINT32 x )
{
    UINT32 res;
    res = x >> 10;
    x = ROR32( x, 17 );
    res ^= x;
    x = ROR32( x, 2 );
    res ^= x;
    return res;
}
#else
#define LSIGMA1( x ) (ROR32((x), 17) ^ ROR32((x), 19) ^ ((x)>>10))
#endif


//
// The values a-h are stored in an array called ah.
// We have unrolled the loop 16 times. This makes both the indices into
// the ah array constant, and it makes the message addressing constant.
// This provides a significant speed improvement, at the cost of making
// the main loop about 4 kB in code.
//
// The earlier implementation had the loop unrolled 8 times, and is
// around 10 cycles/byte slower. If loading the code from disk takes
// 100 cycles/byte, then we break even once you have hashed 20 kB.
// This is a worthwhile tradeoff as all code is codesigned with SHA-256.
//

//
// Core round macro
//
// r16 is the round number mod 16, r is the round number.
// r16 is a separate macro argument because it is always a compile-time constant
// which allows much better optimizations of the memory accesses.
//
// ah[ r16   &7] = h
// ah[(r16+1)&7] = g;
// ah[(r16+2)&7] = f;
// ah[(r16+3)&7] = e;
// ah[(r16+4)&7] = d;
// ah[(r16+5)&7] = c;
// ah[(r16+6)&7] = b;
// ah[(r16+7)&7] = a;
//
// After that incrementing the round number will automatically map a->b, b->c, etc.
//
// The core round, after the message word has been computed for this round and put in Wt.
// r16 is the round number modulo 16. (Static after loop unrolling)
// r is the round number (dynamic, which is why we don't use (r&0xf) for r16)
// In more readable form this macro does the following:
//      h += CSIGMA1( e ) + CH( e, f, g ) + K[round] + W[round];
//      d += h;
//      h += CSIGMA0( a ) + MAJ( a, b, c );
//
// The macro relies on the variables ah[] and Wt being in scope at the point of use.
//
#define CROUND( r16, r ) {;\
    ah[ r16   &7] += CSIGMA1(ah[(r16+3)&7]) + CH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + SymCryptSha256K[r] + Wt;\
    ah[(r16+4)&7] += ah[r16 &7];\
    ah[ r16   &7] += CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]);\
}

//
// Initial round that reads the message.
// r is the round number 0..15
//
// Loads message word r MSB-first from pbData, saves it in W[r] for the message
// schedule, and runs the core round. Relies on pbData, W[] and Wt being in scope.
//
#define IROUND( r ) {\
    Wt = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] );\
    W[r] = Wt; \
    CROUND(r,r);\
    }

//
// Subsequent rounds.
// r16 is the round number mod 16. rb is the round number minus r16.
+// +#define FROUND(r16, rb) { \ + Wt = LSIGMA1( W[(r16-2) & 15] ) + W[(r16-7) & 15] + \ + LSIGMA0( W[(r16-15) & 15]) + W[r16 & 15]; \ + W[r16] = Wt; \ + CROUND( r16, r16+rb ); \ +} + +// +// UINT32 implementation 1 +// +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ul1( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + SYMCRYPT_ALIGN UINT32 W[16]; + SYMCRYPT_ALIGN UINT32 ah[8]; + int round; + UINT32 Wt; + + while( cbData >= 64 ) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + // + // initial rounds 1 to 16 + // + + IROUND( 0 ); + IROUND( 1 ); + IROUND( 2 ); + IROUND( 3 ); + IROUND( 4 ); + IROUND( 5 ); + IROUND( 6 ); + IROUND( 7 ); + IROUND( 8 ); + IROUND( 9 ); + IROUND( 10 ); + IROUND( 11 ); + IROUND( 12 ); + IROUND( 13 ); + IROUND( 14 ); + IROUND( 15 ); + + + // + // rounds 16 to 64. 
+ // + for( round=16; round<64; round += 16 ) + { + FROUND( 0, round ); + FROUND( 1, round ); + FROUND( 2, round ); + FROUND( 3, round ); + FROUND( 4, round ); + FROUND( 5, round ); + FROUND( 6, round ); + FROUND( 7, round ); + FROUND( 8, round ); + FROUND( 9, round ); + FROUND( 10, round ); + FROUND( 11, round ); + FROUND( 12, round ); + FROUND( 13, round ); + FROUND( 14, round ); + FROUND( 15, round ); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + + pbData += 64; + cbData -= 64; + + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipeKnownSize( ah, sizeof( ah ) ); + SymCryptWipeKnownSize( W, sizeof( W ) ); + SYMCRYPT_FORCE_WRITE32( &Wt, 0 ); +} + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ul2( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + // + // Different arrangement of the code, currently 25 c/B vs 20 c/b for the version above. + // On Atom: 50 c/B vs 41 c/B for the one above. 
+ // + SYMCRYPT_ALIGN UINT32 buf[4 + 8 + 64]; // chaining state concatenated with the expanded input block + UINT32 * W = &buf[4 + 8]; + UINT32 * ha = &buf[4]; // initial state words, in order h, g, ..., b, a + UINT32 A, B, C, D, T; + int r; + + ha[7] = pChain->H[0]; buf[3] = ha[7]; + ha[6] = pChain->H[1]; buf[2] = ha[6]; + ha[5] = pChain->H[2]; buf[1] = ha[5]; + ha[4] = pChain->H[3]; buf[0] = ha[4]; + ha[3] = pChain->H[4]; + ha[2] = pChain->H[5]; + ha[1] = pChain->H[6]; + ha[0] = pChain->H[7]; + + while( cbData >= 64 ) + { + // + // Capture the input into W[0..15] + // + for( r=0; r<16; r++ ) + { + W[r] = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] ); + } + + // + // Expand the message + // + A = W[15]; + B = W[14]; + D = W[0]; + for( r=16; r<64; r+= 2 ) + { + // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16] + + // + // Macro for one word of message expansion. + // Invariant: + // on entry: a = W[r-1], b = W[r-2], d = W[r-16] + // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15] + // + #define EXPAND( a, b, c, d, r ) \ + c = W[r-15]; \ + b = d + LSIGMA1( b ) + W[r-7] + LSIGMA0( c ); \ + W[r] = b; \ + + EXPAND( A, B, C, D, r ); + EXPAND( B, A, D, C, (r+1)); + + #undef EXPAND + } + + A = ha[7]; + B = ha[6]; + C = ha[5]; + D = ha[4]; + + for( r=0; r<64; r += 4 ) + { + // + // Loop invariant: + // A, B, C, and D are the a,b,c,d values of the current state. + // W[r] is the next expanded message word to be processed. + // W[r-8 .. r-5] contain the current state words h, g, f, e. 
+ // + + // + // Macro to compute one round + // + #define DO_ROUND( a, b, c, d, t, r ) \ + t = W[r] + CSIGMA1( W[r-5] ) + W[r-8] + CH( W[r-5], W[r-6], W[r-7] ) + SymCryptSha256K[r]; \ + W[r-4] = t + d; \ + d = t + CSIGMA0( a ) + MAJ( c, b, a ); + + DO_ROUND( A, B, C, D, T, r ); + DO_ROUND( D, A, B, C, T, (r+1) ); + DO_ROUND( C, D, A, B, T, (r+2) ); + DO_ROUND( B, C, D, A, T, (r+3) ); + #undef DO_ROUND + } + + buf[3] = ha[7] = buf[3] + A; + buf[2] = ha[6] = buf[2] + B; + buf[1] = ha[5] = buf[1] + C; + buf[0] = ha[4] = buf[0] + D; + ha[3] += W[r-5]; + ha[2] += W[r-6]; + ha[1] += W[r-7]; + ha[0] += W[r-8]; + + pbData += 64; + cbData -= 64; + } + + pChain->H[0] = ha[7]; + pChain->H[1] = ha[6]; + pChain->H[2] = ha[5]; + pChain->H[3] = ha[4]; + pChain->H[4] = ha[3]; + pChain->H[5] = ha[2]; + pChain->H[6] = ha[1]; + pChain->H[7] = ha[0]; + + *pcbRemaining = cbData; + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + SYMCRYPT_FORCE_WRITE32( &A, 0 ); + SYMCRYPT_FORCE_WRITE32( &B, 0 ); + SYMCRYPT_FORCE_WRITE32( &D, 0 ); + SYMCRYPT_FORCE_WRITE32( &T, 0 ); +} + +#undef CROUND +#undef IROUND +#undef FROUND + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +// +// Don't omit frame pointer for XMM code; it isn't register-starved as much +// +#if SYMCRYPT_CPU_X86 && SYMCRYPT_MS_VC && !defined(__clang__) +#pragma optimize( "y", off ) +#endif + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3,sha"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3,sha") +#endif + +// +// Code that uses the XMM registers. +// This code is currently unused. It was written in case it would provide better performance, but +// it did not. We are retaining it in case it might be useful in a future CPU generation. 
+// +#if 0 + +#define MAJXMM( x, y, z ) _mm_or_si128( _mm_and_si128( _mm_or_si128( z, y ), x ), _mm_and_si128( z, y )) +#define CHXMM( x, y, z ) _mm_xor_si128( _mm_and_si128( _mm_xor_si128( z, y ), x ), z ) + +#define CSIGMA0XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,30) , _mm_srli_epi32(x, 2) ),\ + _mm_slli_epi32(x,19) ), _mm_srli_epi32(x, 13) ),\ + _mm_slli_epi32(x,10) ), _mm_srli_epi32(x, 22) ) +#define CSIGMA1XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,26) , _mm_srli_epi32(x, 6) ),\ + _mm_slli_epi32(x,21) ), _mm_srli_epi32(x, 11) ),\ + _mm_slli_epi32(x,7) ), _mm_srli_epi32(x, 25) ) +#define LSIGMA0XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,25) , _mm_srli_epi32(x, 7) ),\ + _mm_slli_epi32(x,14) ), _mm_srli_epi32(x, 18) ),\ + _mm_srli_epi32(x, 3) ) +#define LSIGMA1XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,15) , _mm_srli_epi32(x, 17) ),\ + _mm_slli_epi32(x,13) ), _mm_srli_epi32(x, 19) ),\ + _mm_srli_epi32(x,10) ) + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_xmm1( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + // + // Implementation that has one value in each XMM register. + // This is significantly slower than the _ul1 implementation + // but can be extended to compute 4 hash blocks in parallel. 
+ // + SYMCRYPT_ALIGN __m128i buf[4 + 8 + 64]; // chaining state concatenated with the expanded input block + __m128i * W = &buf[4 + 8]; + __m128i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a + __m128i A, B, C, D, T; + int r; + + // + // For 1-input only; set the input buffer to zero so that we have known values in every byte + // + //SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + // + // Copy the chaining state into the start of the buffer, order = h,g,f,e,d,c,b,a + // + ha[7] = _mm_insert_epi32(ha[7], pChain->H[0], 0); + ha[6] = _mm_insert_epi32(ha[6], pChain->H[1], 0); + ha[5] = _mm_insert_epi32(ha[5], pChain->H[2], 0); + ha[4] = _mm_insert_epi32(ha[4], pChain->H[3], 0); + ha[3] = _mm_insert_epi32(ha[3], pChain->H[4], 0); + ha[2] = _mm_insert_epi32(ha[2], pChain->H[5], 0); + ha[1] = _mm_insert_epi32(ha[1], pChain->H[6], 0); + ha[0] = _mm_insert_epi32(ha[0], pChain->H[7], 0); + + buf[0] = ha[4]; + buf[1] = ha[5]; + buf[2] = ha[6]; + buf[3] = ha[7]; + + while( cbData >= 64 ) + { + + // + // Capture the input into W[0..15] + // + for( r=0; r<16; r++ ) + { + W[r] = _mm_insert_epi32(W[r], SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] ), 0); + } + + // + // Expand the message + // + A = W[15]; + B = W[14]; + D = W[0]; + for( r=16; r<64; r+= 2 ) + { + // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16] + + // + // Macro for one word of message expansion. + // Invariant: + // on entry: a = W[r-1], b = W[r-2], d = W[r-16] + // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15] + // + #define EXPAND( a, b, c, d, r ) \ + c = W[r-15]; \ + b = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( d, LSIGMA1XMM( b ) ), W[r-7] ), LSIGMA0XMM( c ) ); \ + W[r] = b; \ + + EXPAND( A, B, C, D, r ); + EXPAND( B, A, D, C, (r+1)); + + #undef EXPAND + } + + A = ha[7]; + B = ha[6]; + C = ha[5]; + D = ha[4]; + + for( r=0; r<64; r += 4 ) + { + // + // Loop invariant: + // A, B, C, and D are the a,b,c,d values of the current state. 
+ // W[r] is the next expanded message word to be processed. + // W[r-8 .. r-5] contain the current state words h, g, f, e. + // + + // + // Macro to compute one round + // + #define DO_ROUND( a, b, c, d, t, r ) \ + t = W[r]; \ + t = _mm_add_epi32( t, CSIGMA1XMM( W[r-5] ) ); \ + t = _mm_add_epi32( t, W[r-8] ); \ + t = _mm_add_epi32( t, CHXMM( W[r-5], W[r-6], W[r-7] ) ); \ + t = _mm_add_epi32( t, _mm_cvtsi32_si128( SymCryptSha256K[r] ) ); \ + W[r-4] = _mm_add_epi32( t, d ); \ + d = _mm_add_epi32( t, CSIGMA0XMM( a ) ); \ + d = _mm_add_epi32( d, MAJXMM( c, b, a ) ); + + DO_ROUND( A, B, C, D, T, r ); + DO_ROUND( D, A, B, C, T, (r+1) ); + DO_ROUND( C, D, A, B, T, (r+2) ); + DO_ROUND( B, C, D, A, T, (r+3) ); + #undef DO_ROUND + } + + buf[3] = ha[7] = _mm_add_epi32( buf[3], A ); + buf[2] = ha[6] = _mm_add_epi32( buf[2], B ); + buf[1] = ha[5] = _mm_add_epi32( buf[1], C ); + buf[0] = ha[4] = _mm_add_epi32( buf[0], D ); + ha[3] = _mm_add_epi32( ha[3], W[r-5] ); + ha[2] = _mm_add_epi32( ha[2], W[r-6] ); + ha[1] = _mm_add_epi32( ha[1], W[r-7] ); + ha[0] = _mm_add_epi32( ha[0], W[r-8] ); + + pbData += 64; + cbData -= 64; + } + + // + // Copy the chaining state back into the hash structure + // + pChain->H[0] = _mm_extract_epi32(ha[7], 0); + pChain->H[1] = _mm_extract_epi32(ha[6], 0); + pChain->H[2] = _mm_extract_epi32(ha[5], 0); + pChain->H[3] = _mm_extract_epi32(ha[4], 0); + pChain->H[4] = _mm_extract_epi32(ha[3], 0); + pChain->H[5] = _mm_extract_epi32(ha[2], 0); + pChain->H[6] = _mm_extract_epi32(ha[1], 0); + pChain->H[7] = _mm_extract_epi32(ha[0], 0); + + *pcbRemaining = cbData; + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + SymCryptWipeKnownSize( &A, sizeof( A ) ); + SymCryptWipeKnownSize( &B, sizeof( B ) ); + SymCryptWipeKnownSize( &C, sizeof( C ) ); + SymCryptWipeKnownSize( &D, sizeof( D ) ); + SymCryptWipeKnownSize( &T, sizeof( T ) ); +} + + +// +// XMM implementation 2 +// We use the XMM registers to compute part of the message schedule. 
// The load, BSWAP, and part of the message schedule recursion are done in XMM registers.
// The rest of the work is done using integers.
//
// Core2: 0.1 c/B slower than the _ul1
// Atom: 1.0 c/B slower than _ul1 (42.34 vs 41.39 c/B)
//
// Processes all whole 64-byte blocks, updates *pChain, and reports the number of
// unprocessed trailing bytes in *pcbRemaining.
//
VOID
SYMCRYPT_CALL
SymCryptSha256AppendBlocks_xmm2(
    _Inout_                 SYMCRYPT_SHA256_CHAINING_STATE *    pChain,
    _In_reads_( cbData )    PCBYTE                              pbData,
                            SIZE_T                              cbData,
    _Out_                   SIZE_T                            * pcbRemaining )
{
    SYMCRYPT_ALIGN union { UINT32 ul[16]; __m128i xmm[4]; } W;
    SYMCRYPT_ALIGN UINT32 ah[8];
    int round;
    UINT32 Wt;
    // Shuffle mask that reverses the byte order inside each 32-bit word.
    const __m128i BYTE_REVERSE_32 = _mm_set_epi8( 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 );

    // Unlike _ul1, the working variables are loaded once here; the feed-forward at the
    // end of each block leaves ah[] equal to the updated chaining value.
    ah[7] = pChain->H[0];
    ah[6] = pChain->H[1];
    ah[5] = pChain->H[2];
    ah[4] = pChain->H[3];
    ah[3] = pChain->H[4];
    ah[2] = pChain->H[5];
    ah[1] = pChain->H[6];
    ah[0] = pChain->H[7];

#define CROUND( r16, r ) {;\
    ah[ r16   &7] += CSIGMA1(ah[(r16+3)&7]) + CH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + SymCryptSha256K[r] + Wt;\
    ah[(r16+4)&7] += ah[r16 &7];\
    ah[ r16   &7] += CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]);\
}


//
// Initial round that reads the message.
// r is the round number 0..15
//
// The scalar load used by the other implementations, kept for reference
// (here the words are loaded and byte-swapped in bulk before the rounds):
//    Wt = LOAD_MSBFIRST32( &pbData[ 4*r ] );
//    W.ul[r] = Wt;

#define IROUND( r ) {\
    Wt = W.ul[r];\
    CROUND(r,r);\
    }

//
// Subsequent rounds.
// r16 is the round number mod 16. rb is the round number minus r16.
// The message word W.ul[r16] has already been updated before the macro is used.
//
#define FROUND(r16, rb) { \
    Wt = W.ul[r16];\
    CROUND( r16, r16+rb ); \
}


    while( cbData >= 64 )
    {
        //
        // The code is faster if we directly access the W.ul array, rather than the W.xmm alias.
        // I think the compiler gets more confused if you use the W.xmm values.
        // We retain them in the union to ensure alignment
        //
        _mm_store_si128( (__m128i *)&W.ul[ 0], _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *)&pbData[  0 ] ), BYTE_REVERSE_32 ));
        _mm_store_si128( (__m128i *)&W.ul[ 4], _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *)&pbData[ 16 ] ), BYTE_REVERSE_32 ));
        _mm_store_si128( (__m128i *)&W.ul[ 8], _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *)&pbData[ 32 ] ), BYTE_REVERSE_32 ));
        _mm_store_si128( (__m128i *)&W.ul[12], _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *)&pbData[ 48 ] ), BYTE_REVERSE_32 ));

        //
        // initial rounds 0 to 15
        //

        IROUND(  0 );
        IROUND(  1 );
        IROUND(  2 );
        IROUND(  3 );
        IROUND(  4 );
        IROUND(  5 );
        IROUND(  6 );
        IROUND(  7 );
        IROUND(  8 );
        IROUND(  9 );
        IROUND( 10 );
        IROUND( 11 );
        IROUND( 12 );
        IROUND( 13 );
        IROUND( 14 );
        IROUND( 15 );


        //
        // rounds 16 to 63.
        //
        // W.ul[] is a 16-word circular buffer. For each group of four words the XMM
        // code computes W[t-16] + sigma0( W[t-15] ) + W[t-7]; the sigma1( W[t-2] ) term
        // is then added with integer code, two words depending on the previous two.
        //
        for( round=16; round<64; round += 16 )
        {
            __m128i Tmp;

            // Words 0..3: sigma0 of words 1..4, plus words 0..3, plus words 9..12
            Tmp = _mm_add_epi32( _mm_add_epi32(
                    LSIGMA0XMM(_mm_loadu_si128( (__m128i *)&W.ul[1] )),
                    _mm_load_si128( (__m128i *)&W.ul[0] ) ),
                    _mm_loadu_si128( (__m128i *)&W.ul[9] ) );

            //
            // The final part of the message schedule can be done in XMM registers, but it isn't worth it.
            // The rotates in XMM take two shifts and an OR/XOR, vs one instruction in integer registers.
            // As the sigma1( W_{t-2} ) recursion component can only be computed 2 at a time
            // (because the result of the first two are the inputs to the second two)
            // you lose more than you gain by using XMM registers.
            //
            //Tmp = _mm_add_epi32( Tmp, LSIGMA1XMM( _mm_srli_si128( _mm_load_si128( (__m128i *)&W.ul[12] ), 8 ) ) );
            //Tmp = _mm_add_epi32( Tmp, LSIGMA1XMM( _mm_slli_si128( Tmp, 8 ) ) );
            //_mm_store_si128( (__m128i *)&W.ul[0], Tmp );
            //

            _mm_store_si128( (__m128i *)&W.ul[0], Tmp );
            W.ul[0] += LSIGMA1( W.ul[14] );
            W.ul[1] += LSIGMA1( W.ul[15] );
            W.ul[2] += LSIGMA1( W.ul[0] );
            W.ul[3] += LSIGMA1( W.ul[1] );

            FROUND(  0, round );
            FROUND(  1, round );
            FROUND(  2, round );
            FROUND(  3, round );

            // Words 4..7: the W[t-7] operand wraps around the buffer, so it is assembled
            // from words 12..15 and the already updated words 0..3.
            Tmp = _mm_add_epi32( _mm_add_epi32(
                    LSIGMA0XMM(_mm_loadu_si128( (__m128i *)&W.ul[5] )),
                    _mm_load_si128( (__m128i *)&W.ul[4] ) ),
                    _mm_alignr_epi8( _mm_load_si128( (__m128i *)&W.ul[0] ), _mm_load_si128( (__m128i *)&W.ul[12] ), 4) );

            _mm_store_si128( (__m128i *)&W.ul[4], Tmp );

            W.ul[4] += LSIGMA1( W.ul[2] );
            W.ul[5] += LSIGMA1( W.ul[3] );
            W.ul[6] += LSIGMA1( W.ul[4] );
            W.ul[7] += LSIGMA1( W.ul[5] );

            FROUND(  4, round );
            FROUND(  5, round );
            FROUND(  6, round );
            FROUND(  7, round );

            // Words 8..11: sigma0 of words 9..12, plus words 8..11, plus words 1..4
            Tmp = _mm_add_epi32( _mm_add_epi32(
                    LSIGMA0XMM(_mm_loadu_si128( (__m128i *)&W.ul[9] )),
                    _mm_load_si128( (__m128i *)&W.ul[8] ) ),
                    _mm_loadu_si128( (__m128i *)&W.ul[1] ) );

            _mm_store_si128( (__m128i *)&W.ul[8], Tmp );
            W.ul[ 8] += LSIGMA1( W.ul[6] );
            W.ul[ 9] += LSIGMA1( W.ul[7] );
            W.ul[10] += LSIGMA1( W.ul[8] );
            W.ul[11] += LSIGMA1( W.ul[9] );

            FROUND(  8, round );
            FROUND(  9, round );
            FROUND( 10, round );
            FROUND( 11, round );


            // Words 12..15: here the sigma0 operand wraps around the buffer and is
            // assembled from words 12..15 and words 0..3.
            Tmp = _mm_add_epi32( _mm_add_epi32(
                    LSIGMA0XMM( _mm_alignr_epi8( _mm_load_si128( (__m128i *)&W.ul[0] ), _mm_load_si128( (__m128i *)&W.ul[12] ), 4) ),
                    _mm_load_si128( (__m128i *)&W.ul[12] ) ),
                    _mm_loadu_si128( (__m128i *)&W.ul[5] ) );

            _mm_store_si128( (__m128i *)&W.ul[12], Tmp );
            W.ul[12] += LSIGMA1( W.ul[10] );
            W.ul[13] += LSIGMA1( W.ul[11] );
            W.ul[14] += LSIGMA1( W.ul[12] );
            W.ul[15] += LSIGMA1( W.ul[13] );

            FROUND( 12, round );
            FROUND( 13, round );
            FROUND( 14, round );
            FROUND( 15, round );
        }

        // Feed-forward; ah[] is also updated so it is ready for the next block.
        pChain->H[0] = ah[7] = ah[7] + pChain->H[0];
        pChain->H[1] = ah[6] = ah[6] + pChain->H[1];
        pChain->H[2] = ah[5] = ah[5] + pChain->H[2];
        pChain->H[3] = ah[4] = ah[4] + pChain->H[3];
        pChain->H[4] = ah[3] = ah[3] + pChain->H[4];
        pChain->H[5] = ah[2] = ah[2] + pChain->H[5];
        pChain->H[6] = ah[1] = ah[1] + pChain->H[6];
        pChain->H[7] = ah[0] = ah[0] + pChain->H[7];

        pbData += 64;
        cbData -= 64;

    }

    *pcbRemaining = cbData;

    //
    // Wipe the variables;
    //
    SymCryptWipeKnownSize( ah, sizeof( ah ) );
    SymCryptWipeKnownSize( &W, sizeof( W ) );
    SYMCRYPT_FORCE_WRITE32( &Wt, 0 );

#undef IROUND
#undef FROUND
#undef CROUND
}

#endif

//
// SHA-NI Implementation
//

#if SYMCRYPT_MS_VC && !defined(__clang__)
// Intrinsic definitions included here
// until the header is updated.
// *******************************
// *******************************
// *******************************
extern __m128i _mm_sha256rnds2_epu32(__m128i, __m128i, __m128i);
extern __m128i _mm_sha256msg1_epu32(__m128i, __m128i);
extern __m128i _mm_sha256msg2_epu32(__m128i, __m128i);
// *******************************
// *******************************
// *******************************
#endif

// For the SHA-NI implementation we will utilize 128-bit XMM registers. Each
// XMM state will be denoted as (R_3, R_2, R_1, R_0), where each R_i
// is a 32-bit word and R_i refers to bits [32*i : (32*i + 31)] of the
// 128-bit XMM state.
//
// The following macro updates the state variables A,B,C,...,H of the SHA algorithms
// for 4 rounds using:
//      - The current round number t with 0<=t<= 63 and t a multiple of 4.
//      - A current message XMM state _MSG which consists of 4 32-bit words
//        ( W_(t+3), W_(t+2), W_(t+1), W_(t+0) ).
//      - Two XMM states _ABEF and _CDGH which contain the variables
//        ( A, B, E, F ) and ( C, D, G, H ) respectively.
+ +#define SHANI_UPDATE_STATE( _round, _MSG, _ABEF, _CDGH ) \ + _MSG = _mm_add_epi32( _MSG, *(__m128i *)&SymCryptSha256K[_round] ); /* Add the K_t constants to the W_t's */ \ + _CDGH = _mm_sha256rnds2_epu32( _CDGH, _ABEF, _MSG ); /* 2 rounds using SHA-NI */ \ + _MSG = _mm_shuffle_epi32( _MSG, 0x0e ); /* Move words 2 & 3 to positions 0 & 1 */ \ + _ABEF = _mm_sha256rnds2_epu32( _ABEF, _CDGH, _MSG ); /* 2 rounds using SHA-NI */ + +// For the SHA message schedule (i.e. to create words W_16 to W_63) we use 4 XMM states / accumulators. +// Each accumulator holds 4 words. +// +// The final result for each word will be of the form W_t = X_t + Y_t, where +// X_t = W_(t-16) + \sigma_0(W_(t-15)) and +// Y_t = W_(t- 7) + \sigma_1(W_(t- 2)) +// +// The X_t's are calculated by the _mm_sha256msg1_epu32 intrinsic. +// The \sigma_1(W_(t-2)) part of the Y_t's by the _mm_sha256msg2_epu32 intrinsic. +// +// Remarks: +// - Calculation of the first four X_t's (i.e. 16<=t<=19) can start from round 4 (since 19-15 = 4). +// - Calculation of the first four Y_t's can start from round 12 (since 19-7=12 and W_(19-7) is calculated +// in the intrinsic call). +// - Due to the W_(t-7) term, producing the Y_t's need special shifting via the _mm_alignr_epi8 intrinsic and +// adding the correct accumulator into another variable MTEMP. +// +// For rounds 16 - 51 we execute the following macro in a loop. For all the other rounds we +// use specific code. 
+// +// The loop invariant to be satisfied at the beginning of iteration i (corresponding to rounds +// (16+4*i) to (19+4*i) ) is the following: +// _MSG_0 = ( W_(19 + 4*i), W_(18 + 4*i), W_(17 + 4*i), W_(16 + 4*i) ) +// _MSG_1 = ( X_(23 + 4*i), X_(22 + 4*i), X_(21 + 4*i), X_(20 + 4*i) ) +// _MSG_2 = ( X_(27 + 4*i), X_(26 + 4*i), X_(25 + 4*i), X_(24 + 4*i) ) +// _MSG_3 = ( W_(15 + 4*i), W_(14 + 4*i), W_(13 + 4*i), W_(12 + 4*i) ) +// +#define SHANI_MESSAGE_SCHEDULE( _MSG_0, _MSG_1, _MSG_2, _MSG_3, _MTEMP ) \ + _MTEMP = _mm_alignr_epi8( _MSG_0, _MSG_3, 4); /* _MTEMP := ( W_(16 + 4*i), W_(15 + 4*i), W_(14 + 4*i), W_(13 + 4*i) ) */ \ + _MSG_1 = _mm_add_epi32( _MSG_1, _MTEMP); /* _MSG_1 := _MSG_1 + ( W_(16 + 4*i), W_(15 + 4*i), W_(14 + 4*i), W_(13 + 4*i) ) */ \ + _MSG_1 = _mm_sha256msg2_epu32( _MSG_1, _MSG_0 ); /* _MSG_1 := ( W_(23 + 4*i), W_(22 + 4*i), W_(21 + 4*i), W_(20 + 4*i) ) */ \ + _MSG_3 = _mm_sha256msg1_epu32( _MSG_3, _MSG_0 ); /* _MSG_3 := ( X_(31+4*i), X_(30+4*i), X_(29+4*i), X_(28+4*i) ) */ +// +// After each iteration the subsequent call rotates the accumulators so that the loop +// invariant is preserved (please verify!): +// -- MSG_0 <---- MSG_1 <--- MSG_2 <--- MSG_3 <-- +// | | +// ---------------------------------------------- + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_shani( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + const __m128i BYTE_REVERSE_32 = _mm_set_epi8( 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 ); + + // Our chain state is in order A, B, ..., H. 
+ // First load our chaining state + __m128i DCBA = _mm_loadu_si128( (__m128i *)&(pChain->H[0]) ); // (D, C, B, A) + __m128i HGFE = _mm_loadu_si128( (__m128i *)&(pChain->H[4]) ); // (H, G, F, E) + __m128i FEBA = _mm_unpacklo_epi64( DCBA, HGFE ); // (F, E, B, A) + __m128i HGDC = _mm_unpackhi_epi64( DCBA, HGFE ); // (H, G, D, C) + __m128i ABEF = _mm_shuffle_epi32( FEBA, 0x1b ); // (A, B, E, F) + __m128i CDGH = _mm_shuffle_epi32( HGDC, 0x1b ); // (C, D, G, H) + + while( cbData >= 64 ) + { + // Save the current state for the feed-forward later + __m128i ABEF_start = ABEF; + __m128i CDGH_start = CDGH; + + // Current message and temporary state + __m128i MSG; + + // Accumulators + __m128i MSG_0; + __m128i MSG_1; + __m128i MSG_2; + __m128i MSG_3; + + // Rounds 0-3 + MSG = _mm_loadu_si128( (__m128i *)pbData ); // Reversed word - ( M_3, M_2, M_1, M_0 ) + pbData += 16; + MSG = _mm_shuffle_epi8( MSG, BYTE_REVERSE_32 ); // Reverse each word + MSG_0 = MSG; // MSG_0 := ( W_3 = M3, W_2 = M_2, W_1 = M_1, W_0 = M_0 ) + + SHANI_UPDATE_STATE( 0, MSG, ABEF, CDGH ); + + // Rounds 4-7 + MSG = _mm_loadu_si128( (__m128i *)pbData ); // Reversed word - ( M_7, M_6, M_5, M_4 ) + pbData += 16; + MSG = _mm_shuffle_epi8( MSG, BYTE_REVERSE_32 ); // Reverse each word + MSG_1 = MSG; // MSG_1 := ( W_7 = M_7, W_6 = M_6, W_5 = M_5, W_4 = M_4 ) + + SHANI_UPDATE_STATE( 4, MSG, ABEF, CDGH ); + + MSG_0 = _mm_sha256msg1_epu32( MSG_0, MSG_1 ); // MSG_0 := ( X_19, X_18, X_17, X_16 ) = + // ( W_3 + \sigma_0(W_4), ..., W_0 + \sigma_0(W_1) ) + + // Rounds 8-11 + MSG = _mm_loadu_si128( (__m128i *)pbData ); // Reversed word - ( M_11, M_10, M_9, M_8 ) + pbData += 16; + MSG = _mm_shuffle_epi8( MSG, BYTE_REVERSE_32 ); // Reverse each word + MSG_2 = MSG; // MSG_2 := ( W_11 = M_11, W_10 = M_10, W_9 = M_9, W_8 = M_8 ) + + SHANI_UPDATE_STATE( 8, MSG, ABEF, CDGH ); + + MSG_1 = _mm_sha256msg1_epu32( MSG_1, MSG_2 ); // MSG_1 := ( X_23, X_22, X_21, X_20 ) + + // Rounds 12-15 + MSG = _mm_loadu_si128( (__m128i *)pbData ); // 
Reversed word - ( M_15, M_14, M_13, M_12 ) + pbData += 16; + MSG = _mm_shuffle_epi8( MSG, BYTE_REVERSE_32 ); // Reverse each word + MSG_3 = MSG; // MSG_3 := ( W_15 = M_15, W_14 = M_14, W_13 = M_13, W_12 = M_12 ) + + SHANI_UPDATE_STATE( 12, MSG, ABEF, CDGH ); + + MSG = _mm_alignr_epi8( MSG_3, MSG_2, 4); // MSG := ( W_12, W_11, W_10, W_9 ) + MSG_0 = _mm_add_epi32( MSG_0, MSG); // MSG_0 := MSG_0 + ( W_12, W_11, W_10, W_9 ) + MSG_0 = _mm_sha256msg2_epu32( MSG_0, MSG_3 ); // MSG_0 := ( W_19, W_18, W_17, W_16 ) = + // ( X_19 + W_12 + \sigma_1(W_17)], ..., X_16 + W_9 + \sigma_1(W_14)] ) + + MSG_2 = _mm_sha256msg1_epu32( MSG_2, MSG_3 ); // MSG_2 := ( X_27, X_26, X_25, X_24 ) + + + // Rounds 16 - 19 + MSG = MSG_0; + SHANI_UPDATE_STATE( 16, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_0, MSG_1, MSG_2, MSG_3, MSG ); + + // Rounds 20 - 23 + MSG = MSG_1; + SHANI_UPDATE_STATE( 20, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_1, MSG_2, MSG_3, MSG_0, MSG ); + + // Rounds 24 - 27 + MSG = MSG_2; + SHANI_UPDATE_STATE( 24, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_2, MSG_3, MSG_0, MSG_1, MSG ); + + // Rounds 28 - 31 + MSG = MSG_3; + SHANI_UPDATE_STATE( 28, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_3, MSG_0, MSG_1, MSG_2, MSG ); + + // Rounds 32 - 35 + MSG = MSG_0; + SHANI_UPDATE_STATE( 32, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_0, MSG_1, MSG_2, MSG_3, MSG ); + + // Rounds 36 - 39 + MSG = MSG_1; + SHANI_UPDATE_STATE( 36, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_1, MSG_2, MSG_3, MSG_0, MSG ); + + // Rounds 40 - 43 + MSG = MSG_2; + SHANI_UPDATE_STATE( 40, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_2, MSG_3, MSG_0, MSG_1, MSG ); + + // Rounds 44 - 47 + MSG = MSG_3; + SHANI_UPDATE_STATE( 44, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_3, MSG_0, MSG_1, MSG_2, MSG ); + + // Rounds 48 - 51 + MSG = MSG_0; + SHANI_UPDATE_STATE( 48, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_0, MSG_1, MSG_2, MSG_3, MSG ); + + // Rounds 52 - 55 + MSG = 
MSG_1; // ( W_55, W_54, W_53, W_52 ) + SHANI_UPDATE_STATE( 52, MSG, ABEF, CDGH ); + + MSG = _mm_alignr_epi8( MSG_1, MSG_0, 4); // MSG := ( W_52, W_51, W_50, W_49 ) + MSG_2 = _mm_add_epi32( MSG_2, MSG); // MSG_2 := MSG_2 + ( W_52, W_51, W_50, W_49 ) + MSG_2 = _mm_sha256msg2_epu32( MSG_2, MSG_1 ); // Calculate ( W_59, W_58, W_57, W_56 ) + + // Rounds 56 - 59 + MSG = MSG_2; // ( W_59, W_58, W_57, W_56 ) + SHANI_UPDATE_STATE( 56, MSG, ABEF, CDGH ); + + MSG = _mm_alignr_epi8( MSG_2, MSG_1, 4); // MSG := ( W_56, W_55, W_54, W_53 ) + MSG_3 = _mm_add_epi32( MSG_3, MSG); // MSG_3 := MSG_3 + ( W_56, W_55, W_54, W_53 ) + MSG_3 = _mm_sha256msg2_epu32( MSG_3, MSG_2 ); // Calculate ( W_63, W_62, W_61, W_60 ) + + // Rounds 60 - 63 + SHANI_UPDATE_STATE( 60, MSG_3, ABEF, CDGH ); + + // Add the feed-forward + ABEF = _mm_add_epi32( ABEF, ABEF_start ); + CDGH = _mm_add_epi32( CDGH, CDGH_start ); + + cbData -= 64; + } + + // Unpack the state registers and store them in the state + FEBA = _mm_shuffle_epi32( ABEF, 0x1b ); + HGDC = _mm_shuffle_epi32( CDGH, 0x1b ); + DCBA = _mm_unpacklo_epi64( FEBA, HGDC ); // (D, C, B, A) + HGFE = _mm_unpackhi_epi64( FEBA, HGDC ); // (H, G, F, E) + _mm_storeu_si128 ( (__m128i *)&(pChain->H[0]), DCBA); // (D, C, B, A) + _mm_storeu_si128 ( (__m128i *)&(pChain->H[4]), HGFE); // (H, G, F, E) + + *pcbRemaining = cbData; +} + +#undef SHANI_UPDATE_STATE +#undef SHANI_MESSAGE_SCHEDULE + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#if SYMCRYPT_CPU_ARM64 +/* +ARM64 has special SHA-256 instructions + +SHA256H and SHA256H2 implement 4 rounds of SHA-256. The inputs are two registers containing the 256-bit state, +and one register containing 128 bits of expanded message plus the round constants. +These instructions perform the same computation, but SHA256H returns the first half of the 256-bit result, +and SHA256H2 returns the second half of the 256-bit result. 
+ +SHA256H( ABCD, EFGH, W ) +Where the least significant word of the ABCD vector is A. The W vector contains W_i + K_i for the four rounds being computed. + +SHA256SU0 is the message schedule update function. +It takes 2 inputs and produces 1 output. +We describe the vectors for i=0,1,2,3 +Inputs: [W_{t-16+i}], [W_{t-12+i}] +Output: [Sigma0(W_{t-15+i}) + W_{t-16+i}] + +SHA256SU1 is the second message schedule update function +Takes 3 inputs and produces 1 output +Input 1: Output of SHA256SU0: [Sigma0(W_{t-15+i}) + W_{t-16+i}] +Input 2: [W_{t-8+i}] +Input 3: [W_{t-4+i}] + +*/ + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("sha2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("sha2") +#endif + +#define vldq(_p) (*(__n128 *)(_p)) +#define vstq(_p, _v) (*(__n128 *)(_p) = (_v) ) + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_instr( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + // + // Armv8 has 32 Neon registers. We can use a lot of variables. 
+ // 16 for the constants, 4 for the message, 2 for the current state, 2 for the starting state, + // total = 24 which leaves enough for some temp values + // + __n128 ABCD, ABCDstart; + __n128 EFGH, EFGHstart; + __n128 W0, W1, W2, W3; + __n128 K0, K1, K2, K3, K4, K5, K6, K7, K8, K9, K10, K11, K12, K13, K14, K15; + + __n128 Wr; + __n128 t; + + ABCD = ABCDstart = vldq( &pChain->H[0] ); + EFGH = EFGHstart = vldq( &pChain->H[4] ); + + K0 = vldq( &SymCryptSha256K[ 4 * 0 ] ); + K1 = vldq( &SymCryptSha256K[ 4 * 1 ] ); + K2 = vldq( &SymCryptSha256K[ 4 * 2 ] ); + K3 = vldq( &SymCryptSha256K[ 4 * 3 ] ); + K4 = vldq( &SymCryptSha256K[ 4 * 4 ] ); + K5 = vldq( &SymCryptSha256K[ 4 * 5 ] ); + K6 = vldq( &SymCryptSha256K[ 4 * 6 ] ); + K7 = vldq( &SymCryptSha256K[ 4 * 7 ] ); + K8 = vldq( &SymCryptSha256K[ 4 * 8 ] ); + K9 = vldq( &SymCryptSha256K[ 4 * 9 ] ); + K10 = vldq( &SymCryptSha256K[ 4 * 10 ] ); + K11 = vldq( &SymCryptSha256K[ 4 * 11 ] ); + K12 = vldq( &SymCryptSha256K[ 4 * 12 ] ); + K13 = vldq( &SymCryptSha256K[ 4 * 13 ] ); + K14 = vldq( &SymCryptSha256K[ 4 * 14 ] ); + K15 = vldq( &SymCryptSha256K[ 4 * 15 ] ); + + while( cbData >= 64 ) + { + W0 = vrev32q_u8( vldq( &pbData[ 0] ) ); + W1 = vrev32q_u8( vldq( &pbData[16] ) ); + W2 = vrev32q_u8( vldq( &pbData[32] ) ); + W3 = vrev32q_u8( vldq( &pbData[48] ) ); + + // + // The sha256h/sha256h2 instructions overwrite one of the two state input registers. + // This implies we have to have a copy made of one of the input states. 
+ // +#define ROUNDOP {\ + t = ABCD;\ + ABCD = vsha256hq_u32 ( ABCD, EFGH, Wr );\ + EFGH = vsha256h2q_u32( EFGH, t, Wr );\ + } + + Wr = vaddq_u32( W0, K0 ); + ROUNDOP; + Wr = vaddq_u32( W1, K1 ); + ROUNDOP; + Wr = vaddq_u32( W2, K2 ); + ROUNDOP; + Wr = vaddq_u32( W3, K3 ); + ROUNDOP; + + t = vsha256su0q_u32( W0, W1 ); + W0 = vsha256su1q_u32( t, W2, W3 ); + Wr = vaddq_u32( W0, K4 ); + ROUNDOP; + + t = vsha256su0q_u32( W1, W2 ); + W1 = vsha256su1q_u32( t, W3, W0 ); + Wr = vaddq_u32( W1, K5 ); + ROUNDOP; + + t = vsha256su0q_u32( W2, W3 ); + W2 = vsha256su1q_u32( t, W0, W1 ); + Wr = vaddq_u32( W2, K6 ); + ROUNDOP; + + t = vsha256su0q_u32( W3, W0 ); + W3 = vsha256su1q_u32( t, W1, W2 ); + Wr = vaddq_u32( W3, K7 ); + ROUNDOP; + + + t = vsha256su0q_u32( W0, W1 ); + W0 = vsha256su1q_u32( t, W2, W3 ); + Wr = vaddq_u32( W0, K8 ); + ROUNDOP; + + t = vsha256su0q_u32( W1, W2 ); + W1 = vsha256su1q_u32( t, W3, W0 ); + Wr = vaddq_u32( W1, K9 ); + ROUNDOP; + + t = vsha256su0q_u32( W2, W3 ); + W2 = vsha256su1q_u32( t, W0, W1 ); + Wr = vaddq_u32( W2, K10 ); + ROUNDOP; + + t = vsha256su0q_u32( W3, W0 ); + W3 = vsha256su1q_u32( t, W1, W2 ); + Wr = vaddq_u32( W3, K11 ); + ROUNDOP; + + + t = vsha256su0q_u32( W0, W1 ); + W0 = vsha256su1q_u32( t, W2, W3 ); + Wr = vaddq_u32( W0, K12 ); + ROUNDOP; + + t = vsha256su0q_u32( W1, W2 ); + W1 = vsha256su1q_u32( t, W3, W0 ); + Wr = vaddq_u32( W1, K13 ); + ROUNDOP; + + t = vsha256su0q_u32( W2, W3 ); + W2 = vsha256su1q_u32( t, W0, W1 ); + Wr = vaddq_u32( W2, K14 ); + ROUNDOP; + + t = vsha256su0q_u32( W3, W0 ); + W3 = vsha256su1q_u32( t, W1, W2 ); + Wr = vaddq_u32( W3, K15 ); + ROUNDOP; + + ABCDstart = ABCD = vaddq_u32( ABCDstart, ABCD ); + EFGHstart = EFGH = vaddq_u32( EFGHstart, EFGH ); + + pbData += 64; + cbData -= 64; +#undef ROUNDOP + + } + + *pcbRemaining = cbData; + vstq( &pChain->H[0], ABCD ); + vstq( &pChain->H[4], EFGH ); + + // + // All our local variables should be in registers, so no way to wipe them. 
+ // +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif + + + +// +// Easy switch between different implementations +// +//FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ +#if SYMCRYPT_CPU_AMD64 + + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURES_FOR_SHANI_CODE) && + SymCryptSaveXmm(&SaveData) == SYMCRYPT_NO_ERROR) + { + SymCryptSha256AppendBlocks_shani(pChain, pbData, cbData, pcbRemaining); + + SymCryptRestoreXmm(&SaveData); + } + // Temporarily disabling use of Ymm in SHA2 + // else if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_BMI2) && + // SymCryptSaveYmm(&SaveData) == SYMCRYPT_NO_ERROR) + // { + // //SymCryptSha256AppendBlocks_ul1(pChain, pbData, cbData, pcbRemaining); + // //SymCryptSha256AppendBlocks_ymm_8blocks(pChain, pbData, cbData, pcbRemaining); + // SymCryptSha256AppendBlocks_ymm_avx2_asm(pChain, pbData, cbData, pcbRemaining); + + // SymCryptRestoreYmm(&SaveData); + // } + else if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_BMI2) && + SymCryptSaveXmm(&SaveData) == SYMCRYPT_NO_ERROR) + { + //SymCryptSha256AppendBlocks_xmm_4blocks(pChain, pbData, cbData, pcbRemaining); + SymCryptSha256AppendBlocks_xmm_ssse3_asm(pChain, pbData, cbData, pcbRemaining); + + SymCryptRestoreXmm(&SaveData); + } + else + { + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); + //SymCryptSha256AppendBlocks_ul2(pChain, pbData, cbData, pcbRemaining); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_SHANI_CODE | SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptSha256AppendBlocks_shani( pChain, pbData, 
cbData, pcbRemaining ); + SymCryptRestoreXmm( &SaveData ); + } + else if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_BMI2) + && SymCryptSaveXmm(&SaveData) == SYMCRYPT_NO_ERROR) + { + SymCryptSha256AppendBlocks_xmm_4blocks(pChain, pbData, cbData, pcbRemaining); + SymCryptRestoreXmm(&SaveData); + } + else { + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_SHA256 ) ) + { + SymCryptSha256AppendBlocks_instr( pChain, pbData, cbData, pcbRemaining ); + } else { + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); + } +#else + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); +#endif + + //SymCryptSha256AppendBlocks_ul2( pChain, pbData, cbData, pcbRemaining ); + //SymCryptSha256AppendBlocks_xmm1( pChain, pbData, cbData, pcbRemaining ); !!! Needs Save/restore logic + //SymCryptSha256AppendBlocks_xmm2( pChain, pbData, cbData, pcbRemaining ); +} diff --git a/libs/symcrypt/lib/sha256Par-ymm.c b/libs/symcrypt/lib/sha256Par-ymm.c new file mode 100644 index 00000000000..9ae1b2b9dd4 --- /dev/null +++ b/libs/symcrypt/lib/sha256Par-ymm.c @@ -0,0 +1,269 @@ +// +// Sha256Par-ymm.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// All YMM code for SHA256 Parallel operations +// Requires compiler support for avx2 +// + +#include "precomp.h" + +extern SYMCRYPT_ALIGN_AT( 256 ) const UINT32 SymCryptSha256K[64]; + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("avx2") +#endif + +// +// Code that uses the YMM registers. 
+// + +#define MAJYMM( x, y, z ) _mm256_or_si256( _mm256_and_si256( _mm256_or_si256( z, y ), x ), _mm256_and_si256( z, y )) +#define CHYMM( x, y, z ) _mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( z, y ), x ), z ) + +#define CSIGMA0YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,30) , _mm256_srli_epi32(x, 2) ),\ + _mm256_slli_epi32(x,19) ), _mm256_srli_epi32(x, 13) ),\ + _mm256_slli_epi32(x,10) ), _mm256_srli_epi32(x, 22) ) +#define CSIGMA1YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,26) , _mm256_srli_epi32(x, 6) ),\ + _mm256_slli_epi32(x,21) ), _mm256_srli_epi32(x, 11) ),\ + _mm256_slli_epi32(x,7) ), _mm256_srli_epi32(x, 25) ) +#define LSIGMA0YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,25) , _mm256_srli_epi32(x, 7) ),\ + _mm256_slli_epi32(x,14) ), _mm256_srli_epi32(x, 18) ),\ + _mm256_srli_epi32(x, 3) ) +#define LSIGMA1YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,15) , _mm256_srli_epi32(x, 17) ),\ + _mm256_slli_epi32(x,13) ), _mm256_srli_epi32(x, 19) ),\ + _mm256_srli_epi32(x,10) ) + +// +// Transpose macro, convert S0..S7 into R0..R7; R0 is the lane 0, R7 is lane 7. 
+// +// +// S0 = S00, S01, S02, S03, S04, S05, S06, S07 +// S1 = S10, S11, S12, S13, S14, S15, S16, S17 +// S2 = S20, S21, S22, S23, S24, S25, S26, S27 +// S3 = S30, S31, S32, S33, S34, S35, S36, S37 +// S4 = S40, S41, S42, S43, S44, S45, S46, S47 +// S5 = S50, S51, S52, S53, S54, S55, S56, S57 +// S6 = S60, S61, S62, S63, S64, S65, S66, S67 +// S7 = S70, S71, S72, S73, S74, S75, S76, S77 +// +// T0 = S00, S10, S01, S11, S04, S14, S05, S15 +// T1 = S02, S12, S03, S13, S06, S16, S07, S17 +// T2 = S20, S30, S21, S31, S24, S34, S25, S35 +// T3 = S22, S32, S23, S33, S26, S36, S27, S37 +// T4 = S40, S50, S41, S51, S44, S54, S45, S55 +// T5 = S42, S52, S43, S53, S46, S56, S47, S57 +// T6 = S60, S70, S61, S71, S64, S74, S65, S75 +// T7 = S62, S72, S63, S73, S66, S76, S67, S77 +// +// U0 = S00, S10, S20, S30, S04, S14, S24, S34 +// U1 = S01, S11, S21, S31, S05, S15, S25, S35 +// U2 = S02, S12, S22, S32, S06, S16, S26, S36 +// U3 = S03, S13, S23, S33, S07, S17, S27, S37 +// U4 = S40, S50, S60, S70, S44, S54, S64, S74 +// U5 = S41, S51, S61, S71, S45, S55, S65, S75 +// U6 = S42, S52, S62, S72, S46, S56, S66, S76 +// U7 = S43, S53, S63, S73, S47, S57, S67, S77 +// +// R0 = s00, s10, s20, s30, s40, s50, s60, s70 +// R1 = s01, s11, s21, s31, s41, s51, s61, s71 +// R2 = s02, s12, s22, s32, s42, s52, s62, s72 +// R3 = s03, s13, s23, s33, s43, s53, s63, s73 +// R4 = s04, s14, s24, s34, s44, s54, s64, s74 +// R5 = s05, s15, s25, s35, s45, s55, s65, s75 +// R6 = s06, s16, s26, s36, s46, s56, s66, s76 +// R7 = s07, s17, s27, s37, s47, s57, s67, s77 +// +#define YMM_TRANSPOSE_32( _R0, _R1, _R2, _R3, _R4, _R5, _R6, _R7, _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7 ) \ + {\ + __m256i _T0, _T1, _T2, _T3, _T4, _T5, _T6, _T7;\ + __m256i _U0, _U1, _U2, _U3, _U4, _U5, _U6, _U7;\ + _T0 = _mm256_unpacklo_epi32( _S0, _S1 ); _T1 = _mm256_unpackhi_epi32( _S0, _S1 );\ + _T2 = _mm256_unpacklo_epi32( _S2, _S3 ); _T3 = _mm256_unpackhi_epi32( _S2, _S3 );\ + _T4 = _mm256_unpacklo_epi32( _S4, _S5 ); _T5 = 
_mm256_unpackhi_epi32( _S4, _S5 );\ + _T6 = _mm256_unpacklo_epi32( _S6, _S7 ); _T7 = _mm256_unpackhi_epi32( _S6, _S7 );\ + \ + _U0 = _mm256_unpacklo_epi64( _T0, _T2 ); _U1 = _mm256_unpackhi_epi64( _T0, _T2 );\ + _U2 = _mm256_unpacklo_epi64( _T1, _T3 ); _U3 = _mm256_unpackhi_epi64( _T1, _T3 );\ + _U4 = _mm256_unpacklo_epi64( _T4, _T6 ); _U5 = _mm256_unpackhi_epi64( _T4, _T6 );\ + _U6 = _mm256_unpacklo_epi64( _T5, _T7 ); _U7 = _mm256_unpackhi_epi64( _T5, _T7 );\ + \ + _R0 = _mm256_permute2x128_si256( _U0, _U4, 0x20 ); _R1 = _mm256_permute2x128_si256( _U1, _U5, 0x20);\ + _R2 = _mm256_permute2x128_si256( _U2, _U6, 0x20 ); _R3 = _mm256_permute2x128_si256( _U3, _U7, 0x20);\ + _R4 = _mm256_permute2x128_si256( _U0, _U4, 0x31 ); _R5 = _mm256_permute2x128_si256( _U1, _U5, 0x31);\ + _R6 = _mm256_permute2x128_si256( _U2, _U6, 0x31 ); _R7 = _mm256_permute2x128_si256( _U3, _U7, 0x31);\ + } + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBlocks_ymm( + _Inout_updates_( 8 ) PSYMCRYPT_SHA256_CHAINING_STATE * pChain, + _Inout_updates_( 8 ) PCBYTE * ppByte, + SIZE_T nBytes, + _Out_writes_( PAR_SCRATCH_ELEMENTS_256 * 32 ) PBYTE pScratch ) +{ + // + // Implementation that uses 8 lanes in the YMM registers + // + __m256i * buf = (__m256i *)pScratch; + __m256i * W = &buf[4 + 8]; + __m256i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a + __m256i A, B, C, D, T; + __m256i T0, T1, T2, T3, T4, T5, T6, T7; + __m256i BYTE_REVERSE_32; + int r; + + _mm256_zeroupper(); + BYTE_REVERSE_32 = _mm256_set_epi8( 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 ); + + // + // The chaining state can be unaligned on x86, so we use unaligned loads + // + + T0 = _mm256_loadu_si256( (__m256i *)&pChain[0]->H[0] ); + T1 = _mm256_loadu_si256( (__m256i *)&pChain[1]->H[0] ); + T2 = _mm256_loadu_si256( (__m256i *)&pChain[2]->H[0] ); + T3 = _mm256_loadu_si256( (__m256i *)&pChain[3]->H[0] ); + T4 = _mm256_loadu_si256( (__m256i 
*)&pChain[4]->H[0] ); + T5 = _mm256_loadu_si256( (__m256i *)&pChain[5]->H[0] ); + T6 = _mm256_loadu_si256( (__m256i *)&pChain[6]->H[0] ); + T7 = _mm256_loadu_si256( (__m256i *)&pChain[7]->H[0] ); + + YMM_TRANSPOSE_32( ha[7], ha[6], ha[5], ha[4], ha[3], ha[2], ha[1], ha[0], T0, T1, T2, T3, T4, T5, T6, T7 ); + + buf[0] = ha[4]; + buf[1] = ha[5]; + buf[2] = ha[6]; + buf[3] = ha[7]; + + while( nBytes >= 64 ) + { + + // + // Capture the input into W[0..15] + // + for( r=0; r<16; r += 8 ) + { + T0 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[0] ), BYTE_REVERSE_32 ); ppByte[0] += 32; + T1 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[1] ), BYTE_REVERSE_32 ); ppByte[1] += 32; + T2 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[2] ), BYTE_REVERSE_32 ); ppByte[2] += 32; + T3 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[3] ), BYTE_REVERSE_32 ); ppByte[3] += 32; + T4 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[4] ), BYTE_REVERSE_32 ); ppByte[4] += 32; + T5 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[5] ), BYTE_REVERSE_32 ); ppByte[5] += 32; + T6 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[6] ), BYTE_REVERSE_32 ); ppByte[6] += 32; + T7 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[7] ), BYTE_REVERSE_32 ); ppByte[7] += 32; + + YMM_TRANSPOSE_32( W[r], W[r+1], W[r+2], W[r+3], W[r+4], W[r+5], W[r+6], W[r+7], T0, T1, T2, T3, T4, T5, T6, T7 ); + } + + // + // Expand the message + // + A = W[15]; + B = W[14]; + D = W[0]; + for( r=16; r<64; r+= 2 ) + { + // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16] + + // + // Macro for one word of message expansion. 
+ // Invariant: + // on entry: a = W[r-1], b = W[r-2], d = W[r-16] + // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15] + // + #define EXPAND( a, b, c, d, r ) \ + c = W[r-15]; \ + b = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( d, LSIGMA1YMM( b ) ), W[r-7] ), LSIGMA0YMM( c ) ); \ + W[r] = b; \ + + EXPAND( A, B, C, D, r ); + EXPAND( B, A, D, C, (r+1)); + + #undef EXPAND + } + + A = ha[7]; + B = ha[6]; + C = ha[5]; + D = ha[4]; + + for( r=0; r<64; r += 4 ) + { + // + // Loop invariant: + // A, B, C, and D are the a,b,c,d values of the current state. + // W[r] is the next expanded message word to be processed. + // W[r-8 .. r-5] contain the current state words h, g, f, e. + // + + // + // Macro to compute one round + // + #define DO_ROUND( a, b, c, d, t, r ) \ + t = W[r]; \ + t = _mm256_add_epi32( t, CSIGMA1YMM( W[r-5] ) ); \ + t = _mm256_add_epi32( t, W[r-8] ); \ + t = _mm256_add_epi32( t, CHYMM( W[r-5], W[r-6], W[r-7] ) ); \ + t = _mm256_add_epi32( t, _mm256_set1_epi32( SymCryptSha256K[r] )); \ + W[r-4] = _mm256_add_epi32( t, d ); \ + d = _mm256_add_epi32( t, CSIGMA0YMM( a ) ); \ + d = _mm256_add_epi32( d, MAJYMM( c, b, a ) ); + + DO_ROUND( A, B, C, D, T, r ); + DO_ROUND( D, A, B, C, T, (r+1) ); + DO_ROUND( C, D, A, B, T, (r+2) ); + DO_ROUND( B, C, D, A, T, (r+3) ); + #undef DO_ROUND + } + + buf[3] = ha[7] = _mm256_add_epi32( buf[3], A ); + buf[2] = ha[6] = _mm256_add_epi32( buf[2], B ); + buf[1] = ha[5] = _mm256_add_epi32( buf[1], C ); + buf[0] = ha[4] = _mm256_add_epi32( buf[0], D ); + ha[3] = _mm256_add_epi32( ha[3], W[r-5] ); + ha[2] = _mm256_add_epi32( ha[2], W[r-6] ); + ha[1] = _mm256_add_epi32( ha[1], W[r-7] ); + ha[0] = _mm256_add_epi32( ha[0], W[r-8] ); + + nBytes -= 64; + } + + // + // Copy the chaining state back into the hash structure + // + YMM_TRANSPOSE_32( T0, T1, T2, T3, T4, T5, T6, T7, ha[7], ha[6], ha[5], ha[4], ha[3], ha[2], ha[1], ha[0] ); + _mm256_storeu_si256( (__m256i *)&pChain[0]->H[0], T0 ); + _mm256_storeu_si256( 
(__m256i *)&pChain[1]->H[0], T1 ); + _mm256_storeu_si256( (__m256i *)&pChain[2]->H[0], T2 ); + _mm256_storeu_si256( (__m256i *)&pChain[3]->H[0], T3 ); + _mm256_storeu_si256( (__m256i *)&pChain[4]->H[0], T4 ); + _mm256_storeu_si256( (__m256i *)&pChain[5]->H[0], T5 ); + _mm256_storeu_si256( (__m256i *)&pChain[6]->H[0], T6 ); + _mm256_storeu_si256( (__m256i *)&pChain[7]->H[0], T7 ); + + _mm256_zeroupper(); + +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_X86_X64 diff --git a/libs/symcrypt/lib/sha256Par.c b/libs/symcrypt/lib/sha256Par.c new file mode 100644 index 00000000000..775913b016b --- /dev/null +++ b/libs/symcrypt/lib/sha256Par.c @@ -0,0 +1,1243 @@ +// +// Sha256Par.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement SHA2-256 from FIPS 180-2 in parallel mode +// + +#include "precomp.h" + +extern SYMCRYPT_ALIGN_AT( 256 ) const UINT32 SymCryptSha256K[64]; + + +// +// Not all CPU architectures support parallel code. +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#define SUPPORT_PARALLEL 1 +#define MIN_PARALLEL 2 +#define MAX_PARALLEL 8 + +#elif SYMCRYPT_CPU_ARM + +#define SUPPORT_PARALLEL 1 +#define MIN_PARALLEL 3 +#define MAX_PARALLEL 4 + +#else + +#define SUPPORT_PARALLEL 0 + +#endif + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBytes_serial( + _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork, + _In_range_(1, MAX_PARALLEL) SIZE_T nPar, + SIZE_T nBytes ); + +// +// Currently these are the generic implementations in terms of the single hash code. 
+// + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates ) +{ + SIZE_T i; + + for( i=0; i<nStates; i++ ) + { + SymCryptSha256Init( &pStates[i] ); + } +} + +#if !SUPPORT_PARALLEL +// +// No parallel support on this CPU +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha256Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptParallelHashProcess_serial( SymCryptParallelSha256Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); +} +#endif + + +#if SUPPORT_PARALLEL + + +// +// This function looks at a state and decides what to do. +// If it returns FALSE, then this state is done and no further processing is required. +// If it returns TRUE, the pbData/cbData have to be processed in parallel. +// This function is called again on the same state after the pbData/cbData have been processed. +// +// Internally, it keeps track of the next step to be taken for this state. +// the processingState keeps track of the next action to take. +// + + +BOOLEAN +SYMCRYPT_CALL +SymCryptParallelSha256Result1( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _Inout_ PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch, + _Out_ BOOLEAN *pRes) +{ + UINT32 bytesInBuffer = pState->bytesInBuffer; + + UNREFERENCED_PARAMETER( pParHash ); + // + // Function is called when a Result is requested from a parallel hash state. + // Do the first step of the padding. 
+ // + pState->buffer[bytesInBuffer++] = 0x80; + SymCryptWipe( &pState->buffer[bytesInBuffer], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer ); + + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + + if( bytesInBuffer > SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8 ) + { + // We need 2 blocks for the padding + pScratch->processingState = STATE_RESULT2; + } else { + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->dataLengthL * 8 ); + pScratch->processingState = STATE_RESULT_DONE; + } + + *pRes = TRUE; // return value from the SetWork function + return TRUE; // Return from the SetWork function +} + + +BOOLEAN +SYMCRYPT_CALL +SymCryptParallelSha256Result2( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _Inout_ PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch, + _Out_ BOOLEAN *pRes) +{ + UNREFERENCED_PARAMETER( pParHash ); + // + // Called for the 2nd block of a long padding + // + SymCryptWipe( &pState->buffer[0], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->dataLengthL * 8 ); + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + pScratch->processingState = STATE_RESULT_DONE; + *pRes = TRUE; + return TRUE; +} + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256ResultDone( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_ PCSYMRYPT_PARALLEL_HASH_OPERATION pOp) +{ + PSYMCRYPT_SHA256_STATE pSha256State = (PSYMCRYPT_SHA256_STATE) pState; + + UNREFERENCED_PARAMETER( pParHash ); + + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ); + SYMCRYPT_ASSERT( pOp->cbBuffer == SYMCRYPT_SHA256_RESULT_SIZE ); + + SymCryptUint32ToMsbFirst( &pSha256State->chain.H[0], pOp->pbBuffer, 8 ); + SymCryptWipeKnownSize( pSha256State, sizeof( *pSha256State )); + SymCryptSha256Init( 
pSha256State ); +} + + +#if 0 + +BOOL +SYMCRYPT_CALL +SymCryptParallelSha256SetNextWork( PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch ) +{ + PSYMCRYPT_SHA256_STATE pState; + PCSYMRYPT_PARALLEL_HASH_OPERATION pOp; + UINT32 bytesInBuffer; + UINT32 todo; + + // Retrieve the state we will operate on. + pState = (PSYMCRYPT_SHA256_STATE) pScratch->hashState; + + // + // This is a state machine where some states have to iterate + // The loop allows them to use 'continue' for that. + // +#pragma warning( suppress: 4127 ) // conditional expression is constant + while( TRUE ) + { + // + // At this point, the processing state, pbData/cbData, and next pointer define what needs to be done. + // STATE_NEXT: cbData == 0 and we have to process the remaining operations. + // STATE_DATA_START: We are working on the next operation; the first BytesAlreadyProcessed have been hashed, + // and the hash state has an empty buffer. + // STATE_DATA_END: We are working on the next operation (an append), and pbData/cbData have whatever partial block remains + // after all the whole blocks have been processed. + // STATE_PAD2: We are working on the next operation (a result), and have processed the first half of a 2-block padding. + // STATE_RESULT: We are working on the next operation (a result), and have processed all the padding. + // + // The pState->dataLength is updated whenever we copy bytes from the append into the state's buffer, or when + // we return TRUE and process bulk data. 
+ // + pOp = pScratch->next; + switch( pScratch->processingState ) + { + case STATE_NEXT: + + if( pOp == NULL ) + { + return FALSE; + } + + bytesInBuffer = pState->bytesInBuffer; + + // SYMCRYPT_ASSERT( pOp->cbBuffer < ((SIZE_T)-1)/2 ); // used during testing + + if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND ) + { + pState->dataLengthL += pOp->cbBuffer; + if( bytesInBuffer > 0 ) + { + todo = (UINT32) SYMCRYPT_MIN( SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer, pOp->cbBuffer ); + memcpy( &pState->buffer[bytesInBuffer], pOp->pbBuffer, todo ); + pState->bytesInBuffer += todo; + if( pState->bytesInBuffer == SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ) + { + // + // We filled the buffer; set it for processing. + // Remember the # bytes we did and set the next state to process the rest of the request. + // + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = sizeof( pState->buffer ); + pState->bytesInBuffer = 0; + if( todo == pOp->cbBuffer ) + { + // + // We finished the request after the pbData processing + // + pScratch->next = pOp->next; + // pScratch->processingState = STATE_NEXT // already has that value + } else { + pScratch->processingState = STATE_DATA_START; + SYMCRYPT_ASSERT( todo <= 0xff ); + pScratch->bytesAlreadyProcessed = (BYTE) todo; + } + // + // We process the buffer here, no need to update the dataLength + // + return TRUE; + } else { + // + // We finished the operation; skip to the next one. + // + pScratch->next = pOp->next; + // pScratch->processingState = STATE_NEXT // already has that value + continue; + } + } else { + // + // Buffer is empty; process the bulk data + // + pScratch->pbData = pOp->pbBuffer; + pScratch->cbData = pOp->cbBuffer; + pScratch->processingState = STATE_DATA_END; + + // + // Return TRUE if there is real data to process, and just re-run the state + // machine if we should copy the partial block to the buffer. 
+ // + if( pScratch->cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ) + { + return TRUE; + } else { + continue; + } + } + } else { + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ); + + pState->buffer[bytesInBuffer++] = 0x80; + SymCryptWipe( &pState->buffer[bytesInBuffer], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer ); + + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = sizeof( pState->buffer ); + + if( bytesInBuffer > SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8 ) + { + // We need 2 blocks for the padding + pScratch->processingState = STATE_PAD2; + } else { + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->dataLengthL * 8 ); + pScratch->processingState = STATE_RESULT; + } + return TRUE; + } + break; + + case STATE_DATA_START: + // + // The next operation is an append, and the first few bytes of that operation have already been copied to + // the buffer and processed. We need to process the rest. + // Note that the # bytes remaining is never zero. + // + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND && pOp->cbBuffer >= pScratch->bytesAlreadyProcessed ); + + pScratch->pbData = pOp->pbBuffer + pScratch->bytesAlreadyProcessed; + pScratch->cbData = pOp->cbBuffer - pScratch->bytesAlreadyProcessed; + if( pScratch->cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ) + { + pScratch->processingState = STATE_DATA_END; + return TRUE; + } + + // + // We have less than one block left; this is exactly the same state as we have at the end of + // a normal append. Fall through to that code. + // + // FALLTHROUGH! + + case STATE_DATA_END: + // + // We finished processing the whole blocks of the pScratch->pbData, and have to process the rest. + // The current append is already popped off the work list. 
+ // + if( pScratch->cbData > 0 ) + { + SYMCRYPT_ASSERT( pScratch->cbData < SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + memcpy( &pState->buffer[0], pScratch->pbData, pScratch->cbData ); + pState->bytesInBuffer = (UINT32) pScratch->cbData; + } + pScratch->next = pOp->next; + pScratch->processingState = STATE_NEXT; + continue; + + case STATE_PAD2: + SymCryptWipe( &pState->buffer[0], sizeof( pState->buffer )); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->dataLengthL * 8 ); + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = sizeof( pState->buffer ); + pScratch->processingState = STATE_RESULT; + return TRUE; + + case STATE_RESULT: + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ); + + SymCryptUint32ToMsbFirst( &pState->chain.H[0], pOp->pbBuffer, 8 ); + SymCryptWipeKnownSize( pState, sizeof( *pState )); + SymCryptSha256Init( pState ); + + pScratch->next = pOp->next; + pScratch->processingState = STATE_NEXT; + continue; + } + } + +#if 0 // old code, retain until we have the new one working. + ============ old code + + + SIZE_T bytesInBuffer; + SIZE_T todo; + + switch( pScratch->processingState ) + { + case START: + + if( pState->pbData != NULL ) + { + bytesInBuffer = pState->internalState.hashState.dataLength & SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 1; + + // + // There are bytes in the buffer; consume enough input to get rid of them. + // + if( bytesInBuffer > 0 ) + { + todo = SYMCRYPT_MIN( SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer, pState->cbData ); + memcpy( &pState->internalState.hashState.buffer[bytesInBuffer], pState->pbData, todo ); + pState->pbData += todo; + pState->cbData -= todo; + pState->internalState.hashState.dataLength += todo; + + // + // We don't parallelize the processing of the first block to get to the whole-block state. + // It would mean we get a 1-size block up front, and that interferes with the sorted scheduling + // we do. 
This is not a common case, and we document that this is inefficient. + // + if( (pState->internalState.hashState.dataLength & (SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 1)) == 0 ) + { + SymCryptSha256AppendBlocks( &pState->internalState.hashState.chain, + &pState->internalState.hashState.buffer[0], + SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + } + } + + if( pState->cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ) + { + // + // We have more bytes to do; this means that the internal buffer is empty. + // Set the data blocks up for processing. We increment the dataLength here + // as that is part of this function, not of the processing code. + // + pState->internalState.processingState = DATA; + pState->internalState.hashState.dataLength += pState->cbData & ~(SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 1); + return TRUE; + } + + } + + // + // FALL THROUGH TO THE DATA PROCESSING + // + // There are two cases here: + // - the internal buffer is empty and we have between 1 and 63 bytes left to hash. + // - We have no bytes left to hash, but the internal buffer might contain data. + // The first case is exactly what we get after DATA processing. + // The second case is trivially handled by the same code paths as the first one. + // Instead of duplicating the code, + // we fall through to the DATA section. + // + + pState->internalState.processingState = DATA; + + case DATA: + // + // We just finished the data work, or the START code fell through here to handle the + // padding and/or pbResult + // If we just did data processing, the internal buffer is empty. + // If the internal buffer contains data, then cbData == 0. + // + + if( pState->pbData != NULL && pState->cbData > 0 ) + { + SYMCRYPT_ASSERT( pState->cbData < SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + memcpy( &pState->internalState.hashState.buffer[0], pState->pbData, pState->cbData ); + pState->internalState.hashState.dataLength += pState->cbData; + } + + // + // This completes the consumption of the pbData. Set it to NULL as per the API spec. 
+ // + + pState->pbData = NULL; + pState->cbData = 0; + + // + // This concludes the data processing. Now let's see if we have to compute the results + // + + if( pState->pbResult == NULL ) + { + return FALSE; + } + + bytesInBuffer = pState->internalState.hashState.dataLength & SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 1; + + // Add the first byte of padding. (Always fits as the buffer is never left full.) + pState->internalState.hashState.buffer[bytesInBuffer++] = 0x80; + SymCryptWipe( &pState->internalState.hashState.buffer[bytesInBuffer], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer ); + + if( bytesInBuffer > SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8 ) + { + // + // We need 2 blocks for the padding. + // + pState->internalState.processingState = PAD_INTERMEDIATE; + pState->pbData = &pState->internalState.hashState.buffer[0]; + pState->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + + return TRUE; + } + + // + // Single padding block + // + SYMCRYPT_STORE_MSBFIRST64( &pState->internalState.hashState.buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->internalState.hashState.dataLength * 8 ); + pState->internalState.processingState = PAD_FINAL; + pState->pbData = &pState->internalState.hashState.buffer[0]; + pState->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + return TRUE; + + case PAD_INTERMEDIATE: + // + // Done with the intermediate padding, do the final padding. 
+ // We wipe to the end of the buffer, as it is 16-aligned and therefore often faster + // + SymCryptWipe( &pState->internalState.hashState.buffer[0], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + SYMCRYPT_STORE_MSBFIRST64( &pState->internalState.hashState.buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->internalState.hashState.dataLength * 8 ); + + pState->internalState.processingState = PAD_FINAL; + pState->pbData = &pState->internalState.hashState.buffer[0]; + pState->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + return TRUE; + + case PAD_FINAL: + SymCryptUint32ToMsbFirst( &pState->internalState.hashState.chain.H[0], pState->pbResult, 8 ); + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SymCryptSha256Init( &pState->internalState.hashState ); + SYMCRYPT_SET_MAGIC( &pState->internalState ); + return FALSE; + } +#endif + + SymCryptFatal( 'psha' ); + return FALSE; +} +#endif + +C_ASSERT( (SYMCRYPT_SIMD_ELEMENT_SIZE & (SYMCRYPT_SIMD_ELEMENT_SIZE - 1 )) == 0 ); // check that it is a power of 2 + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha256Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 maxParallel; + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveState; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ) && SymCryptSaveYmm( &SaveState ) == SYMCRYPT_NO_ERROR ) + { + maxParallel = 8; + scError = SymCryptParallelHashProcess( SymCryptParallelSha256Algorithm, + pStates, + nStates, + pOperations, + nOperations, + pbScratch, + cbScratch, + maxParallel ); + + SymCryptRestoreYmm( &SaveState ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSSE3 ) && SymCryptSaveXmm( &SaveState ) == SYMCRYPT_NO_ERROR ) + { + maxParallel = 4; + scError = 
SymCryptParallelHashProcess( SymCryptParallelSha256Algorithm, + pStates, + nStates, + pOperations, + nOperations, + pbScratch, + cbScratch, + maxParallel ); + SymCryptRestoreXmm( &SaveState ); + } else { + scError = SymCryptParallelHashProcess_serial( SymCryptParallelSha256Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); + } + +#elif SYMCRYPT_CPU_ARM + maxParallel = MAX_PARALLEL; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + scError = SymCryptParallelHashProcess( SymCryptParallelSha256Algorithm, + pStates, + nStates, + pOperations, + nOperations, + pbScratch, + cbScratch, + maxParallel ); + } else { + scError = SymCryptParallelHashProcess_serial( SymCryptParallelSha256Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); + } +#else + scError = SymCryptParallelHashProcess_serial( SymCryptParallelSha256Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); +#endif + return scError; +} + + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 +// +// Code that uses the XMM registers. 
//

// Everything between the push below and the matching pop after the function is
// compiled with the SSSE3 target enabled.
#ifdef __clang__
#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function)
#else
#pragma GCC push_options
#pragma GCC target("ssse3")
#endif

//
// SHA-256 Maj and Ch functions, evaluated on all 32-bit lanes of an XMM register at once.
//
#define MAJXMM( x, y, z ) _mm_or_si128( _mm_and_si128( _mm_or_si128( z, y ), x ), _mm_and_si128( z, y ))
#define CHXMM( x, y, z ) _mm_xor_si128( _mm_and_si128( _mm_xor_si128( z, y ), x ), z )

//
// SHA-256 sigma functions. Each rotate-right by n is built from a left shift by (32-n)
// and a right shift by n that are XORed together.
//

// Capital Sigma0: rotations by 2, 13 and 22
#define CSIGMA0XMM( x ) \
    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
    _mm_slli_epi32(x,30) , _mm_srli_epi32(x, 2) ),\
    _mm_slli_epi32(x,19) ), _mm_srli_epi32(x, 13) ),\
    _mm_slli_epi32(x,10) ), _mm_srli_epi32(x, 22) )
// Capital Sigma1: rotations by 6, 11 and 25
#define CSIGMA1XMM( x ) \
    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
    _mm_slli_epi32(x,26) , _mm_srli_epi32(x, 6) ),\
    _mm_slli_epi32(x,21) ), _mm_srli_epi32(x, 11) ),\
    _mm_slli_epi32(x,7) ), _mm_srli_epi32(x, 25) )
// Small sigma0: rotations by 7 and 18, plus a plain right shift by 3
#define LSIGMA0XMM( x ) \
    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
    _mm_slli_epi32(x,25) , _mm_srli_epi32(x, 7) ),\
    _mm_slli_epi32(x,14) ), _mm_srli_epi32(x, 18) ),\
    _mm_srli_epi32(x, 3) )
// Small sigma1: rotations by 17 and 19, plus a plain right shift by 10
#define LSIGMA1XMM( x ) \
    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
    _mm_slli_epi32(x,15) , _mm_srli_epi32(x, 17) ),\
    _mm_slli_epi32(x,13) ), _mm_srli_epi32(x, 19) ),\
    _mm_srli_epi32(x,10) )

//
// Transpose macro, convert S0..S3 into R0..R3; R0 is the lane 0, R3 is lane 3.
// S0 = S00, S01, S02, S03; S1 = S10, S11, S12, S13; S2 = S20, S21, S22, S23; S3 = S30, S31, S32, S33
// T0 = S00, S10, S01, S11; T1 = S02, S12, S03, S13; T2 = S20, S30, S21, S31; T3 = S22, S32, S23, S33
// R0 = S00, S10, S20, S30; R1 = S01, S11, S21, S31; R2 = S02, S12, S22, S32; R3 = S03, S13, S23, S33
//
// The macro expands to a braced block that declares its own temporaries _T0.._T3.
//
#define XMM_TRANSPOSE_32( _R0, _R1, _R2, _R3, _S0, _S1, _S2, _S3 ) \
    {\
        __m128i _T0, _T1, _T2, _T3;\
        _T0 = _mm_unpacklo_epi32( _S0, _S1 ); _T1 = _mm_unpackhi_epi32( _S0, _S1 );\
        _T2 = _mm_unpacklo_epi32( _S2, _S3 ); _T3 = _mm_unpackhi_epi32( _S2, _S3 );\
        _R0 = _mm_unpacklo_epi64( _T0, _T2 ); _R1 = _mm_unpackhi_epi64( _T0, _T2 );\
        _R2 = _mm_unpacklo_epi64( _T1, _T3 ); _R3 = _mm_unpackhi_epi64( _T1, _T3 );\
    }

//
// SymCryptParallelSha256AppendBlocks_xmm
//
// Runs the SHA-256 block function over four message streams at the same time, one
// stream per 32-bit lane of an XMM register.
//
//  pChain      Four chaining states. They are loaded (transposed) on entry and written
//              back on exit.
//  ppByte      Four input pointers. Each one is advanced by 64 bytes for every block
//              that is consumed.
//  nBytes      Number of bytes to consume from every stream; blocks are processed while
//              nBytes >= 64.
//  pScratch    Working storage. The code indexes elements 0 through 75 of it
//              (buf[0..3], ha[0..7] and W[0..63]).
//
// NOTE(review): unlike the NEON variant, this routine does not wipe pScratch before
// returning - presumably the caller clears the scratch space; confirm.
//
VOID
SYMCRYPT_CALL
SymCryptParallelSha256AppendBlocks_xmm(
    _Inout_updates_( 4 ) PSYMCRYPT_SHA256_CHAINING_STATE * pChain,
    _Inout_updates_( 4 ) PCBYTE * ppByte,
    SIZE_T nBytes,
    _Out_writes_( PAR_SCRATCH_ELEMENTS_256 ) __m128i * pScratch )
{
    //
    // Implementation that uses 4 lanes in the XMM registers
    //
    // Layout of the scratch area:
    //   buf[0..3]   copy of the d, c, b, a state words as they were at the start of the block
    //   buf[4..11]  ha[0..7], the state words h, g, f, e, d, c, b, a
    //   buf[12..75] W[0..63], the expanded message
    // ha[] sits directly in front of W[], so ha[i] is the same element as W[i-8]. The round
    // macro relies on this: it reads the state through W[r-8..r-5] and stores through W[r-4].
    //
    __m128i * buf = pScratch; // chaining state concatenated with the expanded input block
    __m128i * W = &buf[4 + 8]; // W are the 64 words of the expanded input
    __m128i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a
    __m128i A, B, C, D, T;
    __m128i T0, T1, T2, T3;
    // Shuffle mask that reverses the byte order inside each 32-bit word (big-endian load).
    const __m128i BYTE_REVERSE_32 = _mm_set_epi8( 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 );
    int r;

    //
    // The chaining state can be unaligned on x86, so we use unaligned loads
    //

    // H[0..3] of the four states become ha[7], ha[6], ha[5], ha[4].
    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[0] );
    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[0] );
    T2 = _mm_loadu_si128( (__m128i *)&pChain[2]->H[0] );
    T3 = _mm_loadu_si128( (__m128i *)&pChain[3]->H[0] );

    XMM_TRANSPOSE_32( ha[7], ha[6], ha[5], ha[4], T0, T1, T2, T3 );

    // H[4..7] of the four states become ha[3], ha[2], ha[1], ha[0].
    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[4] );
    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[4] );
    T2 = _mm_loadu_si128( (__m128i *)&pChain[2]->H[4] );
    T3 = _mm_loadu_si128( (__m128i *)&pChain[3]->H[4] );

    XMM_TRANSPOSE_32( ha[3], ha[2], ha[1], ha[0], T0, T1, T2, T3 );

    // Keep a copy of d, c, b, a: the first four rounds store into W[-4..-1], which is ha[4..7].
    buf[0] = ha[4];
    buf[1] = ha[5];
    buf[2] = ha[6];
    buf[3] = ha[7];

    while( nBytes >= 64 )
    {

        //
        // Capture the input into W[0..15]
        //
        // Each pass loads 16 bytes from every stream, byte-swaps the words and transposes
        // them, so that W[r+j] holds message word r+j of all four streams.
        //
        for( r=0; r<16; r += 4 )
        {
            T0 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[0] ), BYTE_REVERSE_32 ); ppByte[0] += 16;
            T1 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[1] ), BYTE_REVERSE_32 ); ppByte[1] += 16;
            T2 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[2] ), BYTE_REVERSE_32 ); ppByte[2] += 16;
            T3 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[3] ), BYTE_REVERSE_32 ); ppByte[3] += 16;

            XMM_TRANSPOSE_32( W[r], W[r+1], W[r+2], W[r+3], T0, T1, T2, T3 );
        }

        //
        // Expand the message
        //
        A = W[15];
        B = W[14];
        D = W[0];
        for( r=16; r<64; r+= 2 )
        {
            // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16]

            //
            // Macro for one word of message expansion.
            // Invariant:
            // on entry: a = W[r-1], b = W[r-2], d = W[r-16]
            // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15]
            //
            // The second invocation swaps the roles of (A,B) and (C,D) so that the values
            // left in registers by the first one are reused without reloading.
            //
            #define EXPAND( a, b, c, d, r ) \
                c = W[r-15]; \
                b = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( d, LSIGMA1XMM( b ) ), W[r-7] ), LSIGMA0XMM( c ) ); \
                W[r] = b; \

            EXPAND( A, B, C, D, r );
            EXPAND( B, A, D, C, (r+1));

            #undef EXPAND
        }

        A = ha[7];
        B = ha[6];
        C = ha[5];
        D = ha[4];

        for( r=0; r<64; r += 4 )
        {
            //
            // Loop invariant:
            // A, B, C, and D are the a,b,c,d values of the current state.
            // W[r] is the next expanded message word to be processed.
            // W[r-8 .. r-5] contain the current state words h, g, f, e.
            //

            //
            // Macro to compute one round
            //
            // The new e value is stored in W[r-4], so the e..h half of the state slides
            // through the W array; the new a value replaces d, and the four invocations
            // rotate the register names instead of moving data.
            //
            #define DO_ROUND( a, b, c, d, t, r ) \
                t = W[r]; \
                t = _mm_add_epi32( t, CSIGMA1XMM( W[r-5] ) ); \
                t = _mm_add_epi32( t, W[r-8] ); \
                t = _mm_add_epi32( t, CHXMM( W[r-5], W[r-6], W[r-7] ) ); \
                t = _mm_add_epi32( t, _mm_set1_epi32( SymCryptSha256K[r] )); \
                W[r-4] = _mm_add_epi32( t, d ); \
                d = _mm_add_epi32( t, CSIGMA0XMM( a ) ); \
                d = _mm_add_epi32( d, MAJXMM( c, b, a ) );

            DO_ROUND( A, B, C, D, T, r );
            DO_ROUND( D, A, B, C, T, (r+1) );
            DO_ROUND( C, D, A, B, T, (r+2) );
            DO_ROUND( B, C, D, A, T, (r+3) );
            #undef DO_ROUND
        }

        // Feed-forward. r is 64 here, so W[r-5..r-8] are the final e, f, g, h values.
        // buf[0..3] is refreshed so it again holds d, c, b, a for the next block.
        buf[3] = ha[7] = _mm_add_epi32( buf[3], A );
        buf[2] = ha[6] = _mm_add_epi32( buf[2], B );
        buf[1] = ha[5] = _mm_add_epi32( buf[1], C );
        buf[0] = ha[4] = _mm_add_epi32( buf[0], D );
        ha[3] = _mm_add_epi32( ha[3], W[r-5] );
        ha[2] = _mm_add_epi32( ha[2], W[r-6] );
        ha[1] = _mm_add_epi32( ha[1], W[r-7] );
        ha[0] = _mm_add_epi32( ha[0], W[r-8] );

        nBytes -= 64;
    }

    //
    // Copy the chaining state back into the hash structure
    //
    XMM_TRANSPOSE_32( T0, T1, T2, T3, ha[7], ha[6], ha[5], ha[4] );
    _mm_storeu_si128( (__m128i *)&pChain[0]->H[0], T0 );
    _mm_storeu_si128( (__m128i *)&pChain[1]->H[0], T1 );
    _mm_storeu_si128( (__m128i *)&pChain[2]->H[0], T2 );
    _mm_storeu_si128( (__m128i *)&pChain[3]->H[0], T3 );

    XMM_TRANSPOSE_32( T0, T1, T2, T3, ha[3], ha[2], ha[1], ha[0] );
    _mm_storeu_si128( (__m128i *)&pChain[0]->H[4], T0 );
    _mm_storeu_si128( (__m128i *)&pChain[1]->H[4], T1 );
    _mm_storeu_si128( (__m128i *)&pChain[2]->H[4], T2 );
    _mm_storeu_si128( (__m128i *)&pChain[3]->H[4], T3 );

}

// End of the region compiled with the SSSE3 target.
#ifdef __clang__
#pragma clang attribute pop
#else
#pragma GCC pop_options
#endif

#endif // CPU_X86_X64

#if SYMCRYPT_CPU_ARM
//
// Code that uses the Neon registers.
+// + +#define MAJ( x, y, z ) vorrq_u32( vandq_u32( vorrq_u32( z, y ), x ), vandq_u32( z, y )) +#define CH( x, y, z ) veorq_u32( vandq_u32( veorq_u32( z, y ), x ), z ) + +#define CSIGMA0( x ) \ + veorq_u32( veorq_u32( veorq_u32( veorq_u32( veorq_u32( \ + vshlq_n_u32(x,30) , vshrq_n_u32(x, 2) ),\ + vshlq_n_u32(x,19) ), vshrq_n_u32(x, 13) ),\ + vshlq_n_u32(x,10) ), vshrq_n_u32(x, 22) ) +#define CSIGMA1( x ) \ + veorq_u32( veorq_u32( veorq_u32( veorq_u32( veorq_u32( \ + vshlq_n_u32(x,26) , vshrq_n_u32(x, 6) ),\ + vshlq_n_u32(x,21) ), vshrq_n_u32(x, 11) ),\ + vshlq_n_u32(x,7) ), vshrq_n_u32(x, 25) ) +#define LSIGMA0( x ) \ + veorq_u32( veorq_u32( veorq_u32( veorq_u32( \ + vshlq_n_u32(x,25) , vshrq_n_u32(x, 7) ),\ + vshlq_n_u32(x,14) ), vshrq_n_u32(x, 18) ),\ + vshrq_n_u32(x, 3) ) +#define LSIGMA1( x ) \ + veorq_u32( veorq_u32( veorq_u32( veorq_u32( \ + vshlq_n_u32(x,15) , vshrq_n_u32(x, 17) ),\ + vshlq_n_u32(x,13) ), vshrq_n_u32(x, 19) ),\ + vshrq_n_u32(x,10) ) + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBlocks_neon( + _Inout_updates_( 4 ) PSYMCRYPT_SHA256_CHAINING_STATE * pChain, + _Inout_updates_( 4 ) PCBYTE * ppByte, + SIZE_T nBytes, + _Out_writes_( PAR_SCRATCH_ELEMENTS_256 ) __n128 * pScratch ) +{ + // + // Implementation that uses 4 lanes in the Neon registers + // + __n128 * buf = pScratch; + __n128 * W = &buf[4 + 8]; + __n128 * ha = &buf[4]; // initial state words, in order h, g, ..., b, a + __n128 A, B, C, D, T; + __n128 T0; + int r; + + // + // This can probably be done faster, but we are missing the VTRN.64 instruction + // which makes it hard to do this efficient in intrinsics. 
+ // + ha[7] = vsetq_lane_u32( pChain[0]->H[0], ha[7], 0 ); + ha[7] = vsetq_lane_u32( pChain[1]->H[0], ha[7], 1 ); + ha[7] = vsetq_lane_u32( pChain[2]->H[0], ha[7], 2 ); + ha[7] = vsetq_lane_u32( pChain[3]->H[0], ha[7], 3 ); + + ha[6] = vsetq_lane_u32( pChain[0]->H[1], ha[6], 0 ); + ha[6] = vsetq_lane_u32( pChain[1]->H[1], ha[6], 1 ); + ha[6] = vsetq_lane_u32( pChain[2]->H[1], ha[6], 2 ); + ha[6] = vsetq_lane_u32( pChain[3]->H[1], ha[6], 3 ); + + ha[5] = vsetq_lane_u32( pChain[0]->H[2], ha[5], 0 ); + ha[5] = vsetq_lane_u32( pChain[1]->H[2], ha[5], 1 ); + ha[5] = vsetq_lane_u32( pChain[2]->H[2], ha[5], 2 ); + ha[5] = vsetq_lane_u32( pChain[3]->H[2], ha[5], 3 ); + + ha[4] = vsetq_lane_u32( pChain[0]->H[3], ha[4], 0 ); + ha[4] = vsetq_lane_u32( pChain[1]->H[3], ha[4], 1 ); + ha[4] = vsetq_lane_u32( pChain[2]->H[3], ha[4], 2 ); + ha[4] = vsetq_lane_u32( pChain[3]->H[3], ha[4], 3 ); + + ha[3] = vsetq_lane_u32( pChain[0]->H[4], ha[3], 0 ); + ha[3] = vsetq_lane_u32( pChain[1]->H[4], ha[3], 1 ); + ha[3] = vsetq_lane_u32( pChain[2]->H[4], ha[3], 2 ); + ha[3] = vsetq_lane_u32( pChain[3]->H[4], ha[3], 3 ); + + ha[2] = vsetq_lane_u32( pChain[0]->H[5], ha[2], 0 ); + ha[2] = vsetq_lane_u32( pChain[1]->H[5], ha[2], 1 ); + ha[2] = vsetq_lane_u32( pChain[2]->H[5], ha[2], 2 ); + ha[2] = vsetq_lane_u32( pChain[3]->H[5], ha[2], 3 ); + + ha[1] = vsetq_lane_u32( pChain[0]->H[6], ha[1], 0 ); + ha[1] = vsetq_lane_u32( pChain[1]->H[6], ha[1], 1 ); + ha[1] = vsetq_lane_u32( pChain[2]->H[6], ha[1], 2 ); + ha[1] = vsetq_lane_u32( pChain[3]->H[6], ha[1], 3 ); + + ha[0] = vsetq_lane_u32( pChain[0]->H[7], ha[0], 0 ); + ha[0] = vsetq_lane_u32( pChain[1]->H[7], ha[0], 1 ); + ha[0] = vsetq_lane_u32( pChain[2]->H[7], ha[0], 2 ); + ha[0] = vsetq_lane_u32( pChain[3]->H[7], ha[0], 3 ); + + buf[0] = ha[4]; + buf[1] = ha[5]; + buf[2] = ha[6]; + buf[3] = ha[7]; + + while( nBytes >= 64 ) + { + + // + // Capture the input into W[0..15] + // + for( r=0; r<16; r ++ ) + { + T0 = vsetq_lane_u32( 
SYMCRYPT_LOAD_MSBFIRST32( ppByte[0] ), T0, 0 ); ppByte[0] += 4; + T0 = vsetq_lane_u32( SYMCRYPT_LOAD_MSBFIRST32( ppByte[1] ), T0, 1 ); ppByte[1] += 4; + T0 = vsetq_lane_u32( SYMCRYPT_LOAD_MSBFIRST32( ppByte[2] ), T0, 2 ); ppByte[2] += 4; + T0 = vsetq_lane_u32( SYMCRYPT_LOAD_MSBFIRST32( ppByte[3] ), T0, 3 ); ppByte[3] += 4; + W[r] = T0; + } + + // + // Expand the message + // + A = W[15]; + B = W[14]; + D = W[0]; + for( r=16; r<64; r+= 2 ) + { + // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16] + + // + // Macro for one word of message expansion. + // Invariant: + // on entry: a = W[r-1], b = W[r-2], d = W[r-16] + // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15] + // + #define EXPAND( a, b, c, d, r ) \ + c = W[r-15]; \ + b = vaddq_u32( vaddq_u32( vaddq_u32( d, LSIGMA1( b ) ), W[r-7] ), LSIGMA0( c ) ); \ + W[r] = b; \ + + EXPAND( A, B, C, D, r ); + EXPAND( B, A, D, C, (r+1)); + + #undef EXPAND + } + + A = ha[7]; + B = ha[6]; + C = ha[5]; + D = ha[4]; + + for( r=0; r<64; r += 4 ) + { + // + // Loop invariant: + // A, B, C, and D are the a,b,c,d values of the current state. + // W[r] is the next expanded message word to be processed. + // W[r-8 .. r-5] contain the current state words h, g, f, e. 
+ // + + // + // Macro to compute one round + // + #define DO_ROUND( a, b, c, d, t, r ) \ + t = W[r]; \ + t = vaddq_u32( t, CSIGMA1( W[r-5] ) ); \ + t = vaddq_u32( t, W[r-8] ); \ + t = vaddq_u32( t, CH( W[r-5], W[r-6], W[r-7] ) ); \ + t = vaddq_u32( t, vdupq_n_u32( SymCryptSha256K[r] )); \ + W[r-4] = vaddq_u32( t, d ); \ + d = vaddq_u32( t, CSIGMA0( a ) ); \ + d = vaddq_u32( d, MAJ( c, b, a ) ); + + DO_ROUND( A, B, C, D, T, r ); + DO_ROUND( D, A, B, C, T, (r+1) ); + DO_ROUND( C, D, A, B, T, (r+2) ); + DO_ROUND( B, C, D, A, T, (r+3) ); + #undef DO_ROUND + } + + buf[3] = ha[7] = vaddq_u32( buf[3], A ); + buf[2] = ha[6] = vaddq_u32( buf[2], B ); + buf[1] = ha[5] = vaddq_u32( buf[1], C ); + buf[0] = ha[4] = vaddq_u32( buf[0], D ); + ha[3] = vaddq_u32( ha[3], W[r-5] ); + ha[2] = vaddq_u32( ha[2], W[r-6] ); + ha[1] = vaddq_u32( ha[1], W[r-7] ); + ha[0] = vaddq_u32( ha[0], W[r-8] ); + + nBytes -= 64; + } + + // + // Copy the chaining state back into the hash structure + // + pChain[0]->H[0] = vgetq_lane_u32( ha[7], 0 ); + pChain[1]->H[0] = vgetq_lane_u32( ha[7], 1 ); + pChain[2]->H[0] = vgetq_lane_u32( ha[7], 2 ); + pChain[3]->H[0] = vgetq_lane_u32( ha[7], 3 ); + + pChain[0]->H[1] = vgetq_lane_u32( ha[6], 0 ); + pChain[1]->H[1] = vgetq_lane_u32( ha[6], 1 ); + pChain[2]->H[1] = vgetq_lane_u32( ha[6], 2 ); + pChain[3]->H[1] = vgetq_lane_u32( ha[6], 3 ); + + pChain[0]->H[2] = vgetq_lane_u32( ha[5], 0 ); + pChain[1]->H[2] = vgetq_lane_u32( ha[5], 1 ); + pChain[2]->H[2] = vgetq_lane_u32( ha[5], 2 ); + pChain[3]->H[2] = vgetq_lane_u32( ha[5], 3 ); + + pChain[0]->H[3] = vgetq_lane_u32( ha[4], 0 ); + pChain[1]->H[3] = vgetq_lane_u32( ha[4], 1 ); + pChain[2]->H[3] = vgetq_lane_u32( ha[4], 2 ); + pChain[3]->H[3] = vgetq_lane_u32( ha[4], 3 ); + + pChain[0]->H[4] = vgetq_lane_u32( ha[3], 0 ); + pChain[1]->H[4] = vgetq_lane_u32( ha[3], 1 ); + pChain[2]->H[4] = vgetq_lane_u32( ha[3], 2 ); + pChain[3]->H[4] = vgetq_lane_u32( ha[3], 3 ); + + pChain[0]->H[5] = vgetq_lane_u32( ha[2], 0 ); 
+ pChain[1]->H[5] = vgetq_lane_u32( ha[2], 1 ); + pChain[2]->H[5] = vgetq_lane_u32( ha[2], 2 ); + pChain[3]->H[5] = vgetq_lane_u32( ha[2], 3 ); + + pChain[0]->H[6] = vgetq_lane_u32( ha[1], 0 ); + pChain[1]->H[6] = vgetq_lane_u32( ha[1], 1 ); + pChain[2]->H[6] = vgetq_lane_u32( ha[1], 2 ); + pChain[3]->H[6] = vgetq_lane_u32( ha[1], 3 ); + + pChain[0]->H[7] = vgetq_lane_u32( ha[0], 0 ); + pChain[1]->H[7] = vgetq_lane_u32( ha[0], 1 ); + pChain[2]->H[7] = vgetq_lane_u32( ha[0], 2 ); + pChain[3]->H[7] = vgetq_lane_u32( ha[0], 3 ); + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); +} + +#undef CH +#undef MAJ +#undef CSIGMA0 +#undef CSIGMA1 +#undef LSIGMA0 +#undef LSIGMA1 + +#endif // CPU_X86_X64 + + + +#if SYMCRYPT_CPU_X86 || SYMCRYPT_CPU_AMD64 || SYMCRYPT_CPU_ARM + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBytes_serial( + _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork, + _In_range_(1, MAX_PARALLEL) SIZE_T nPar, + SIZE_T nBytes ) +{ + SIZE_T i; + SIZE_T tmp; + + SYMCRYPT_ASSERT( nBytes % SYMCRYPT_SHA256_INPUT_BLOCK_SIZE == 0 ); + SYMCRYPT_ASSERT( nPar >= 1 && nPar <= MAX_PARALLEL ); + + for( i=0; i < nPar; i++ ) + { + SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes ); + SymCryptSha256AppendBlocks( & ((PSYMCRYPT_SHA256_STATE)(pWork[i]->hashState))->chain, pWork[i]->pbData, nBytes, &tmp ); + pWork[i]->pbData += nBytes; + pWork[i]->cbData -= nBytes; + } + return; +} + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256Append( + _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork, + _In_range_(1, MAX_PARALLEL) SIZE_T nPar, + SIZE_T nBytes, + _Out_writes_to_( SYMCRYPT_SIMD_ELEMENT_SIZE * PAR_SCRATCH_ELEMENTS_256, 0 ) + PBYTE pbSimdScratch, + SIZE_T cbSimdScratch ) +{ + PSYMCRYPT_SHA256_CHAINING_STATE apChain[MAX_PARALLEL]; + PCBYTE apData[MAX_PARALLEL]; + SIZE_T i; + UINT32 maxParallel; + + UNREFERENCED_PARAMETER( cbSimdScratch ); // not referenced on FRE builds + SYMCRYPT_ASSERT( cbSimdScratch >= PAR_SCRATCH_ELEMENTS_256 * 
SYMCRYPT_SIMD_ELEMENT_SIZE ); + SYMCRYPT_ASSERT( ((SIZE_T)pbSimdScratch & (SYMCRYPT_SIMD_ELEMENT_SIZE - 1)) == 0 ); + + // + // Compute maxParallel; this is 4 if nPar <= 4, and 8 if nPar = 5, ..., 8. + // This is how many parameter sets we have to set up. + // +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + + maxParallel = (nPar + 3) & ~3; + SYMCRYPT_ASSERT( maxParallel == 4 || (maxParallel == 8 && SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 )) ); + +#elif SYMCRYPT_CPU_ARM + + maxParallel = 4; + +#endif + + SYMCRYPT_ASSERT( nPar >= 1 && nPar <= maxParallel ); + + if( nPar < MIN_PARALLEL ) + { + SymCryptParallelSha256AppendBytes_serial( pWork, nPar, nBytes ); + + // Done with this function. + goto cleanup; + } + + // + // Our parallel code expects exactly four or eight parallel computations. + // We simply duplicate the first one if we get fewer parallel ones. + // That means we write the result multiple times, but it saves a lot of + // extra if()s in the main codeline. + // + + i = 0; + while( i < nPar ) + { + SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes ); + apChain[i] = & ((PSYMCRYPT_SHA256_STATE)(pWork[i]->hashState))->chain; + apData[i] = pWork[i]->pbData; + pWork[i]->pbData += nBytes; + pWork[i]->cbData -= nBytes; + i++; + } + + while( i < maxParallel ) + { + apChain[i] = apChain[0]; + apData[i] = apData[0]; + i++; + } + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + if( maxParallel == 8 ) + { + SymCryptParallelSha256AppendBlocks_ymm( &apChain[0], &apData[0], nBytes, (PBYTE)((__m256i *)pbSimdScratch) ); + } else { + SymCryptParallelSha256AppendBlocks_xmm( &apChain[0], &apData[0], nBytes, (__m128i *)pbSimdScratch ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptParallelSha256AppendBlocks_neon( &apChain[0], &apData[0], nBytes, (__n128 *) pbSimdScratch ); +#else +#error Unknown CPU +#endif + +cleanup: + ;// no cleanup at this moment. 
+} + +#endif + +/* +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBlocks( + _Inout_updates_( nWork ) PSYMCRYPT_PARALLEL_SHA256_STATE * pWork, + SIZE_T nWork, + SIZE_T nBytes ) +{ + SIZE_T i; + + SYMCRYPT_ASSERT( nWork >= 1 && nWork <= 4 ); + + for( i=0; i < nWork; i++ ) + { + SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes ); + SymCryptSha256AppendBlocks( &pWork[i]->internalState.hashState.chain, pWork[i]->pbData, nBytes ); + pWork[i]->pbData += nBytes; + pWork[i]->cbData -= nBytes; + } +} + +*/ + +#endif // SUPPORT_PARALLEL + +#if SUPPORT_PARALLEL + +const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha256Algorithm_default = { + &SymCryptSha256Algorithm_default, + PAR_SCRATCH_ELEMENTS_256 * SYMCRYPT_SIMD_ELEMENT_SIZE, + &SymCryptParallelSha256Result1, + &SymCryptParallelSha256Result2, + &SymCryptParallelSha256ResultDone, + &SymCryptParallelSha256Append, +}; + +#else + +// +// For platforms that do not have a parallel hash implementation +// we use this structure to provide the necessary data to the _serial +// implementation of the function. 
//
// Only the hash algorithm and the scratch size are filled in; the four callback
// slots are NULL.
const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha256Algorithm_default = {
    &SymCryptSha256Algorithm_default,
    PAR_SCRATCH_ELEMENTS_256 * SYMCRYPT_SIMD_ELEMENT_SIZE,
    NULL,
    NULL,
    NULL,
    NULL,
};

#endif

// Pointer through which the rest of the code refers to the table selected above.
const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha256Algorithm = &SymCryptParallelSha256Algorithm_default;


#define N_SELFTEST_STATES 5 // Just enough to trigger YMM usage

//
// SymCryptParallelSha256Selftest
//
// Hashes SymCryptTestMsg3 in N_SELFTEST_STATES states with a single call to
// SymCryptParallelSha256Process and compares every digest against
// SymCryptSha256KATAnswer. SymCryptFatal( 'PS25' ) is called if the parallel
// operation reports an error or if any digest differs.
//
VOID
SYMCRYPT_CALL
SymCryptParallelSha256Selftest(void)
{
    SYMCRYPT_ERROR scError;
    SYMCRYPT_SHA256_STATE states[N_SELFTEST_STATES];
    BYTE result[N_SELFTEST_STATES][SYMCRYPT_SHA256_RESULT_SIZE];
    SYMCRYPT_PARALLEL_HASH_OPERATION op[2*N_SELFTEST_STATES];   // one append plus one result per state
    BYTE scratch[SYMCRYPT_PARALLEL_SHA256_FIXED_SCRATCH + N_SELFTEST_STATES * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH];
    int i;

    SymCryptParallelSha256Init( &states[0], N_SELFTEST_STATES );

    // Operation 2*i appends the test message to state i, operation 2*i+1 extracts its digest.
    for( i=0; i<N_SELFTEST_STATES; i++ )
    {
        op[2*i ].iHash = i;
        op[2*i ].hashOperation = SYMCRYPT_HASH_OPERATION_APPEND;
        op[2*i ].pbBuffer = (PBYTE) SymCryptTestMsg3;
        op[2*i ].cbBuffer = sizeof(SymCryptTestMsg3);
        op[2*i + 1].iHash = i;
        op[2*i + 1].hashOperation = SYMCRYPT_HASH_OPERATION_RESULT;
        op[2*i + 1].pbBuffer = &result[i][0];
        op[2*i + 1].cbBuffer = SYMCRYPT_SHA256_RESULT_SIZE;
    }

    scError = SymCryptParallelSha256Process( &states[0], N_SELFTEST_STATES, op, 2*N_SELFTEST_STATES, scratch, sizeof( scratch ) );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        SymCryptFatal( 'PS25' );
    }

    for( i=0; i<N_SELFTEST_STATES; i++ )
    {
        // NOTE(review): presumably a test hook that may corrupt the digest on purpose
        // so that the failure path can be exercised - confirm against its definition.
        SymCryptInjectError( &result[i][0], SYMCRYPT_SHA256_RESULT_SIZE );

        if( memcmp( &result[i][0], SymCryptSha256KATAnswer, SYMCRYPT_SHA256_RESULT_SIZE ) != 0 ) {
            SymCryptFatal( 'PS25' );
        }
    }
}
diff --git a/libs/symcrypt/lib/sha3.c b/libs/symcrypt/lib/sha3.c
new file mode 100644
index 00000000000..fbbd0fe28dd
--- /dev/null
+++ b/libs/symcrypt/lib/sha3.c
@@ -0,0 +1,619 @@
//
// Sha3.c
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + + +// +// Keccak state +// +// Keccak-f[1600] state consists of 25 64-bit words. We represent this state as a single +// dimensional array of 25 elements (Wi being the i^th element of the array for i=0..24) +// with the following mapping to two dimensional coordinates. Note that in FIPS 202 Figure 2, +// the element W0 at (x,y)=(0,0) is depicted in the middle of the 5x5 array. We set W0 +// to be the first element so that the rate part of the permutation maps to the beginning +// of the state. +// +// x=0 x=1 x=2 x=3 x=4 +// ----------------------- +// y=0 W0 W1 W2 W3 W4 +// y=1 W5 W6 W7 W8 W9 +// y=2 W10 W11 W12 W13 W14 +// y=3 W15 W16 W17 W18 W19 +// y=4 W20 W21 W22 W23 W24 + + + +// Rotation constants for Keccak Rho transformation +static const UINT8 KeccakRhoK[25] = { + 0, 1, 62, 28, 27, // y = 0 + 36, 44, 6, 55, 20, // y = 1 + 3, 10, 43, 25, 39, // y = 2 + 41, 45, 15, 21, 8, // y = 3 + 18, 2, 61, 56, 14, // y = 4 +}; + +// Keccak round constants +static UINT64 KeccakIotaK[24] = { + 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, + 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL +}; + +// XOR sum of column c of the state +#define KECCAK_COLUMN_SUM(state, c) \ + (state[0 + (c)] ^ state[5 + (c)] ^ state[10 + (c)] ^ state[15 + (c)] ^ state[20 + (c)]) + +// XOR w to all the lanes in column c of the state +// +// Note: The expression to be XORed is copied to a temporary variable to avoid 
reevaluation +#define KECCAK_COLUMN_UPDATE(state, c, w) { \ + UINT64 t = (w); \ + state[ 0 + (c)] ^= t; \ + state[ 5 + (c)] ^= t; \ + state[10 + (c)] ^= t; \ + state[15 + (c)] ^= t; \ + state[20 + (c)] ^= t; \ +} + +// Apply Theta transformation to the state +#define KECCAK_THETA(state) { \ + UINT64 colSum[5]; \ + colSum[0] = KECCAK_COLUMN_SUM(state, 0); \ + colSum[1] = KECCAK_COLUMN_SUM(state, 1); \ + colSum[2] = KECCAK_COLUMN_SUM(state, 2); \ + colSum[3] = KECCAK_COLUMN_SUM(state, 3); \ + colSum[4] = KECCAK_COLUMN_SUM(state, 4); \ + KECCAK_COLUMN_UPDATE(state, 0, colSum[4] ^ ROL64(colSum[1], 1)); \ + KECCAK_COLUMN_UPDATE(state, 1, colSum[0] ^ ROL64(colSum[2], 1)); \ + KECCAK_COLUMN_UPDATE(state, 2, colSum[1] ^ ROL64(colSum[3], 1)); \ + KECCAK_COLUMN_UPDATE(state, 3, colSum[2] ^ ROL64(colSum[4], 1)); \ + KECCAK_COLUMN_UPDATE(state, 4, colSum[3] ^ ROL64(colSum[0], 1)); \ +} + +// Apply Rho transformation to row r of the state +#define KECCAK_RHO_ROW(state, r) { \ + state[5 * (r) + 0] = ROL64(state[5 * (r) + 0], KeccakRhoK[5 * (r) + 0]); \ + state[5 * (r) + 1] = ROL64(state[5 * (r) + 1], KeccakRhoK[5 * (r) + 1]); \ + state[5 * (r) + 2] = ROL64(state[5 * (r) + 2], KeccakRhoK[5 * (r) + 2]); \ + state[5 * (r) + 3] = ROL64(state[5 * (r) + 3], KeccakRhoK[5 * (r) + 3]); \ + state[5 * (r) + 4] = ROL64(state[5 * (r) + 4], KeccakRhoK[5 * (r) + 4]); \ +} + +// Apply Rho transformation to row 0 of the state +// +// The first row contains a rotation by 0 on the first lane that uses a shift +// by 64 which we want to avoid. Rho operation below omits the rotation on the first lane. 
+#define KECCAK_RHO_ROW0(state) { \ + state[1] = ROL64(state[1], KeccakRhoK[1]); \ + state[2] = ROL64(state[2], KeccakRhoK[2]); \ + state[3] = ROL64(state[3], KeccakRhoK[3]); \ + state[4] = ROL64(state[4], KeccakRhoK[4]); \ +} + +// Apply Rho transformation to the state +#define KECCAK_RHO(state) { \ + KECCAK_RHO_ROW0(state); \ + KECCAK_RHO_ROW(state, 1); \ + KECCAK_RHO_ROW(state, 2); \ + KECCAK_RHO_ROW(state, 3); \ + KECCAK_RHO_ROW(state, 4); \ +} + +// Apply Pi transformation to the state +#define KECCAK_PI(state) { \ + UINT64 t = state[ 1]; state[ 1] = state[ 6]; state[ 6] = state[ 9]; state[ 9] = state[22]; state[22] = state[14]; \ + state[14] = state[20]; state[20] = state[ 2]; state[ 2] = state[12]; state[12] = state[13]; state[13] = state[19]; \ + state[19] = state[23]; state[23] = state[15]; state[15] = state[ 4]; state[ 4] = state[24]; state[24] = state[21]; \ + state[21] = state[ 8]; state[ 8] = state[16]; state[16] = state[ 5]; state[ 5] = state[ 3]; state[ 3] = state[18]; \ + state[18] = state[17]; state[17] = state[11]; state[11] = state[ 7]; state[ 7] = state[10]; state[10] = t; \ +} + +// Apply Chi transformation on row r of state +#define KECCAK_CHI_ROW(state, r) { \ + UINT64 t1 = state[5 * (r) + 0] ^ (~state[5 * (r) + 1] & state[5 * (r) + 2]); \ + UINT64 t2 = state[5 * (r) + 1] ^ (~state[5 * (r) + 2] & state[5 * (r) + 3]); \ + state[5 * (r) + 2] = state[5 * (r) + 2] ^ (~state[5 * (r) + 3] & state[5 * (r) + 4]); \ + state[5 * (r) + 3] = state[5 * (r) + 3] ^ (~state[5 * (r) + 4] & state[5 * (r) + 0]); \ + state[5 * (r) + 4] = state[5 * (r) + 4] ^ (~state[5 * (r) + 0] & state[5 * (r) + 1]); \ + state[5 * (r) + 0] = t1; \ + state[5 * (r) + 1] = t2; \ +} + +// Apply Chi transformation to state +#define KECCAK_CHI(state) { \ + KECCAK_CHI_ROW(state, 0); \ + KECCAK_CHI_ROW(state, 1); \ + KECCAK_CHI_ROW(state, 2); \ + KECCAK_CHI_ROW(state, 3); \ + KECCAK_CHI_ROW(state, 4); \ +} + +// Add round constant to state +#define KECCAK_IOTA(state, rnd) state[0] ^= 
KeccakIotaK[rnd] + +// Perform one round of Keccak permutation on state +#define KECCAK_PERM_ROUND(state, rnd) { \ + KECCAK_THETA(state); \ + KECCAK_RHO(state); \ + KECCAK_PI(state); \ + KECCAK_CHI(state); \ + KECCAK_IOTA(state, rnd); \ +} + + +// +// SymCryptKeccakPermute +// +VOID +SYMCRYPT_CALL +SymCryptKeccakPermute(_Inout_updates_(25) UINT64* pState) +{ + for (int r = 0; r < 24; r++) + { + KECCAK_PERM_ROUND(pState, r); + } +} + + +// +// SymCryptKeccakInit +// +VOID +SYMCRYPT_CALL +SymCryptKeccakInit(_Out_ PSYMCRYPT_KECCAK_STATE pState, UINT32 inputBlockSize, UINT8 paddingValue) +{ + pState->inputBlockSize = inputBlockSize; + pState->paddingValue = paddingValue; + + // Initialize the Keccak permutation state and set mutable state variables + // to their default values. + SymCryptKeccakReset(pState); +} + +VOID +SYMCRYPT_CALL +SymCryptKeccakReset(_Out_ PSYMCRYPT_KECCAK_STATE pState) +{ + // + // Wipe & re-initialize + // + // Wipe the Keccak permutation state and set the mutable state variables to their + // default values. Non-mutable state variables retain their values. State becomes + // re-initialized after this call. 
+ SymCryptWipeKnownSize(pState->state, sizeof(pState->state)); + pState->stateIndex = 0; + pState->squeezeMode = FALSE; +} + +// +// SymCryptKeccakAppendByte +// +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendByte(_Inout_ PSYMCRYPT_KECCAK_STATE pState, BYTE val) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + SYMCRYPT_ASSERT(pState->stateIndex < pState->inputBlockSize); + + pState->state[pState->stateIndex / sizeof(UINT64)] ^= ((UINT64)val << (8 * (pState->stateIndex % 8))); + pState->stateIndex++; +} + +// +// SymCryptKeccakAppendBytes +// +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendBytes(_Inout_ PSYMCRYPT_KECCAK_STATE pState, PCBYTE pbBuffer, SIZE_T cbBuffer) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + SYMCRYPT_ASSERT((pState->stateIndex + cbBuffer) <= pState->inputBlockSize); + + for (SIZE_T i = 0; i < cbBuffer; i++) + { + pState->state[(pState->stateIndex + i) / sizeof(UINT64)] ^= ((UINT64)pbBuffer[i] << (8 * ((pState->stateIndex + i) % 8))); + } + + pState->stateIndex += (UINT32)cbBuffer; +} + + +// +// SymCryptKeccakAppendLanes +// +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendLanes( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_(uLaneCount * sizeof(UINT64)) PCBYTE pbData, + SIZE_T uLaneCount) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + SYMCRYPT_ASSERT((pState->inputBlockSize & 0x7) == 0); + SYMCRYPT_ASSERT((pState->stateIndex & 0x7) == 0); + SYMCRYPT_ASSERT(pState->stateIndex != pState->inputBlockSize); + + // Locate the lane in the state for next append. + // Currently, pState->stateIndex/sizeof(UINT64) of the lanes are used. 
+ UINT32 uLaneIndex = pState->stateIndex / sizeof(UINT64); + + for (SIZE_T i = 0; i < uLaneCount; i++) + { + pState->state[uLaneIndex] ^= SYMCRYPT_LOAD_LSBFIRST64(pbData + i * sizeof(UINT64)); + pState->stateIndex += sizeof(UINT64); + uLaneIndex++; + + if (pState->stateIndex == pState->inputBlockSize) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + uLaneIndex = 0; + } + } +} + +// +// SymCryptKeccakZeroAppendBlock +// +VOID +SYMCRYPT_CALL +SymCryptKeccakZeroAppendBlock(_Inout_ PSYMCRYPT_KECCAK_STATE pState) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; +} + +// +// SymCryptKeccakAppend +// +VOID +SYMCRYPT_CALL +SymCryptKeccakAppend( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SYMCRYPT_ASSERT(pState->inputBlockSize % 8 == 0); + + // If we were in squeeze mode (Append is called after an Extract without wiping), + // switch to absorb mode to start a new hash computation. + if (pState->squeezeMode) + { + SymCryptKeccakReset(pState); + } + + SYMCRYPT_ASSERT(pState->stateIndex < pState->inputBlockSize); + + // Make pState->stateIndex a multiple of 8. + // Message block boundary will not be crossed, check + // if permutation is needed after this part. 
+ while (cbData > 0 && (pState->stateIndex & 0x7)) + { + SymCryptKeccakAppendByte(pState, *pbData); + pbData++; + cbData--; + } + + // Permute if input message block is filled + if (pState->stateIndex == pState->inputBlockSize) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + } + + // Append full lanes + SIZE_T uFullLanes = cbData / sizeof(UINT64); + if (uFullLanes > 0) + { + SymCryptKeccakAppendLanes(pState, pbData, uFullLanes); + pbData += uFullLanes * sizeof(UINT64); + cbData -= uFullLanes * sizeof(UINT64); + } + + SYMCRYPT_ASSERT(cbData < sizeof(UINT64)); + SymCryptKeccakAppendBytes(pState, pbData, cbData); + + SYMCRYPT_ASSERT(pState->stateIndex != pState->inputBlockSize); +} + +// +// SymCryptKeccakApplyPadding +// +VOID +SYMCRYPT_CALL +SymCryptKeccakApplyPadding(_Inout_ PSYMCRYPT_KECCAK_STATE pState) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + + // Locate the lane and byte position for the padding byte + UINT32 uLanePos = pState->stateIndex / sizeof(UINT64); + UINT32 uBytePos = pState->stateIndex % sizeof(UINT64); + pState->state[uLanePos] ^= ((UINT64)pState->paddingValue << (8 * uBytePos)); + + // Pad the final 1 bit to the msb of the last lane in the rate portion of the state + pState->state[pState->inputBlockSize / sizeof(UINT64) - 1] ^= (1ULL << 63); + + // Process the padded block and switch to squeeze mode + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + pState->squeezeMode = TRUE; +} + +// +// SymCryptKeccakExtractByte +// +FORCEINLINE +BYTE +SYMCRYPT_CALL +SymCryptKeccakExtractByte(_Inout_ PSYMCRYPT_KECCAK_STATE pState) +{ + SYMCRYPT_ASSERT(pState->squeezeMode); + SYMCRYPT_ASSERT(pState->stateIndex < pState->inputBlockSize); + + BYTE ret = (BYTE)((pState->state[pState->stateIndex / sizeof(UINT64)] >> (8 * (pState->stateIndex % 8))) & 0xff); + pState->stateIndex++; + return ret; +} + +// +// SymCryptKeccakExtractLanes +// +VOID +SYMCRYPT_CALL +SymCryptKeccakExtractLanes( + _Inout_ PSYMCRYPT_KECCAK_STATE 
pState, + _Out_writes_(uLaneCount * sizeof(UINT64)) PBYTE pbResult, + SIZE_T uLaneCount) +{ + SYMCRYPT_ASSERT(pState->squeezeMode); + SYMCRYPT_ASSERT((pState->inputBlockSize & 0x7) == 0); + SYMCRYPT_ASSERT((pState->stateIndex & 0x7) == 0); + + // Locate the lane in the state for next extraction + UINT32 uLaneIndex = pState->stateIndex / sizeof(UINT64); + + for (SIZE_T i = 0; i < uLaneCount; i++) + { + SYMCRYPT_ASSERT(pState->stateIndex <= pState->inputBlockSize); + + if (pState->stateIndex == pState->inputBlockSize) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + uLaneIndex = 0; + } + + SYMCRYPT_STORE_LSBFIRST64(pbResult + i * sizeof(UINT64), pState->state[uLaneIndex]); + pState->stateIndex += sizeof(UINT64); + uLaneIndex++; + } +} + +// +// SymCryptKeccakExtract +// +VOID +SYMCRYPT_CALL +SymCryptKeccakExtract( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe) +{ + // Apply padding and switch to squeeze mode if this is the first call to Extract + if (!pState->squeezeMode) + { + SymCryptKeccakApplyPadding(pState); + } + + // Do the permutation if there are no bytes available in the state + if ( (cbResult > 0) && (pState->stateIndex == pState->inputBlockSize) ) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex= 0; + } + + // Make stateIndex a multiple of 8 so that the extraction can be performed in lanes. + // We don't call the permutation as soon as the stateIndex reaches inputBlockSize, + // cbResult must also be non-zero for that. This condition is checked + // in ExtractLanes or in the 'remaining bytes' block that follows it. 
+ while (cbResult > 0 && (pState->stateIndex & 0x7)) + { + *pbResult = SymCryptKeccakExtractByte(pState); + pbResult++; + cbResult--; + } + + SYMCRYPT_ASSERT((cbResult == 0) || ((pState->stateIndex & 0x7) == 0)); + + // Extract full lanes + SIZE_T uFullLanes = cbResult / sizeof(UINT64); + if (uFullLanes > 0) + { + SymCryptKeccakExtractLanes(pState, pbResult, uFullLanes); + pbResult += uFullLanes * sizeof(UINT64); + cbResult -= uFullLanes * sizeof(UINT64); + } + + // Extract the remaining bytes + SYMCRYPT_ASSERT(cbResult < sizeof(UINT64)); + while (cbResult > 0) + { + if (pState->stateIndex == pState->inputBlockSize) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + } + + *pbResult = SymCryptKeccakExtractByte(pState); + pbResult++; + cbResult--; + } + + if (bWipe) + { + // Wipe the Keccak state and make it ready for a new hash computation + SymCryptKeccakReset(pState); + } +} + +// +// SymCryptKeccakStateExport +// +VOID +SYMCRYPT_CALL +SymCryptKeccakStateExport( + SYMCRYPT_BLOB_TYPE type, + _In_ PCSYMCRYPT_KECCAK_STATE pState, + _Out_writes_bytes_(SYMCRYPT_KECCAK_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + + SYMCRYPT_ALIGN SYMCRYPT_KECCAK_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + C_ASSERT(sizeof(blob) == SYMCRYPT_KECCAK_STATE_EXPORT_SIZE); + + SymCryptWipeKnownSize(&blob, sizeof(blob)); // wipe to avoid any data leakage + + blob.header.magic = SYMCRYPT_BLOB_MAGIC; + blob.header.size = SYMCRYPT_KECCAK_STATE_EXPORT_SIZE; + blob.header.type = type; + + // + // Copy the relevant data. Buffer will be 0-padded. 
+ // + + SymCryptUint64ToLsbFirst(&pState->state[0], &blob.state[0], 25); + blob.stateIndex = pState->stateIndex; + blob.paddingValue = pState->paddingValue; + blob.squeezeMode = pState->squeezeMode; + + SYMCRYPT_ASSERT((PCBYTE)&blob + sizeof(blob) - sizeof(SYMCRYPT_BLOB_TRAILER) == (PCBYTE)&blob.trailer); + SymCryptMarvin32(SymCryptMarvin32DefaultSeed, (PCBYTE)&blob, sizeof(blob) - sizeof(SYMCRYPT_BLOB_TRAILER), &blob.trailer.checksum[0]); + + memcpy(pbBlob, &blob, sizeof(blob)); + + SymCryptWipeKnownSize(&blob, sizeof(blob)); + return; +} + + +// +// SymCryptKeccakStateImport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKeccakStateImport( + SYMCRYPT_BLOB_TYPE type, + _Out_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_bytes_(SYMCRYPT_KECCAK_STATE_EXPORT_SIZE) PCBYTE pbBlob) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_ALIGN SYMCRYPT_KECCAK_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + BYTE checksum[8]; + + C_ASSERT(sizeof(blob) == SYMCRYPT_KECCAK_STATE_EXPORT_SIZE); + memcpy(&blob, pbBlob, sizeof(blob)); + + if (blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_KECCAK_STATE_EXPORT_SIZE || + blob.header.type != (UINT32)type) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32(SymCryptMarvin32DefaultSeed, (PCBYTE)&blob, sizeof(blob) - sizeof(SYMCRYPT_BLOB_TRAILER), checksum); + if (memcmp(checksum, &blob.trailer.checksum[0], 8) != 0) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptLsbFirstToUint64(&blob.state[0], &pState->state[0], 25); + pState->stateIndex = blob.stateIndex; + pState->paddingValue = blob.paddingValue; + pState->squeezeMode = blob.squeezeMode; + + // + // Set state fields based on the blob type and do validation + // + + // default values indicate error + pState->inputBlockSize = 0; + pState->paddingValue = 0; + + switch (blob.header.type) + { + case SymCryptBlobTypeSha3_224State: + pState->inputBlockSize = 
SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE; + if (blob.paddingValue == SYMCRYPT_SHA3_PADDING_VALUE) + { + pState->paddingValue = blob.paddingValue; + } + break; + case SymCryptBlobTypeSha3_256State: + pState->inputBlockSize = SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE; + if (blob.paddingValue == SYMCRYPT_SHA3_PADDING_VALUE) + { + pState->paddingValue = blob.paddingValue; + } + break; + + case SymCryptBlobTypeSha3_384State: + pState->inputBlockSize = SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE; + if (blob.paddingValue == SYMCRYPT_SHA3_PADDING_VALUE) + { + pState->paddingValue = blob.paddingValue; + } + break; + + case SymCryptBlobTypeSha3_512State: + pState->inputBlockSize = SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE; + if (blob.paddingValue == SYMCRYPT_SHA3_PADDING_VALUE) + { + pState->paddingValue = blob.paddingValue; + } + break; + default: + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + if (pState->inputBlockSize == 0 || pState->paddingValue == 0) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + if (pState->stateIndex > pState->inputBlockSize) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Allow stateIndex = inputBlockSize only in squeeze mode + if ((pState->stateIndex == pState->inputBlockSize) && !pState->squeezeMode) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + +cleanup: + SymCryptWipeKnownSize(&blob, sizeof(blob)); + + return scError; +} diff --git a/libs/symcrypt/lib/sha3_224.c b/libs/symcrypt/lib/sha3_224.c new file mode 100644 index 00000000000..69dec754509 --- /dev/null +++ b/libs/symcrypt/lib/sha3_224.c @@ -0,0 +1,141 @@ +// +// Sha3_224.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + + +const SYMCRYPT_HASH SymCryptSha3_224Algorithm_default = { + &SymCryptSha3_224Init, + &SymCryptSha3_224Append, + &SymCryptSha3_224Result, + NULL, // AppendBlocks function is not implemented for SHA-3 + &SymCryptSha3_224StateCopy, + sizeof(SYMCRYPT_SHA3_224_STATE), + SYMCRYPT_SHA3_224_RESULT_SIZE, + SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHA3_224_STATE, ks.state), + SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHA3_224_STATE, ks.state), +}; + +const PCSYMCRYPT_HASH SymCryptSha3_224Algorithm = &SymCryptSha3_224Algorithm_default; + + +// +// SymCryptSha3_224 +// +#define ALG SHA3_224 +#define Alg Sha3_224 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + +// +// SymCryptSha3_224Init +// +VOID +SYMCRYPT_CALL +SymCryptSha3_224Init(_Out_ PSYMCRYPT_SHA3_224_STATE pState) +{ + SymCryptKeccakInit(&pState->ks, + SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE, + SYMCRYPT_SHA3_PADDING_VALUE); + + SYMCRYPT_SET_MAGIC(pState); +} + + +// +// SymCryptSha3_224Append +// +VOID +SYMCRYPT_CALL +SymCryptSha3_224Append( + _Inout_ PSYMCRYPT_SHA3_224_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + + +// +// SymCryptSha3_224Result +// +VOID +SYMCRYPT_CALL +SymCryptSha3_224Result( + _Inout_ PSYMCRYPT_SHA3_224_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_224_RESULT_SIZE) PBYTE pbResult) +{ + SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHA3_224_RESULT_SIZE, TRUE); +} + + +// +// SymCryptSha3_224StateExport +// +VOID +SYMCRYPT_CALL +SymCryptSha3_224StateExport( + _In_ PCSYMCRYPT_SHA3_224_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + SYMCRYPT_CHECK_MAGIC(pState); + SymCryptKeccakStateExport(SymCryptBlobTypeSha3_224State, &pState->ks, pbBlob); +} + + +// +// SymCryptSha3_224StateImport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_224StateImport( + _Out_ PSYMCRYPT_SHA3_224_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE) 
PCBYTE pbBlob) +{ + SYMCRYPT_ERROR scError = SymCryptKeccakStateImport(SymCryptBlobTypeSha3_224State, &pState->ks, pbBlob); + + if (scError == SYMCRYPT_NO_ERROR) + { + SYMCRYPT_SET_MAGIC(pState); + } + + return scError; +} + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha3_224KATAnswer[28] = { + 0xe6, 0x42, 0x82, 0x4c, 0x3f, 0x8c, 0xf2, 0x4a, + 0xd0, 0x92, 0x34, 0xee, 0x7d, 0x3c, 0x76, 0x6f, + 0xc9, 0xa3, 0xa5, 0x16, 0x8d, 0x0c, 0x94, 0xad, + 0x73, 0xb4, 0x6f, 0xdf, +}; + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Selftest(void) +{ + BYTE result[SYMCRYPT_SHA3_224_RESULT_SIZE]; + + SymCryptSha3_224(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, sha3_224KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('SHA3'); + } +} diff --git a/libs/symcrypt/lib/sha3_256.c b/libs/symcrypt/lib/sha3_256.c new file mode 100644 index 00000000000..aabe11b71a3 --- /dev/null +++ b/libs/symcrypt/lib/sha3_256.c @@ -0,0 +1,141 @@ +// +// Sha3_256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + + +const SYMCRYPT_HASH SymCryptSha3_256Algorithm_default = { + &SymCryptSha3_256Init, + &SymCryptSha3_256Append, + &SymCryptSha3_256Result, + NULL, // AppendBlocks function is not implemented for SHA-3 + &SymCryptSha3_256StateCopy, + sizeof(SYMCRYPT_SHA3_256_STATE), + SYMCRYPT_SHA3_256_RESULT_SIZE, + SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHA3_256_STATE, ks.state), + SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHA3_256_STATE, ks.state), +}; + +const PCSYMCRYPT_HASH SymCryptSha3_256Algorithm = &SymCryptSha3_256Algorithm_default; + + +// +// SymCryptSha3_256 +// +#define ALG SHA3_256 +#define Alg Sha3_256 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + +// +// SymCryptSha3_256Init +// +VOID +SYMCRYPT_CALL +SymCryptSha3_256Init(_Out_ PSYMCRYPT_SHA3_256_STATE pState) +{ + SymCryptKeccakInit(&pState->ks, + SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE, + SYMCRYPT_SHA3_PADDING_VALUE); + + SYMCRYPT_SET_MAGIC(pState); +} + + +// +// SymCryptSha3_256Append +// +VOID +SYMCRYPT_CALL +SymCryptSha3_256Append( + _Inout_ PSYMCRYPT_SHA3_256_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + + +// +// SymCryptSha3_256Result +// +VOID +SYMCRYPT_CALL +SymCryptSha3_256Result( + _Inout_ PSYMCRYPT_SHA3_256_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_256_RESULT_SIZE) PBYTE pbResult) +{ + SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHA3_256_RESULT_SIZE, TRUE); +} + + +// +// SymCryptSha3_256StateExport +// +VOID +SYMCRYPT_CALL +SymCryptSha3_256StateExport( + _In_ PCSYMCRYPT_SHA3_256_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + SYMCRYPT_CHECK_MAGIC(pState); + SymCryptKeccakStateExport(SymCryptBlobTypeSha3_256State, &pState->ks, pbBlob); +} + + +// +// SymCryptSha3_256StateImport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_256StateImport( + _Out_ PSYMCRYPT_SHA3_256_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE) 
PCBYTE pbBlob) +{ + SYMCRYPT_ERROR scError = SymCryptKeccakStateImport(SymCryptBlobTypeSha3_256State, &pState->ks, pbBlob); + + if (scError == SYMCRYPT_NO_ERROR) + { + SYMCRYPT_SET_MAGIC(pState); + } + + return scError; +} + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha3_256KATAnswer[32] = { + 0x3a, 0x98, 0x5d, 0xa7, 0x4f, 0xe2, 0x25, 0xb2, + 0x04, 0x5c, 0x17, 0x2d, 0x6b, 0xd3, 0x90, 0xbd, + 0x85, 0x5f, 0x08, 0x6e, 0x3e, 0x9d, 0x52, 0x5b, + 0x46, 0xbf, 0xe2, 0x45, 0x11, 0x43, 0x15, 0x32 +}; + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Selftest(void) +{ + BYTE result[SYMCRYPT_SHA3_256_RESULT_SIZE]; + + SymCryptSha3_256(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, sha3_256KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('SHA3'); + } +} diff --git a/libs/symcrypt/lib/sha3_384.c b/libs/symcrypt/lib/sha3_384.c new file mode 100644 index 00000000000..0f557661ac2 --- /dev/null +++ b/libs/symcrypt/lib/sha3_384.c @@ -0,0 +1,143 @@ +// +// Sha3_384.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + + +const SYMCRYPT_HASH SymCryptSha3_384Algorithm_default = { + &SymCryptSha3_384Init, + &SymCryptSha3_384Append, + &SymCryptSha3_384Result, + NULL, // AppendBlocks function is not implemented for SHA-3 + &SymCryptSha3_384StateCopy, + sizeof(SYMCRYPT_SHA3_384_STATE), + SYMCRYPT_SHA3_384_RESULT_SIZE, + SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHA3_384_STATE, ks.state), + SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHA3_384_STATE, ks.state), +}; + +const PCSYMCRYPT_HASH SymCryptSha3_384Algorithm = &SymCryptSha3_384Algorithm_default; + + +// +// SymCryptSha3_384 +// +#define ALG SHA3_384 +#define Alg Sha3_384 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + +// +// SymCryptSha3_384Init +// +VOID +SYMCRYPT_CALL +SymCryptSha3_384Init(_Out_ PSYMCRYPT_SHA3_384_STATE pState) +{ + SymCryptKeccakInit(&pState->ks, + SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE, + SYMCRYPT_SHA3_PADDING_VALUE); + + SYMCRYPT_SET_MAGIC(pState); +} + + +// +// SymCryptSha3_384Append +// +VOID +SYMCRYPT_CALL +SymCryptSha3_384Append( + _Inout_ PSYMCRYPT_SHA3_384_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + + +// +// SymCryptSha3_384Result +// +VOID +SYMCRYPT_CALL +SymCryptSha3_384Result( + _Inout_ PSYMCRYPT_SHA3_384_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_384_RESULT_SIZE) PBYTE pbResult) +{ + SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHA3_384_RESULT_SIZE, TRUE); +} + + +// +// SymCryptSha3_384StateExport +// +VOID +SYMCRYPT_CALL +SymCryptSha3_384StateExport( + _In_ PCSYMCRYPT_SHA3_384_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + SYMCRYPT_CHECK_MAGIC(pState); + SymCryptKeccakStateExport(SymCryptBlobTypeSha3_384State, &pState->ks, pbBlob); +} + + +// +// SymCryptSha3_384StateImport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_384StateImport( + _Out_ PSYMCRYPT_SHA3_384_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE) 
PCBYTE pbBlob) +{ + SYMCRYPT_ERROR scError = SymCryptKeccakStateImport(SymCryptBlobTypeSha3_384State, &pState->ks, pbBlob); + + if (scError == SYMCRYPT_NO_ERROR) + { + SYMCRYPT_SET_MAGIC(pState); + } + + return scError; +} + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha3_384KATAnswer[48] = { + 0xec, 0x01, 0x49, 0x82, 0x88, 0x51, 0x6f, 0xc9, + 0x26, 0x45, 0x9f, 0x58, 0xe2, 0xc6, 0xad, 0x8d, + 0xf9, 0xb4, 0x73, 0xcb, 0x0f, 0xc0, 0x8c, 0x25, + 0x96, 0xda, 0x7c, 0xf0, 0xe4, 0x9b, 0xe4, 0xb2, + 0x98, 0xd8, 0x8c, 0xea, 0x92, 0x7a, 0xc7, 0xf5, + 0x39, 0xf1, 0xed, 0xf2, 0x28, 0x37, 0x6d, 0x25 +}; + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Selftest(void) +{ + BYTE result[SYMCRYPT_SHA3_384_RESULT_SIZE]; + + SymCryptSha3_384(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, sha3_384KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('SHA3'); + } +} diff --git a/libs/symcrypt/lib/sha3_512.c b/libs/symcrypt/lib/sha3_512.c new file mode 100644 index 00000000000..71eccc71a64 --- /dev/null +++ b/libs/symcrypt/lib/sha3_512.c @@ -0,0 +1,144 @@ +// +// Sha3_512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + + +const SYMCRYPT_HASH SymCryptSha3_512Algorithm_default = { + &SymCryptSha3_512Init, + &SymCryptSha3_512Append, + &SymCryptSha3_512Result, + NULL, // AppendBlocks function is not implemented for SHA-3 + &SymCryptSha3_512StateCopy, + sizeof(SYMCRYPT_SHA3_512_STATE), + SYMCRYPT_SHA3_512_RESULT_SIZE, + SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHA3_512_STATE, ks.state), + SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHA3_512_STATE, ks.state), +}; + +const PCSYMCRYPT_HASH SymCryptSha3_512Algorithm = &SymCryptSha3_512Algorithm_default; + + +// +// SymCryptSha3_512 +// +#define ALG SHA3_512 +#define Alg Sha3_512 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + +// +// SymCryptSha3_512Init +// +VOID +SYMCRYPT_CALL +SymCryptSha3_512Init(_Out_ PSYMCRYPT_SHA3_512_STATE pState) +{ + SymCryptKeccakInit(&pState->ks, + SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE, + SYMCRYPT_SHA3_PADDING_VALUE); + + SYMCRYPT_SET_MAGIC(pState); +} + + +// +// SymCryptSha3_512Append +// +VOID +SYMCRYPT_CALL +SymCryptSha3_512Append( + _Inout_ PSYMCRYPT_SHA3_512_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + + +// +// SymCryptSha3_512Result +// +VOID +SYMCRYPT_CALL +SymCryptSha3_512Result( + _Inout_ PSYMCRYPT_SHA3_512_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_512_RESULT_SIZE) PBYTE pbResult) +{ + SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHA3_512_RESULT_SIZE, TRUE); +} + + +// +// SymCryptSha3_512StateExport +// +VOID +SYMCRYPT_CALL +SymCryptSha3_512StateExport( + _In_ PCSYMCRYPT_SHA3_512_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + SYMCRYPT_CHECK_MAGIC(pState); + SymCryptKeccakStateExport(SymCryptBlobTypeSha3_512State, &pState->ks, pbBlob); +} + +// +// SymCryptSha3_512StateImport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_512StateImport( + _Out_ PSYMCRYPT_SHA3_512_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE) PCBYTE
pbBlob) +{ + SYMCRYPT_ERROR scError = SymCryptKeccakStateImport(SymCryptBlobTypeSha3_512State, &pState->ks, pbBlob); + + if (scError == SYMCRYPT_NO_ERROR) + { + SYMCRYPT_SET_MAGIC(pState); + } + + return scError; +} + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha3_512KATAnswer[64] = { + 0xb7, 0x51, 0x85, 0x0b, 0x1a, 0x57, 0x16, 0x8a, + 0x56, 0x93, 0xcd, 0x92, 0x4b, 0x6b, 0x09, 0x6e, + 0x08, 0xf6, 0x21, 0x82, 0x74, 0x44, 0xf7, 0x0d, + 0x88, 0x4f, 0x5d, 0x02, 0x40, 0xd2, 0x71, 0x2e, + 0x10, 0xe1, 0x16, 0xe9, 0x19, 0x2a, 0xf3, 0xc9, + 0x1a, 0x7e, 0xc5, 0x76, 0x47, 0xe3, 0x93, 0x40, + 0x57, 0x34, 0x0b, 0x4c, 0xf4, 0x08, 0xd5, 0xa5, + 0x65, 0x92, 0xf8, 0x27, 0x4e, 0xec, 0x53, 0xf0 +}; + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Selftest(void) +{ + BYTE result[SYMCRYPT_SHA3_512_RESULT_SIZE]; + + SymCryptSha3_512(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, sha3_512KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('SHA3'); + } +} diff --git a/libs/symcrypt/lib/sha512-ymm.c b/libs/symcrypt/lib/sha512-ymm.c new file mode 100644 index 00000000000..de7f87fd3a5 --- /dev/null +++ b/libs/symcrypt/lib/sha512-ymm.c @@ -0,0 +1,801 @@ +#include "precomp.h" + +#if SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("avx2") +#endif + +extern SYMCRYPT_ALIGN_AT(64) const UINT64 SymCryptSha512K[81]; + + +// Endianness transformation for 4 64-bit values in a YMM register +const SYMCRYPT_ALIGN_AT(32) UINT64 BYTE_REVERSE_64X2[4] = { + 0x0001020304050607, 0x08090a0b0c0d0e0f, + 0x0001020304050607, 0x08090a0b0c0d0e0f +}; + +// Rotate right each 64-bit value in a YMM register by 1 byte +const SYMCRYPT_ALIGN_AT(32) UINT64 BYTE_ROTATE_64[4] = { + 0x0007060504030201, 0x080f0e0d0c0b0a09, + 0x0007060504030201, 0x080f0e0d0c0b0a09, +}; + + +#if SYMCRYPT_MS_VC && 
!defined(__clang__) +#define RORX_U32 _rorx_u32 +#define RORX_U64 _rorx_u64 +#else +// TODO: implement _rorx functions for clang +#define RORX_U32 ROR32 +#define RORX_U64 ROR64 +#endif // SYMCRYPT_MS_VC + + +// +// For documentation on these function see FIPS 180-2 +// +// MAJ and CH are the functions Maj and Ch from the standard. +// CSIGMA0 and CSIGMA1 are the capital sigma functions. +// LSIGMA0 and LSIGMA1 are the lowercase sigma functions. +// +// The canonical definitions of the MAJ and CH functions are: +//#define MAJ( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +//#define CH( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z))) +// We use optimized versions defined below +// + +#define MAJ( x, y, z ) ((((z) | (y)) & (x) ) | ((z) & (y))) +#define CH( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z)) + +#define CSIGMA0(x) (RORX_U64(x, 28) ^ RORX_U64(x, 34) ^ RORX_U64(x, 39)) +#define CSIGMA1(x) (RORX_U64(x, 14) ^ RORX_U64(x, 18) ^ RORX_U64(x, 41)) + +#define LSIGMA0( x ) (ROR64( (x) ^ ROR64((x), 7), 1) ^ ((x)>> 7)) +#define LSIGMA1( x ) (ROR64( (x) ^ ROR64((x), 42), 19) ^ ((x)>> 6)) + +#define YMMADD( _a, _b ) _mm256_add_epi64((_a), (_b)) +#define YMMROR( _a, _n ) _mm256_xor_si256( _mm256_slli_epi64( (_a), 64-(_n)), _mm256_srli_epi64( (_a), (_n)) ) +#define YMMSHR( _a, _n ) _mm256_srli_epi64((_a), (_n)) +#define YMMXOR( _a, _b ) _mm256_xor_si256((_a), (_b)) + +// Rotation by 8 bits is faster with byte shuffling +#if 1 +#define YMMROR8( _a ) _mm256_shuffle_epi8((_a), _mm256_load_si256((__m256i*)BYTE_ROTATE_64)) +#define YMMLSIGMA0( x ) YMMXOR( YMMXOR( YMMROR((x), 1), YMMROR8((x))), YMMSHR((x), 7)) +#else +#define YMMLSIGMA0( x ) YMMXOR( YMMXOR( YMMROR((x), 1), YMMROR((x), 8)), YMMSHR((x), 7)) +#endif + +#define YMMLSIGMA1( x ) YMMXOR( YMMXOR( YMMROR((x), 19), YMMROR((x), 61)), YMMSHR((x), 6)) + + +// +// YMM implementation that processes 1 message block at a time +// + +// Core round function +// Message words are loaded from Wx.ul[80]. 
+#define CROUND_1BLOCK(a, b, c, d, e, f, g, h, r) {;\ + UINT64 T1 = CSIGMA1(e) + CH(e, f, g) + Wx.ul[r] + SymCryptSha512K[r]; \ + UINT64 T2 = CSIGMA0(a) + MAJ(a, b, c); \ + h += T1; \ + d += h;\ + h += T2;\ +} + +// +// Message expansion for 4 rounds +// +// Each element of Wx.ymm[] array contains 4 message words, with the +// first 4 elements containing the original 16 message words. These are +// then expanded 16 times to generate the next 16 * 4 message words, +// comprising the 80 expanded message words in the union arrays Wx.ymm[20] or Wx.ul[80]. +// +// rnd: starts from 16 (updating the 4th element of the Wx.ymm[] array) and +// goes up to 76 in multiples of 4. +// +#define SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(rnd) { \ + Wx.ymm[(rnd) / 4] = _mm256_add_epi64(_mm256_add_epi64( \ + YMMLSIGMA0(_mm256_loadu_si256((__m256i*)& Wx.ul[(rnd) - 15])), \ + _mm256_load_si256((__m256i*)& Wx.ul[(rnd) - 16])), \ + _mm256_loadu_si256((__m256i*)& Wx.ul[(rnd) - 7])); \ + Wx.ul[(rnd) + 0] += LSIGMA1(Wx.ul[(rnd) - 2]); \ + Wx.ul[(rnd) + 1] += LSIGMA1(Wx.ul[(rnd) - 1]); \ + Wx.ul[(rnd) + 2] += LSIGMA1(Wx.ul[(rnd) + 0]); \ + Wx.ul[(rnd) + 3] += LSIGMA1(Wx.ul[(rnd) + 1]); \ +} + + +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_1block( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + SYMCRYPT_ALIGN_AT(32) union { UINT64 ul[80]; __m256i ymm[20]; } Wx; + UINT64 A, B, C, D, E, F, G, H; + + _mm256_zeroupper(); + + while (cbData >= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE) + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + +#if 0 + Wx.ul[ 0] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 0]); + Wx.ul[ 1] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 1]); + Wx.ul[ 2] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 2]); + Wx.ul[ 3] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 3]); + Wx.ul[ 4] = 
SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 4]); + Wx.ul[ 5] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 5]); + Wx.ul[ 6] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 6]); + Wx.ul[ 7] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 7]); + Wx.ul[ 8] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 8]); + Wx.ul[ 9] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 9]); + Wx.ul[10] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 10]); + Wx.ul[11] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 11]); + Wx.ul[12] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 12]); + Wx.ul[13] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 13]); + Wx.ul[14] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 14]); + Wx.ul[15] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 15]); +#else + Wx.ymm[0] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (0) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + Wx.ymm[1] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (1) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + Wx.ymm[2] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (2) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + Wx.ymm[3] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (3) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); +#endif + + for (int iterCount=0; iterCount<(64/16); iterCount++) + { + const int roundBase = iterCount*16; + + CROUND_1BLOCK(A, B, C, D, E, F, G, H, roundBase + 0); + CROUND_1BLOCK(H, A, B, C, D, E, F, G, roundBase + 1); + CROUND_1BLOCK(G, H, A, B, C, D, E, F, roundBase + 2); + CROUND_1BLOCK(F, G, H, A, B, C, D, E, roundBase + 3); + SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(roundBase + 16); + + CROUND_1BLOCK(E, F, G, H, A, B, C, D, roundBase + 4); + CROUND_1BLOCK(D, E, F, G, H, A, B, C, roundBase + 5); + CROUND_1BLOCK(C, D, E, F, G, H, A, B, roundBase + 6); + CROUND_1BLOCK(B, C, D, E, F, G, H, A, roundBase + 7); + 
SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(roundBase + 20); + + CROUND_1BLOCK(A, B, C, D, E, F, G, H, roundBase + 8); + CROUND_1BLOCK(H, A, B, C, D, E, F, G, roundBase + 9); + CROUND_1BLOCK(G, H, A, B, C, D, E, F, roundBase + 10); + CROUND_1BLOCK(F, G, H, A, B, C, D, E, roundBase + 11); + SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(roundBase + 24); + + CROUND_1BLOCK(E, F, G, H, A, B, C, D, roundBase + 12); + CROUND_1BLOCK(D, E, F, G, H, A, B, C, roundBase + 13); + CROUND_1BLOCK(C, D, E, F, G, H, A, B, roundBase + 14); + CROUND_1BLOCK(B, C, D, E, F, G, H, A, roundBase + 15); + SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(roundBase + 28); + } + + CROUND_1BLOCK(A, B, C, D, E, F, G, H, 64 + 0); + CROUND_1BLOCK(H, A, B, C, D, E, F, G, 64 + 1); + CROUND_1BLOCK(G, H, A, B, C, D, E, F, 64 + 2); + CROUND_1BLOCK(F, G, H, A, B, C, D, E, 64 + 3); + CROUND_1BLOCK(E, F, G, H, A, B, C, D, 64 + 4); + CROUND_1BLOCK(D, E, F, G, H, A, B, C, 64 + 5); + CROUND_1BLOCK(C, D, E, F, G, H, A, B, 64 + 6); + CROUND_1BLOCK(B, C, D, E, F, G, H, A, 64 + 7); + CROUND_1BLOCK(A, B, C, D, E, F, G, H, 64 + 8); + CROUND_1BLOCK(H, A, B, C, D, E, F, G, 64 + 9); + CROUND_1BLOCK(G, H, A, B, C, D, E, F, 64 + 10); + CROUND_1BLOCK(F, G, H, A, B, C, D, E, 64 + 11); + CROUND_1BLOCK(E, F, G, H, A, B, C, D, 64 + 12); + CROUND_1BLOCK(D, E, F, G, H, A, B, C, 64 + 13); + CROUND_1BLOCK(C, D, E, F, G, H, A, B, 64 + 14); + CROUND_1BLOCK(B, C, D, E, F, G, H, A, 64 + 15); + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + + pbData += SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; + + _mm256_zeroupper(); + + // + // Wipe the variables; + // + SymCryptWipeKnownSize(Wx.ymm, sizeof(Wx.ymm)); + } + + + + + // + // 2-way parallel message block processing + // + + // 
Core round function + // + // r : round number ( 0 <= r < 80) + // bl: message block index ( bl = 0, 1) + // + // The message words are generated by YMM code into the array Wx.ul[40][4]. + // Let W0, W1, ..., W15 be the message words from the first message block and + // Y0, Y1, ..., Y15 be the message words from the second message block. After message + // expansion, Wx.ul[][] array will take the following form: + // + // Wx.ul[40][4] = { + // W0, W1, Y0, Y1, + // W2, W3, Y2, Y3, + // ... + // W78, W79, Y78, Y79 + // }; + // +#define CROUND_2BLOCKS(a, b, c, d, e, f, g, h, r, bl ) { \ + UINT64 T1 = CSIGMA1(e) + CH(e, f, g) + Wx.ul[(r) / 2][2 * (bl) + ((r) & 1)] + SymCryptSha512K[r]; \ + UINT64 T2 = CSIGMA0(a) + MAJ(a, b, c); \ + h += T1; \ + d += h;\ + h += T2;\ +} + +// Message expansion of 2 message blocks for 2 rounds +#define SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(ind) { \ + __m256i t1 = _mm256_permute4x64_epi64(_mm256_blend_epi32(Wx.ymm[ind + 0], Wx.ymm[ind + 1], 0x33), 0xb1); \ + __m256i t2 = _mm256_permute4x64_epi64(_mm256_blend_epi32(Wx.ymm[ind + 4], Wx.ymm[ind + 5], 0x33), 0xb1); \ + __m256i s = _mm256_add_epi64(Wx.ymm[ind], _mm256_add_epi64(YMMLSIGMA0(t1), _mm256_add_epi64(YMMLSIGMA1(Wx.ymm[ind + 7]), t2))); \ + _mm256_store_si256(&Wx.ymm[ind + 8], s); \ +} + +// +// 16 rounds of 2-block message expansion with 16 rounds of message processing of the first message block +// +// This macro is called four times to generate 64 expanded message words and do 64 rounds of processing of +// first message block. The indices substituted for SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS() (resp. CROUND_2BLOCKS() ) +// range from 0 to 31 (resp. 0 to 63).
+#define SHA512_2BLOCKS_ROUND_STITCHED_16X(rb, ind) { \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 0); \ + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, 2 * (rb + ind) + 0, 0); \ + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, 2 * (rb + ind) + 1, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 1); \ + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, 2 * (rb + ind) + 2, 0); \ + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, 2 * (rb + ind) + 3, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 2); \ + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, 2 * (rb + ind) + 4, 0); \ + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, 2 * (rb + ind) + 5, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 3); \ + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, 2 * (rb + ind) + 6, 0); \ + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, 2 * (rb + ind) + 7, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 4); \ + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, 2 * (rb + ind) + 8, 0); \ + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, 2 * (rb + ind) + 9, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 5); \ + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, 2 * (rb + ind) + 10, 0); \ + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, 2 * (rb + ind) + 11, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 6); \ + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, 2 * (rb + ind) + 12, 0); \ + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, 2 * (rb + ind) + 13, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 7); \ + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, 2 * (rb + ind) + 14, 0); \ + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, 2 * (rb + ind) + 15, 0); \ +} + + +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_2blocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + SYMCRYPT_ALIGN_AT(32) union { UINT64 ul[40][4]; __m256i ymm[40]; } Wx; + __m256i w1[4], w2[4]; + UINT64 A, B, C, D, E, F, G, H; + SIZE_T numBlocks; + + _mm256_zeroupper(); + + while (cbData >= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE) + { + 
// Load message words from first block + // + // w1[0] = W3 W2 W1 W0 + // w1[1] = W7 W6 W5 W4 + // w1[2] = W11 W10 W9 W8 + // w1[3] = W15 W14 W13 W12 + // + numBlocks = 1; + w1[0] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (0) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w1[1] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (1) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w1[2] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (2) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w1[3] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (3) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + + if (cbData >= (2 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE)) + { + // Load message words from second block + // + // w2[0] = Y3 Y2 Y1 Y0 + // w2[1] = Y7 Y6 Y5 Y4 + // w2[2] = Y11 Y10 Y9 Y8 + // w2[3] = Y15 Y14 Y13 Y12 + // + numBlocks = 2; + w2[0] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[1 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (0) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w2[1] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[1 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (1) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w2[2] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[1 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (2) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w2[3] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[1 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (3) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + } + + // process first block and do the message expansion for two blocks at the same time + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + 
H = pChain->H[7]; + + // + // Combine message words from two blocks + // + // Wx.ymm[0] = Y1 Y0 W1 W0 + // ... ... + // Wx.ymm[7] = Y15 Y14 W15 W14 + // + Wx.ymm[0] = _mm256_permute2x128_si256(w1[0], w2[0], 0x20); + Wx.ymm[1] = _mm256_permute2x128_si256(w1[0], w2[0], 0x31); + Wx.ymm[2] = _mm256_permute2x128_si256(w1[1], w2[1], 0x20); + Wx.ymm[3] = _mm256_permute2x128_si256(w1[1], w2[1], 0x31); + Wx.ymm[4] = _mm256_permute2x128_si256(w1[2], w2[2], 0x20); + Wx.ymm[5] = _mm256_permute2x128_si256(w1[2], w2[2], 0x31); + Wx.ymm[6] = _mm256_permute2x128_si256(w1[3], w2[3], 0x20); + Wx.ymm[7] = _mm256_permute2x128_si256(w1[3], w2[3], 0x31); + + // Do the message expansion of two message blocks together with the + // processing of first 64 rounds of first message block + SHA512_2BLOCKS_ROUND_STITCHED_16X(0, 0); + SHA512_2BLOCKS_ROUND_STITCHED_16X(0, 8); + SHA512_2BLOCKS_ROUND_STITCHED_16X(16, 0); + SHA512_2BLOCKS_ROUND_STITCHED_16X(16, 8); + + // + // Last 16 rounds of round processing + // + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, 64 + 0, 0); + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, 64 + 1, 0); + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, 64 + 2, 0); + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, 64 + 3, 0); + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, 64 + 4, 0); + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, 64 + 5, 0); + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, 64 + 6, 0); + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, 64 + 7, 0); + + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, 72 + 0, 0); + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, 72 + 1, 0); + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, 72 + 2, 0); + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, 72 + 3, 0); + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, 72 + 4, 0); + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, 72 + 5, 0); + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, 72 + 6, 0); + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, 72 + 7, 0); + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + 
pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + } + + // second block + if(numBlocks > 1) + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + + for (int iterCount=0; iterCount<(80/8); iterCount++) + { + const int roundBase = iterCount*8; + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, roundBase + 0, 1); + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, roundBase + 1, 1); + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, roundBase + 2, 1); + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, roundBase + 3, 1); + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, roundBase + 4, 1); + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, roundBase + 5, 1); + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, roundBase + 6, 1); + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, roundBase + 7, 1); + } + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + } + + pbData += (numBlocks * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + cbData -= (numBlocks * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + } + + *pcbRemaining = cbData; + + _mm256_zeroupper(); + + // + // Wipe the variables; + // + SymCryptWipeKnownSize(Wx.ymm, sizeof(Wx.ymm)); + SymCryptWipeKnownSize(w1, sizeof(w1)); + SymCryptWipeKnownSize(w2, sizeof(w2)); +} + + + +// +// 4-way parallel message block processing +// + + +// Initial loading of message words and endianness transformation. +// +// _bl : Number of message blocks to load, 1 <= bl <= 4. +// +// When bl < 4, the high order lanes of the YMM registers corresponding to the missing blocks are unused. 
+// +#define SHA512_MSG_LOAD_4BLOCKS(bl) { \ + for(int i = 0; i < bl; i++) \ + { \ + Wx.ymm[i + 0] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + 0]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); \ + Wx.ymm[i + 4] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); \ + Wx.ymm[i + 8] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + 64]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); \ + Wx.ymm[i + 12] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + 96]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); \ + } \ +} + +// Shuffles the initially loaded message words from multiple blocks +// so that each YMM register contains message words with the same index +// within a block (e.g. Wx.ymm[0] contains the first words of each block). +// +// We have to use this macro four times to transform message blocks of 128-bytes. +// ind=0 processes the first quarter (32-bytes), ind=1 does the second quarter and so on. +// +#define SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(ind) { \ + __m256i t1, t2, t3, t4; \ + t1 = _mm256_unpacklo_epi64(Wx.ymm[4 * (ind) + 0], Wx.ymm[4 * (ind) + 1]); \ + t2 = _mm256_unpacklo_epi64(Wx.ymm[4 * (ind) + 2], Wx.ymm[4 * (ind) + 3]); \ + t3 = _mm256_unpackhi_epi64(Wx.ymm[4 * (ind) + 0], Wx.ymm[4 * (ind) + 1]); \ + t4 = _mm256_unpackhi_epi64(Wx.ymm[4 * (ind) + 2], Wx.ymm[4 * (ind) + 3]); \ + Wx.ymm[4 * (ind) + 0] = _mm256_permute2x128_si256(t1, t2, 0x20); \ + Wx.ymm[4 * (ind) + 1] = _mm256_permute2x128_si256(t3, t4, 0x20); \ + Wx.ymm[4 * (ind) + 2] = _mm256_permute2x128_si256(t1, t2, 0x31); \ + Wx.ymm[4 * (ind) + 3] = _mm256_permute2x128_si256(t3, t4, 0x31); \ +} + +// Transpose all message words. Each SYMCRYPT_SHA512_MSG_TRANSPOSE_QUARTER_YMM() does the +// transposition for four message words (i.e. 
0 1 2 3, 4 5 6 7, 8 9 10 11, 12 13 14 15) +#define SHA512_MSG_TRANSPOSE_4BLOCKS() { \ + SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(0); \ + SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(1); \ + SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(2); \ + SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(3); \ +} + +// One round message schedule, updates the rth message word, and adds the constants to message words for (r-16). +#define SHA512_MSG_EXPAND_4BLOCKS_1ROUND(r) { \ + Wx.ymm[r] = _mm256_add_epi64(_mm256_add_epi64(_mm256_add_epi64(Wx.ymm[r - 16], Wx.ymm[r - 7]), \ + YMMLSIGMA0(Wx.ymm[r - 15])), YMMLSIGMA1(Wx.ymm[r - 2])); \ + Wx.ymm[r - 16] = _mm256_add_epi64(Wx.ymm[r - 16], _mm256_set1_epi64x(SymCryptSha512K[r - 16])); \ +} + +// Four rounds of message schedule. Generates message words for rounds r, r+1, r+2, r+3. +#define SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS(r) { \ + SHA512_MSG_EXPAND_4BLOCKS_1ROUND((r) + 0); SHA512_MSG_EXPAND_4BLOCKS_1ROUND((r) + 1); \ + SHA512_MSG_EXPAND_4BLOCKS_1ROUND((r) + 2); SHA512_MSG_EXPAND_4BLOCKS_1ROUND((r) + 3); \ +} + +// Sixteen rounds of message schedule. Generates message words for rounds r, ..., r+15. +#define SHA512_MSG_EXPAND_4BLOCKS_16ROUNDS(r) { \ + SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 0); SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 4); \ + SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 8); SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 12); \ +} + +// +// Core round for 4-way message expansion without constant addition +// +// r: round number (0 <= r < 80) +// +// bl: message block index (0 <= bl < 4) +// +// Message words for four blocks are stored in Wx.ul4[80][4] in interleaved form: +// W0 X0 Y0 Z0 +// W1 X1 Y1 Z1 +// ...
+// W79 X79 Y79 Z79 +// +#define CROUND_4BLOCKS(a, b, c, d, e, f, g, h, r, bl ) { \ + UINT64 T1 = CSIGMA1(e) + CH(e, f, g) + Wx.ul4[r][bl]; \ + UINT64 T2 = CSIGMA0(a) + MAJ(a, b, c); \ + h += T1; \ + d += h; \ + h += T2; \ +} + +// Core round for single block +#define CROUND(a, b, c, d, e, f, g, h, r, r16) { \ + Wx.ul[r16] = Wt; \ + UINT64 T1 = CSIGMA1(e) + CH(e, f, g) + Wt + SymCryptSha512K[r]; \ + UINT64 T2 = CSIGMA0(a) + MAJ(a, b, c); \ + h += T1; \ + d += h;\ + h += T2;\ +} + +// Initial round for single block +#define IROUND( a, b, c, d, e, f, g, h, r ) { \ + Wt = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*r ] );\ + CROUND( a, b, c, d, e, f, g, h, r, r);\ +} + +// Full round for single block +#define FROUND( a, b, c, d, e, f, g, h, r, r16 ) { \ + Wt = LSIGMA1( Wx.ul[(r16-2) & 15] ) + Wx.ul[(r16-7) & 15] + \ + LSIGMA0( Wx.ul[(r16-15) & 15]) + Wx.ul[r16 & 15]; \ + CROUND( a, b, c, d, e, f, g, h, r, r16 ); \ +} + +// Constant addition and round processing for 8 rounds. Constants for rounds 0..63 are added during message expansion. +// This macro is called twice to add the constants and do the round processing for the last 16 rounds.
+#define SHA512_4BLOCKS_FINAL_ROUNDS_8X(rnd) { \ + Wx.ymm[rnd + 0] = _mm256_add_epi64(Wx.ymm[rnd + 0], _mm256_set1_epi64x(SymCryptSha512K[rnd + 0])); \ + Wx.ymm[rnd + 1] = _mm256_add_epi64(Wx.ymm[rnd + 1], _mm256_set1_epi64x(SymCryptSha512K[rnd + 1])); \ + Wx.ymm[rnd + 2] = _mm256_add_epi64(Wx.ymm[rnd + 2], _mm256_set1_epi64x(SymCryptSha512K[rnd + 2])); \ + Wx.ymm[rnd + 3] = _mm256_add_epi64(Wx.ymm[rnd + 3], _mm256_set1_epi64x(SymCryptSha512K[rnd + 3])); \ + CROUND_4BLOCKS(A, B, C, D, E, F, G, H, rnd + 0, 0); \ + CROUND_4BLOCKS(H, A, B, C, D, E, F, G, rnd + 1, 0); \ + CROUND_4BLOCKS(G, H, A, B, C, D, E, F, rnd + 2, 0); \ + CROUND_4BLOCKS(F, G, H, A, B, C, D, E, rnd + 3, 0); \ + Wx.ymm[rnd + 4] = _mm256_add_epi64(Wx.ymm[rnd + 4], _mm256_set1_epi64x(SymCryptSha512K[rnd + 4])); \ + Wx.ymm[rnd + 5] = _mm256_add_epi64(Wx.ymm[rnd + 5], _mm256_set1_epi64x(SymCryptSha512K[rnd + 5])); \ + Wx.ymm[rnd + 6] = _mm256_add_epi64(Wx.ymm[rnd + 6], _mm256_set1_epi64x(SymCryptSha512K[rnd + 6])); \ + Wx.ymm[rnd + 7] = _mm256_add_epi64(Wx.ymm[rnd + 7], _mm256_set1_epi64x(SymCryptSha512K[rnd + 7])); \ + CROUND_4BLOCKS(E, F, G, H, A, B, C, D, rnd + 4, 0); \ + CROUND_4BLOCKS(D, E, F, G, H, A, B, C, rnd + 5, 0); \ + CROUND_4BLOCKS(C, D, E, F, G, H, A, B, rnd + 6, 0); \ + CROUND_4BLOCKS(B, C, D, E, F, G, H, A, rnd + 7, 0); \ +} + + +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_4blocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + SYMCRYPT_ALIGN_AT(32) union { UINT64 ul[16]; UINT64 ul4[80][4]; __m256i ymm[80]; } Wx; + UINT64 Wt; + UINT64 A, B, C, D, E, F, G, H; + UINT32 uWipeSize = (cbData >= (3 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE)) ? (80 * 4 * sizeof(UINT64)) : (16 * sizeof(UINT64)); + + + _mm256_zeroupper(); + + while (cbData >= (3 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE)) + { + SIZE_T numBlocks = (cbData >= 4 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE) ? 
4 : (cbData / SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + + SHA512_MSG_LOAD_4BLOCKS(numBlocks); + SHA512_MSG_TRANSPOSE_4BLOCKS(); + + // + // Process the first block together with message expansion + // + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + + for (int iterCount=0; iterCount<(64/8); iterCount++) + { + const int roundBase = iterCount*8; + + SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS(roundBase + 16); + CROUND_4BLOCKS(A, B, C, D, E, F, G, H, roundBase + 0, 0); + CROUND_4BLOCKS(H, A, B, C, D, E, F, G, roundBase + 1, 0); + CROUND_4BLOCKS(G, H, A, B, C, D, E, F, roundBase + 2, 0); + CROUND_4BLOCKS(F, G, H, A, B, C, D, E, roundBase + 3, 0); + + SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS(roundBase + 20); + CROUND_4BLOCKS(E, F, G, H, A, B, C, D, roundBase + 4, 0); + CROUND_4BLOCKS(D, E, F, G, H, A, B, C, roundBase + 5, 0); + CROUND_4BLOCKS(C, D, E, F, G, H, A, B, roundBase + 6, 0); + CROUND_4BLOCKS(B, C, D, E, F, G, H, A, roundBase + 7, 0); + } + + // Last 16 rounds; add round constants and process. Message expansion is completed above. 
+ SHA512_4BLOCKS_FINAL_ROUNDS_8X(64); + SHA512_4BLOCKS_FINAL_ROUNDS_8X(72); + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + + // Process the remaining message blocks + for (int bl = 1; bl < numBlocks; bl++) + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + + for (int iterCount=0; iterCount<(80/8); iterCount++) + { + const int roundBase = iterCount*8; + + CROUND_4BLOCKS(A, B, C, D, E, F, G, H, roundBase + 0, bl); + CROUND_4BLOCKS(H, A, B, C, D, E, F, G, roundBase + 1, bl); + CROUND_4BLOCKS(G, H, A, B, C, D, E, F, roundBase + 2, bl); + CROUND_4BLOCKS(F, G, H, A, B, C, D, E, roundBase + 3, bl); + CROUND_4BLOCKS(E, F, G, H, A, B, C, D, roundBase + 4, bl); + CROUND_4BLOCKS(D, E, F, G, H, A, B, C, roundBase + 5, bl); + CROUND_4BLOCKS(C, D, E, F, G, H, A, B, roundBase + 6, bl); + CROUND_4BLOCKS(B, C, D, E, F, G, H, A, roundBase + 7, bl); + //CROUND_4BLOCKS(A, B, C, D, E, F, G, H, roundBase + 8, bl); + //CROUND_4BLOCKS(H, A, B, C, D, E, F, G, roundBase + 9, bl); + //CROUND_4BLOCKS(G, H, A, B, C, D, E, F, roundBase + 10, bl); + //CROUND_4BLOCKS(F, G, H, A, B, C, D, E, roundBase + 11, bl); + //CROUND_4BLOCKS(E, F, G, H, A, B, C, D, roundBase + 12, bl); + //CROUND_4BLOCKS(D, E, F, G, H, A, B, C, roundBase + 13, bl); + //CROUND_4BLOCKS(C, D, E, F, G, H, A, B, roundBase + 14, bl); + //CROUND_4BLOCKS(B, C, D, E, F, G, H, A, roundBase + 15, bl); + } + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + } + + 
pbData += (numBlocks * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + cbData -= (numBlocks * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + } + + _mm256_zeroupper(); + + + // The vectorized version above consumes multiple blocks at a time. + // The remaining blocks if any are processed here. + while (cbData >= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE) + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + + // + // initial rounds 1 to 16 + // + + IROUND(A, B, C, D, E, F, G, H, 0); + IROUND(H, A, B, C, D, E, F, G, 1); + IROUND(G, H, A, B, C, D, E, F, 2); + IROUND(F, G, H, A, B, C, D, E, 3); + IROUND(E, F, G, H, A, B, C, D, 4); + IROUND(D, E, F, G, H, A, B, C, 5); + IROUND(C, D, E, F, G, H, A, B, 6); + IROUND(B, C, D, E, F, G, H, A, 7); + IROUND(A, B, C, D, E, F, G, H, 8); + IROUND(H, A, B, C, D, E, F, G, 9); + IROUND(G, H, A, B, C, D, E, F, 10); + IROUND(F, G, H, A, B, C, D, E, 11); + IROUND(E, F, G, H, A, B, C, D, 12); + IROUND(D, E, F, G, H, A, B, C, 13); + IROUND(C, D, E, F, G, H, A, B, 14); + IROUND(B, C, D, E, F, G, H, A, 15); + + for (int iterCount=1; iterCount<(80/16); iterCount++) + { + const int roundBase = iterCount*16; + + FROUND(A, B, C, D, E, F, G, H, roundBase + 0, 0); + FROUND(H, A, B, C, D, E, F, G, roundBase + 1, 1); + FROUND(G, H, A, B, C, D, E, F, roundBase + 2, 2); + FROUND(F, G, H, A, B, C, D, E, roundBase + 3, 3); + FROUND(E, F, G, H, A, B, C, D, roundBase + 4, 4); + FROUND(D, E, F, G, H, A, B, C, roundBase + 5, 5); + FROUND(C, D, E, F, G, H, A, B, roundBase + 6, 6); + FROUND(B, C, D, E, F, G, H, A, roundBase + 7, 7); + FROUND(A, B, C, D, E, F, G, H, roundBase + 8, 8); + FROUND(H, A, B, C, D, E, F, G, roundBase + 9, 9); + FROUND(G, H, A, B, C, D, E, F, roundBase + 10, 10); + FROUND(F, G, H, A, B, C, D, E, roundBase + 11, 11); + FROUND(E, F, G, H, A, B, C, D, roundBase + 12, 12); + FROUND(D, E, F, G, H, A, B, C, roundBase + 13, 13); + FROUND(C, D, E, F, G, H, A, 
B, roundBase + 14, 14); + FROUND(B, C, D, E, F, G, H, A, roundBase + 15, 15); + } + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + + pbData += SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipe(&Wx, uWipeSize); +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/sha512.c b/libs/symcrypt/lib/sha512.c new file mode 100644 index 00000000000..0b763c54677 --- /dev/null +++ b/libs/symcrypt/lib/sha512.c @@ -0,0 +1,1715 @@ +// +// Sha512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement SHA2-512 from FIPS 180-2 +// + + +#include "precomp.h" + +// +// SHA-512 uses 80 magic constants of 64 bits each. These are +// referred to as K^{512}_i for i=0...79 by FIPS 180-2. +// We use a static array as that does not pollute the linker name space +// For performance we align to the cache line size of 64 bytes +// We have one extra value at the end to allow an XMM read from each element +// of the array. 
//

//
// Layout notes:
//  - Only 80 initializers are listed; the array is declared with 81 elements, so
//    the final (padding) element is implicitly zero-initialized.
//  - The constants carry a UL suffix. A hexadecimal constant that does not fit in
//    unsigned long takes the next wider unsigned type under C99 rules, so the
//    values are not truncated on platforms where long is 32 bits.
//
SYMCRYPT_ALIGN_AT( 64 ) const UINT64 SymCryptSha512K[81] = {
    0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
    0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
    0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
    0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
    0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
    0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
    0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
    0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
    0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
    0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
    0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
    0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
    0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
    0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
    0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
    0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
    0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
    0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
    0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
    0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
    0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
    0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
    0xd192e819d6ef5218UL, 0xd69906245565a910UL,
    0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
    0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
    0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
    0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
    0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
    0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
    0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
    0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
    0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
    0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
    0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
    0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
    0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
    0x28db77f523047d84UL, 0x32caab7b40c72493UL,
    0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
    0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
    0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};

//
// Initial states
//
// One table of eight 64-bit chaining words per algorithm variant. Each table is
// copied verbatim into pState->chain.H[0..7] by the matching Init function
// (and SymCryptSha512InitialState also by SymCryptSha512Result after wiping).
//
const UINT64 SymCryptSha512InitialState[8] = {
    0x6a09e667f3bcc908UL,
    0xbb67ae8584caa73bUL,
    0x3c6ef372fe94f82bUL,
    0xa54ff53a5f1d36f1UL,
    0x510e527fade682d1UL,
    0x9b05688c2b3e6c1fUL,
    0x1f83d9abfb41bd6bUL,
    0x5be0cd19137e2179UL,
};

// Initial chaining value for SHA-384.
const UINT64 SymCryptSha384InitialState[8] = {
    0xcbbb9d5dc1059ed8UL,
    0x629a292a367cd507UL,
    0x9159015a3070dd17UL,
    0x152fecd8f70e5939UL,
    0x67332667ffc00b31UL,
    0x8eb44a8768581511UL,
    0xdb0c2e0d64f98fa7UL,
    0x47b5481dbefa4fa4UL,
};

// Initial chaining value for SHA-512/224.
const UINT64 SymCryptSha512_224InitialState[8] = {
    0x8c3d37c819544da2UL,
    0x73e1996689dcd4d6UL,
    0x1dfab7ae32ff9c82UL,
    0x679dd514582f9fcfUL,
    0x0f6d2b697bd44da8UL,
    0x77e36f7304c48942UL,
    0x3f9d85a86a1d36c8UL,
    0x1112e6ad91d692a1UL,
};

// Initial chaining value for SHA-512/256.
const UINT64 SymCryptSha512_256InitialState[8] = {
    0x22312194fc2bf72cUL,
    0x9f555fa3c84c64c2UL,
    0x2393b86b6f53b151UL,
    0x963877195940eabdUL,
    0x96283ee2a88effe3UL,
    0xbe5e1e2553863992UL,
    0x2b0199fc2c85b8aaUL,
    0x0eb72ddc81c52ca2UL,
};


//
// Todo: this structure pulls in the SHA-384 code anytime someone uses
// SHA-512; should be split into a separate file.
//

//
// Algorithm descriptors, one per variant.
// The initializers are positional: five function pointers (init, append, result,
// append-blocks, state-copy) followed by the state size, result size, input block
// size, and the offset and size of the chaining-state field inside the state.
// All four variants share SymCryptSha512AppendBlocks as their block function.
// NOTE(review): the SYMCRYPT_HASH field names are declared outside this file;
// the order above is read off the initializers only - confirm against the header.
//
const SYMCRYPT_HASH SymCryptSha384Algorithm_default = {
    &SymCryptSha384Init,
    &SymCryptSha384Append,
    &SymCryptSha384Result,
    &SymCryptSha512AppendBlocks,
    &SymCryptSha384StateCopy,
    sizeof( SYMCRYPT_SHA384_STATE ),
    SYMCRYPT_SHA384_RESULT_SIZE,
    SYMCRYPT_SHA384_INPUT_BLOCK_SIZE,
    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA384_STATE, chain ),
    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA384_STATE, chain ),
};

const SYMCRYPT_HASH SymCryptSha512Algorithm_default = {
    &SymCryptSha512Init,
    &SymCryptSha512Append,
    &SymCryptSha512Result,
    &SymCryptSha512AppendBlocks,
    &SymCryptSha512StateCopy,
    sizeof( SYMCRYPT_SHA512_STATE ),
    SYMCRYPT_SHA512_RESULT_SIZE,
    SYMCRYPT_SHA512_INPUT_BLOCK_SIZE,
    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA512_STATE, chain ),
    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA512_STATE, chain ),
};

const SYMCRYPT_HASH SymCryptSha512_224Algorithm_default = {
    &SymCryptSha512_224Init,
    &SymCryptSha512_224Append,
    &SymCryptSha512_224Result,
    &SymCryptSha512AppendBlocks,
    &SymCryptSha512_224StateCopy,
    sizeof( SYMCRYPT_SHA512_224_STATE ),
    SYMCRYPT_SHA512_224_RESULT_SIZE,
    SYMCRYPT_SHA512_224_INPUT_BLOCK_SIZE,
    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA512_224_STATE, chain ),
    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA512_224_STATE, chain ),
};

const SYMCRYPT_HASH SymCryptSha512_256Algorithm_default = {
    &SymCryptSha512_256Init,
    &SymCryptSha512_256Append,
    &SymCryptSha512_256Result,
    &SymCryptSha512AppendBlocks,
    &SymCryptSha512_256StateCopy,
    sizeof( SYMCRYPT_SHA512_256_STATE ),
    SYMCRYPT_SHA512_256_RESULT_SIZE,
    SYMCRYPT_SHA512_256_INPUT_BLOCK_SIZE,
    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA512_256_STATE, chain ),
    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA512_256_STATE, chain ),
};

// Public handles: constant pointers to the descriptors above.
const PCSYMCRYPT_HASH SymCryptSha384Algorithm = &SymCryptSha384Algorithm_default;
const PCSYMCRYPT_HASH SymCryptSha512Algorithm = &SymCryptSha512Algorithm_default;
const PCSYMCRYPT_HASH SymCryptSha512_224Algorithm = &SymCryptSha512_224Algorithm_default;
const PCSYMCRYPT_HASH SymCryptSha512_256Algorithm = &SymCryptSha512_256Algorithm_default;

//
// The four sections below instantiate hash_pattern.c once per variant by
// defining ALG/Alg before each inclusion and undefining them afterwards.
// NOTE(review): hash_pattern.c is not visible here; it is presumably where the
// one-shot hash functions and the *StateCopy functions referenced in this file
// come from - confirm.
//

//
// SymCryptSha384
//
#define ALG SHA384
#define Alg Sha384
#include "hash_pattern.c"
#undef ALG
#undef Alg

//
// SymCryptSha512
//
#define ALG SHA512
#define Alg Sha512
#include "hash_pattern.c"
#undef ALG
#undef Alg

//
// SymCryptSha512/224
//
#define ALG SHA512_224
#define Alg Sha512_224
#include "hash_pattern.c"
#undef ALG
#undef Alg

//
// SymCryptSha512/256
//
#define ALG SHA512_256
#define Alg Sha512_256
#include "hash_pattern.c"
#undef ALG
#undef Alg


//
// SymCryptSha512Init
// Prepares pState for a new SHA-512 computation: sets the magic value, zeroes
// the 128-bit length counter (dataLengthH:dataLengthL) and the buffered-byte
// count, and loads the SHA-512 initial chaining value.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512Init( _Out_ PSYMCRYPT_SHA512_STATE pState )
{
    SYMCRYPT_SET_MAGIC( pState );

    pState->dataLengthH = 0;
    pState->dataLengthL = 0;
    pState->bytesInBuffer = 0;

    memcpy( &pState->chain.H[0], &SymCryptSha512InitialState[0], sizeof( SymCryptSha512InitialState ) );

    //
    // There is no need to initialize the buffer part of the state as that will be
    // filled before it is used.
    //
}


//
// SymCryptSha384Init
// Same as SymCryptSha512Init, but loads the SHA-384 initial chaining value.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha384Init( _Out_ PSYMCRYPT_SHA384_STATE pState )
{
    SYMCRYPT_SET_MAGIC( pState );

    pState->dataLengthH = 0;
    pState->dataLengthL = 0;
    pState->bytesInBuffer = 0;

    memcpy( &pState->chain.H[0], &SymCryptSha384InitialState[0], sizeof( SymCryptSha384InitialState ) );

    //
    // There is no need to initialize the buffer part of the state as that will be
    // filled before it is used.
    //
}


//
// SymCryptSha512_224Init
// Same as SymCryptSha512Init, but loads the SHA-512/224 initial chaining value.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512_224Init( _Out_ PSYMCRYPT_SHA512_224_STATE pState )
{
    SYMCRYPT_SET_MAGIC( pState );

    pState->dataLengthH = 0;
    pState->dataLengthL = 0;
    pState->bytesInBuffer = 0;

    memcpy( &pState->chain.H[0], &SymCryptSha512_224InitialState[0], sizeof( SymCryptSha512_224InitialState ) );

    //
    // There is no need to initialize the buffer part of the state as that will be
    // filled before it is used.
    //
}


//
// SymCryptSha512_256Init
// Same as SymCryptSha512Init, but loads the SHA-512/256 initial chaining value.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512_256Init( _Out_ PSYMCRYPT_SHA512_256_STATE pState )
{
    SYMCRYPT_SET_MAGIC( pState );

    pState->dataLengthH = 0;
    pState->dataLengthL = 0;
    pState->bytesInBuffer = 0;

    memcpy( &pState->chain.H[0], &SymCryptSha512_256InitialState[0], sizeof( SymCryptSha512_256InitialState ) );

    //
    // There is no need to initialize the buffer part of the state as that will be
    // filled before it is used.
    //
}


//
// SymCryptSha512Append
// Feeds cbData bytes at pbData into the running hash.
//  - Updates the 128-bit byte counter, carrying into dataLengthH when the
//    addition to dataLengthL wraps.
//  - If the internal buffer already holds data and the new input completes a
//    block, the buffer is topped up and processed first.
//  - All remaining whole blocks are processed directly from the caller's memory.
//  - Whatever is left (less than one block) is stored in the internal buffer.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512Append(
    _Inout_ PSYMCRYPT_SHA512_STATE pState,
    _In_reads_( cbData ) PCBYTE pbData,
    SIZE_T cbData )
{
    UINT32 bytesInBuffer;
    UINT32 freeInBuffer;
    SIZE_T tmp;

    SYMCRYPT_CHECK_MAGIC( pState );

    // Unsigned wrap-around of the low word signals a carry into the high word.
    pState->dataLengthL += cbData;
    if( pState->dataLengthL < cbData ) {
        pState->dataLengthH++;
    }

    bytesInBuffer = pState->bytesInBuffer;

    //
    // If previous data in buffer, buffer new input and transform if possible.
    //
    if( bytesInBuffer > 0 )
    {
        SYMCRYPT_ASSERT( SYMCRYPT_SHA512_INPUT_BLOCK_SIZE > bytesInBuffer );

        freeInBuffer = SYMCRYPT_SHA512_INPUT_BLOCK_SIZE - bytesInBuffer;
        if( cbData < freeInBuffer )
        {
            //
            // All the data will fit in the buffer.
            // We don't do anything here.
            // As cbData < inputBlockSize the bulk data processing is skipped,
            // and the data will be copied to the buffer at the end
            // of this code.
        } else {
            //
            // Enough data to fill the whole buffer & process it
            //
            memcpy(&pState->buffer[bytesInBuffer], pbData, freeInBuffer);
            pbData += freeInBuffer;
            cbData -= freeInBuffer;
            SymCryptSha512AppendBlocks( &pState->chain, &pState->buffer[0], SYMCRYPT_SHA512_INPUT_BLOCK_SIZE, &tmp );

            bytesInBuffer = 0;
        }
    }

    //
    // Internal buffer is empty; process all remaining whole blocks in the input
    //
    if( cbData >= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE )
    {
        SymCryptSha512AppendBlocks( &pState->chain, pbData, cbData, &tmp );
        SYMCRYPT_ASSERT( tmp < SYMCRYPT_SHA512_INPUT_BLOCK_SIZE );
        // tmp is the number of trailing bytes the block function did not consume.
        pbData += cbData - tmp;
        cbData = tmp;
    }

    SYMCRYPT_ASSERT( cbData < SYMCRYPT_SHA512_INPUT_BLOCK_SIZE );

    //
    // buffer remaining input if necessary.
    //
    if( cbData > 0 )
    {
        memcpy( &pState->buffer[bytesInBuffer], pbData, cbData );
        bytesInBuffer += (UINT32) cbData;
    }

    pState->bytesInBuffer = bytesInBuffer;

}

//
// SymCryptSha384Append
// Forwards to SymCryptSha512Append after casting the state pointer.
// NOTE(review): relies on SYMCRYPT_SHA384_STATE being layout-compatible with
// SYMCRYPT_SHA512_STATE; both are declared outside this file - confirm.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha384Append(
    _Inout_ PSYMCRYPT_SHA384_STATE pState,
    _In_reads_( cbData ) PCBYTE pbData,
    SIZE_T cbData )
{

    SymCryptSha512Append( (PSYMCRYPT_SHA512_STATE)pState, pbData, cbData );

}

//
// SymCryptSha512_224Append
// Forwards to SymCryptSha512Append after casting the state pointer
// (same layout assumption as SymCryptSha384Append).
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512_224Append(
    _Inout_ PSYMCRYPT_SHA512_224_STATE pState,
    _In_reads_( cbData ) PCBYTE pbData,
    SIZE_T cbData )
{
    SymCryptSha512Append( (PSYMCRYPT_SHA512_STATE)pState, pbData, cbData );
}

//
// SymCryptSha512_256Append
// Forwards to SymCryptSha512Append after casting the state pointer
// (same layout assumption as SymCryptSha384Append).
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512_256Append(
    _Inout_ PSYMCRYPT_SHA512_256_STATE pState,
    _In_reads_( cbData ) PCBYTE pbData,
    SIZE_T cbData )
{
    SymCryptSha512Append( (PSYMCRYPT_SHA512_STATE)pState, pbData, cbData );
}


//
// SymCryptSha512Result
// Finishes the computation: appends the 0x80 padding byte, zero padding and the
// 128-bit big-endian message length in bits, processes the final block(s),
// writes the 8 chaining words MSB-first to pbResult, then wipes the whole state
// and re-arms it with the magic value and the SHA-512 initial chaining value.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512Result(
    _Inout_ PSYMCRYPT_SHA512_STATE pState,
    _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult )
{
    UINT32 bytesInBuffer;
    SIZE_T tmp;

    SYMCRYPT_CHECK_MAGIC( pState );

    bytesInBuffer = pState->bytesInBuffer;

    //
    // The buffer is never completely full, so we can always put the first
    // padding byte in.
    //
    pState->buffer[bytesInBuffer++] = 0x80;

    if( bytesInBuffer > 128-16 ) {
        //
        // No room for the rest of the padding. Pad with zeroes & process block
        // bytesInBuffer is at most 128, so we do not have an integer underflow
        //
        SymCryptWipe( &pState->buffer[bytesInBuffer], 128-bytesInBuffer );
        SymCryptSha512AppendBlocks( &pState->chain, pState->buffer, 128, &tmp );
        bytesInBuffer = 0;
    }

    //
    // Set rest of padding
    // We wipe to the end of the buffer as it is 16-aligned,
    // and it is faster to wipe to an aligned point
    //
    SymCryptWipe( &pState->buffer[bytesInBuffer], 128-bytesInBuffer );
    // The counters hold bytes; shifting left by 3 converts to bits, with the top
    // three bits of the low word carried into the high word.
    SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[128-16], (pState->dataLengthH << 3) + (pState->dataLengthL >> 61) );
    SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[128- 8], (pState->dataLengthL << 3) );

    SymCryptSha512AppendBlocks( &pState->chain, pState->buffer, 128, &tmp );

    SymCryptUint64ToMsbFirst( &pState->chain.H[0], pbResult, 8 );

    //
    // We have to wipe the whole state because the Init call
    // might be optimized away by a smart compiler.
    //
    SymCryptWipeKnownSize( pState, sizeof( *pState ) );

    SYMCRYPT_SET_MAGIC( pState );

    memcpy( &pState->chain.H[0], &SymCryptSha512InitialState[0], sizeof( SymCryptSha512InitialState ) );
}

//
// SymCryptSha384Result
// Produces the SHA-384 digest by running SymCryptSha512Result into a local
// 64-byte buffer and copying out the leading SYMCRYPT_SHA384_RESULT_SIZE bytes.
// The state is then re-initialized for SHA-384 and the local buffer is wiped.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha384Result(
    _Inout_ PSYMCRYPT_SHA384_STATE pState,
    _Out_writes_( SYMCRYPT_SHA384_RESULT_SIZE ) PBYTE pbResult )
{
    //
    // For simplicity we re-use SymCryptSha512Result. This is slightly slower,
    // but SHA-384 isn't used that much.
    //
    SYMCRYPT_ALIGN BYTE sha512Result[SYMCRYPT_SHA512_RESULT_SIZE]; // Buffer for SHA-512 output

    //
    // The SHA-384 result is the first 48 bytes of the SHA-512 result of our state
    //
    SymCryptSha512Result( (PSYMCRYPT_SHA512_STATE)pState, sha512Result );
    memcpy( pbResult, sha512Result, SYMCRYPT_SHA384_RESULT_SIZE );

    //
    // The buffer was already wiped by the SymCryptSha512Result function, we
    // just have to re-initialize for SHA-384
    //
    SymCryptSha384Init( pState );

    SymCryptWipeKnownSize( sha512Result, sizeof( sha512Result ) );
}


//
// SymCryptSha512_224Result
// Same scheme as SymCryptSha384Result, truncating to
// SYMCRYPT_SHA512_224_RESULT_SIZE bytes and re-initializing for SHA-512/224.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512_224Result(
    _Inout_ PSYMCRYPT_SHA512_224_STATE pState,
    _Out_writes_( SYMCRYPT_SHA512_224_RESULT_SIZE ) PBYTE pbResult )
{
    SYMCRYPT_ALIGN BYTE sha512Result[SYMCRYPT_SHA512_RESULT_SIZE]; // Buffer for SHA-512 output

    //
    // The SHA-512/224 result is the first 28 bytes of the SHA-512 result of our state
    //
    SymCryptSha512Result( (PSYMCRYPT_SHA512_STATE)pState, sha512Result );
    memcpy( pbResult, sha512Result, SYMCRYPT_SHA512_224_RESULT_SIZE );

    //
    // The buffer was already wiped by the SymCryptSha512Result function, we
    // just have to re-initialize for SHA-512/224
    //
    SymCryptSha512_224Init( pState );

    SymCryptWipeKnownSize( sha512Result, sizeof( sha512Result ) );
}


//
// SymCryptSha512_256Result
// Same scheme as SymCryptSha384Result, truncating to
// SYMCRYPT_SHA512_256_RESULT_SIZE bytes and re-initializing for SHA-512/256.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512_256Result(
    _Inout_ PSYMCRYPT_SHA512_256_STATE pState,
    _Out_writes_( SYMCRYPT_SHA512_256_RESULT_SIZE ) PBYTE pbResult )
{
    SYMCRYPT_ALIGN BYTE sha512Result[SYMCRYPT_SHA512_RESULT_SIZE]; // Buffer for SHA-512 output

    //
    // The SHA-512/256 result is the first 32 bytes of the SHA-512 result of our state
    //
    SymCryptSha512Result( (PSYMCRYPT_SHA512_STATE)pState, sha512Result );
    memcpy( pbResult, sha512Result, SYMCRYPT_SHA512_256_RESULT_SIZE );

    //
    // The buffer was already wiped by the SymCryptSha512Result function, we
    // just have to re-initialize for SHA-512/256
    //
    SymCryptSha512_256Init( pState );

    SymCryptWipeKnownSize( sha512Result, sizeof( sha512Result ) );
}


//
// SymCryptSha512StateExportCore
// Serializes a hash state into a SYMCRYPT_SHA512_STATE_EXPORT_SIZE-byte blob at
// pbBlob. The blob is assembled in an aligned local copy: header (magic, size,
// caller-supplied type), chaining words stored MSB-first, both length words,
// the currently buffered bytes, and a Marvin32 checksum over everything before
// the trailer. The local copy is wiped before returning.
//
VOID
SYMCRYPT_CALL
SymCryptSha512StateExportCore(
    _In_ PCSYMCRYPT_SHA512_STATE pState,
    _Out_writes_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE ) PBYTE pbBlob,
    _In_ UINT32 type )
{
    SYMCRYPT_ALIGN SYMCRYPT_SHA512_STATE_EXPORT_BLOB blob; // local copy to have proper alignment.
    C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA512_STATE_EXPORT_SIZE );

    SYMCRYPT_CHECK_MAGIC( pState );

    SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage

    blob.header.magic = SYMCRYPT_BLOB_MAGIC;
    blob.header.size = SYMCRYPT_SHA512_STATE_EXPORT_SIZE;
    blob.header.type = type;

    //
    // Copy the relevant data. Buffer will be 0-padded.
    //

    SymCryptUint64ToMsbFirst( &pState->chain.H[0], &blob.chain[0], 8 );
    blob.dataLengthL = pState->dataLengthL;
    blob.dataLengthH = pState->dataLengthH;
    // The number of buffered bytes is the total length modulo the 128-byte block size.
    memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLengthL & 0x7f );

    SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer );
    SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] );

    memcpy( pbBlob, &blob, sizeof( blob ) );

//cleanup:
    SymCryptWipeKnownSize( &blob, sizeof( blob ) );
    return;
}

// Exports a SHA-512 state; tags the blob with SymCryptBlobTypeSha512State.
VOID
SYMCRYPT_CALL
SymCryptSha512StateExport(
    _In_ PCSYMCRYPT_SHA512_STATE pState,
    _Out_writes_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE ) PBYTE pbBlob )
{
    SymCryptSha512StateExportCore( pState, pbBlob, SymCryptBlobTypeSha512State );
}

// Exports a SHA-384 state; tags the blob with SymCryptBlobTypeSha384State.
VOID
SYMCRYPT_CALL
SymCryptSha384StateExport(
    _In_ PCSYMCRYPT_SHA384_STATE pState,
    _Out_writes_bytes_( SYMCRYPT_SHA384_STATE_EXPORT_SIZE ) PBYTE pbBlob )
{
    SymCryptSha512StateExportCore( (PCSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha384State );
}

// Exports a SHA-512/224 state; tags the blob with SymCryptBlobTypeSha512_224State.
VOID
SYMCRYPT_CALL
SymCryptSha512_224StateExport(
    _In_ PCSYMCRYPT_SHA512_224_STATE pState,
    _Out_writes_bytes_( SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE ) PBYTE pbBlob )
{
    SymCryptSha512StateExportCore( (PCSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha512_224State );
}

// Exports a SHA-512/256 state; tags the blob with SymCryptBlobTypeSha512_256State.
VOID
SYMCRYPT_CALL
SymCryptSha512_256StateExport(
    _In_ PCSYMCRYPT_SHA512_256_STATE pState,
    _Out_writes_bytes_( SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE ) PBYTE pbBlob )
{
    SymCryptSha512StateExportCore( (PCSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha512_256State );
}


//
// SymCryptSha512StateImportCore
// Reconstructs a hash state from a blob produced by the export functions.
// Returns SYMCRYPT_INVALID_BLOB, leaving pState untouched, when the header
// magic, size or type does not match, or when the Marvin32 checksum recomputed
// over the blob differs from the stored one. On success returns
// SYMCRYPT_NO_ERROR after filling in the chaining words, length counters,
// buffered bytes and magic value. The local blob copy is wiped on every path.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSha512StateImportCore(
    _Out_ PSYMCRYPT_SHA512_STATE pState,
    _In_reads_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE) PCBYTE pbBlob,
    _In_ UINT32 type )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    SYMCRYPT_ALIGN SYMCRYPT_SHA512_STATE_EXPORT_BLOB blob; // local copy to have proper alignment.
    BYTE checksum[8];

    C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA512_STATE_EXPORT_SIZE );
    memcpy( &blob, pbBlob, sizeof( blob ) );

    if( blob.header.magic != SYMCRYPT_BLOB_MAGIC ||
        blob.header.size != SYMCRYPT_SHA512_STATE_EXPORT_SIZE ||
        blob.header.type != type )
    {
        scError = SYMCRYPT_INVALID_BLOB;
        goto cleanup;
    }

    SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum );
    if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 )
    {
        scError = SYMCRYPT_INVALID_BLOB;
        goto cleanup;
    }

    SymCryptMsbFirstToUint64( &blob.chain[0], &pState->chain.H[0], 8 );
    pState->dataLengthL = blob.dataLengthL;
    pState->dataLengthH = blob.dataLengthH;
    // The buffered-byte count is not stored; it is derived from the length (mod 128).
    pState->bytesInBuffer = blob.dataLengthL & 0x7f;
    memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer );

    SYMCRYPT_SET_MAGIC( pState );

cleanup:
    SymCryptWipeKnownSize( &blob, sizeof(blob) );
    return scError;
}

// Imports a SHA-512 state; the blob must carry SymCryptBlobTypeSha512State.
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSha512StateImport(
    _Out_ PSYMCRYPT_SHA512_STATE pState,
    _In_reads_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE) PCBYTE pbBlob )
{
    return SymCryptSha512StateImportCore( pState, pbBlob, SymCryptBlobTypeSha512State );
}

// Imports a SHA-384 state; the blob must carry SymCryptBlobTypeSha384State.
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSha384StateImport(
    _Out_ PSYMCRYPT_SHA384_STATE pState,
    _In_reads_bytes_( SYMCRYPT_SHA384_STATE_EXPORT_SIZE) PCBYTE pbBlob )
{
    return SymCryptSha512StateImportCore( (PSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha384State );
}

// Imports a SHA-512/224 state; the blob must carry SymCryptBlobTypeSha512_224State.
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSha512_224StateImport(
    _Out_ PSYMCRYPT_SHA512_224_STATE pState,
    _In_reads_bytes_( SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE) PCBYTE pbBlob )
{
    return SymCryptSha512StateImportCore( (PSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha512_224State );
}

// Imports a SHA-512/256 state; the blob must carry SymCryptBlobTypeSha512_256State.
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptSha512_256StateImport(
    _Out_ PSYMCRYPT_SHA512_256_STATE pState,
    _In_reads_bytes_( SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE) PCBYTE pbBlob )
{
    return SymCryptSha512StateImportCore( (PSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha512_256State );
}


//
// A simple test case intended for module testing for
// FIPS compliance.
+// This is the one-block example message from FIPS 180-2 appendix C +// + +const BYTE SymCryptSha512KATAnswer[64] = +{ + 0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba, + 0xcc, 0x41, 0x73, 0x49, 0xae, 0x20, 0x41, 0x31, + 0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2, + 0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a, + 0x21, 0x92, 0x99, 0x2a, 0x27, 0x4f, 0xc1, 0xa8, + 0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd, + 0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e, + 0x2a, 0x9a, 0xc9, 0x4f, 0xa5, 0x4c, 0xa4, 0x9f, +}; + +VOID +SYMCRYPT_CALL +SymCryptSha512Selftest(void) +{ + BYTE result[SYMCRYPT_SHA512_RESULT_SIZE]; + + SymCryptSha512( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, SymCryptSha512KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'SH51' ); + } +} + +// +// A simple test case intended for module testing for +// FIPS compliance. +// This is the one-block example message from FIPS 180-2 appendix D +// + +const BYTE SymCryptSha384KATAnswer[ 48 ] = +{ + 0xcb, 0x00, 0x75, 0x3f, 0x45, 0xa3, 0x5e, 0x8b, + 0xb5, 0xa0, 0x3d, 0x69, 0x9a, 0xc6, 0x50, 0x07, + 0x27, 0x2c, 0x32, 0xab, 0x0e, 0xde, 0xd1, 0x63, + 0x1a, 0x8b, 0x60, 0x5a, 0x43, 0xff, 0x5b, 0xed, + 0x80, 0x86, 0x07, 0x2b, 0xa1, 0xe7, 0xcc, 0x23, + 0x58, 0xba, 0xec, 0xa1, 0x34, 0xc8, 0x25, 0xa7, +}; + +VOID +SYMCRYPT_CALL +SymCryptSha384Selftest(void) +{ + BYTE result[SYMCRYPT_SHA384_RESULT_SIZE]; + + SymCryptSha384( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, SymCryptSha384KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'SH38' ); + } +} + +// +// Simple test vector for FIPS module testing +// + +const BYTE SymCryptSha512_224KATAnswer[ 28 ] = +{ + 0x46, 0x34, 0x27, 0x0f, 0x70, 0x7b, 0x6a, 0x54, + 0xda, 0xae, 0x75, 0x30, 0x46, 0x08, 0x42, 0xe2, + 0x0e, 0x37, 0xed, 0x26, 0x5c, 0xee, 0xe9, 0xa4, + 0x3e, 0x89, 0x24, 0xaa, 
+}; + +VOID +SYMCRYPT_CALL +SymCryptSha512_224Selftest(void) +{ + BYTE result[SYMCRYPT_SHA512_224_RESULT_SIZE]; + + SymCryptSha512_224( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, SymCryptSha512_224KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'SH51' ); + } +} + +// +// Simple test vector for FIPS module testing +// + +const BYTE SymCryptSha512_256KATAnswer[ 32 ] = +{ + 0x53, 0x04, 0x8e, 0x26, 0x81, 0x94, 0x1e, 0xf9, + 0x9b, 0x2e, 0x29, 0xb7, 0x6b, 0x4c, 0x7d, 0xab, + 0xe4, 0xc2, 0xd0, 0xc6, 0x34, 0xfc, 0x6d, 0x46, + 0xe0, 0xe2, 0xf1, 0x31, 0x07, 0xe7, 0xaf, 0x23, +}; + +VOID +SYMCRYPT_CALL +SymCryptSha512_256Selftest(void) +{ + BYTE result[SYMCRYPT_SHA512_256_RESULT_SIZE]; + + SymCryptSha512_256( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, SymCryptSha512_256KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'SH51' ); + } +} + +// +// We keep multiple implementations in this file. +// This allows us to switch different platforms to different implementations, whichever +// is faster. Even if we don't use a particular implementation in one release, +// we keep it around in case it becomes the preferred one for a new CPU release. +// (Performance can change a lot with changes in micro-architecture.) +// + +//=================================================================================== +// Implementation of compression function using UINT64s +// + +// +// For documentation on these function see FIPS 180-2 +// +// MAJ and CH are the functions Maj and Ch from the standard. +// CSIGMA0 and CSIGMA1 are the capital sigma functions. +// LSIGMA0 and LSIGMA1 are the lowercase sigma functions. 
//
// The canonical definitions of the MAJ and CH functions are:
//#define MAJ( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
//#define CH( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z)))
// We use optimized versions defined below
//
#define MAJ( x, y, z ) ((((z) | (y)) & (x) ) | ((z) & (y)))
#define CH( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z))

//
// The four Sigma functions
//
// The commented-out definitions are the canonical ones. The active definitions
// factor out a common rotation (rotations distribute over XOR), e.g.
// ROR(ROR(x,6) ^ ROR(x,11) ^ x, 28) == ROR(x,34) ^ ROR(x,39) ^ ROR(x,28).
//

//#define CSIGMA0( x ) (ROR64((x), 28) ^ ROR64((x), 34) ^ ROR64((x), 39))
//#define CSIGMA1( x ) (ROR64((x), 14) ^ ROR64((x), 18) ^ ROR64((x), 41))
//#define LSIGMA0( x ) (ROR64((x), 1) ^ ROR64((x), 8) ^ ((x)>> 7))
//#define LSIGMA1( x ) (ROR64((x), 19) ^ ROR64((x), 61) ^ ((x)>> 6))

#define CSIGMA0( x ) (ROR64((ROR64((x), 6) ^ ROR64((x), 11) ^ (x)), 28))
#define CSIGMA1( x ) (ROR64((ROR64((x), 4) ^ ROR64((x), 27) ^ (x)), 14))
#define LSIGMA0( x ) (ROR64((x) ^ ROR64((x), 7), 1) ^ ((x)>> 7))
#define LSIGMA1( x ) (ROR64((x) ^ ROR64((x), 42), 19) ^ ((x)>> 6))



//
// The values a-h were stored in an array called ah.
// We have unrolled the loop 16 times. This makes both the indices into
// the ah array constant, and it makes the message addressing constant.
// This provides a significant speed improvement, at the cost of making
// the main loop about 4 kB in code.
//
// Initial round; r16 is the round number mod 16
// ah[ r16 &7] = h
// ah[(r16+1)&7] = g;
// ah[(r16+2)&7] = f;
// ah[(r16+3)&7] = e;
// ah[(r16+4)&7] = d;
// ah[(r16+5)&7] = c;
// ah[(r16+6)&7] = b;
// ah[(r16+7)&7] = a;
//
// Unfortunately, the compiler seems to choke on this, allocating an extra variable for
// each of the array indices, with duplicate stores to both locations.
//

//
// The core round, after the message word has been computed for this round and put in Wt.
// r16 is the round number modulo 16. (Static after loop unrolling)
// r is the round number
//
// The macro expects W (16-word schedule window) and Wt to be in scope at the
// expansion site. It stores Wt into W[r16], then updates only h and d; the
// callers rotate the argument list by one position per round instead of moving
// values between variables.
//
#define CROUND( a, b, c, d, e, f, g, h, r, r16 ) {;\
    W[r16] = Wt; \
    h += CSIGMA1(e) + CH(e, f, g) + SymCryptSha512K[r] + Wt;\
    d += h;\
    h += CSIGMA0(a) + MAJ(a, b, c);\
}

//
// Initial round that reads the message.
// r is the round number 0..15
// Loads message word r as a big-endian 64-bit value from pbData (which must be
// in scope at the expansion site) and runs the core round.
//
#define IROUND( a, b, c, d, e, f, g, h, r ) {\
    Wt = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*r ] );\
    CROUND( a, b, c, d, e, f, g, h, r, r);\
    }
//
// Subsequent rounds.
// r is the round number, r16 is the round number mod 16.
// These are separate as typically r is run-time and r16 is compile time constant.
// The new message word is derived from the 16-word circular window W: the
// entries 2, 7, 15 and 16 rounds back (the last one being W[r16] itself,
// which is then overwritten by CROUND).
//
#define FROUND( a, b, c, d, e, f, g, h, r, r16 ) { \
    Wt = LSIGMA1( W[(r16-2) & 15] ) + W[(r16-7) & 15] + \
        LSIGMA0( W[(r16-15) & 15]) + W[r16 & 15]; \
    CROUND( a, b, c, d, e, f, g, h, r, r16 ); \
    }

//
// This is the core routine that does the actual hard work
// This is based on the older one in RSA32LIB by Scott Field from 2001
//
// Processes as many whole 128-byte blocks from pbData as cbData allows,
// updating pChain->H[0..7] after each block, and stores the number of
// unprocessed trailing bytes in *pcbRemaining. Working variables are wiped
// before returning.
//
VOID
SYMCRYPT_CALL
SymCryptSha512AppendBlocks_ull(
    _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain,
    _In_reads_(cbData) PCBYTE pbData,
    SIZE_T cbData,
    _Out_ SIZE_T * pcbRemaining )
{
    SYMCRYPT_ALIGN UINT64 W[16];    // circular window over the message schedule
    UINT64 A, B, C, D, E, F, G, H;
    int round;
    UINT64 Wt;


    while( cbData >= 128 )
    {
        A = pChain->H[0];
        B = pChain->H[1];
        C = pChain->H[2];
        D = pChain->H[3];
        E = pChain->H[4];
        F = pChain->H[5];
        G = pChain->H[6];
        H = pChain->H[7];

        //
        // initial 16 rounds (round indices 0..15), reading the message block
        //

        IROUND( A, B, C, D, E, F, G, H, 0 );
        IROUND( H, A, B, C, D, E, F, G, 1 );
        IROUND( G, H, A, B, C, D, E, F, 2 );
        IROUND( F, G, H, A, B, C, D, E, 3 );
        IROUND( E, F, G, H, A, B, C, D, 4 );
        IROUND( D, E, F, G, H, A, B, C, 5 );
        IROUND( C, D, E, F, G, H, A, B, 6 );
        IROUND( B, C, D, E, F, G, H, A, 7 );
        IROUND( A, B, C, D, E, F, G, H, 8 );
        IROUND( H, A, B, C, D, E, F, G, 9 );
        IROUND( G, H, A, B, C, D, E, F, 10 );
        IROUND( F, G, H, A, B, C, D, E, 11 );
        IROUND( E, F, G, H, A, B, C, D, 12 );
        IROUND( D, E, F, G, H, A, B, C, 13 );
        IROUND( C, D, E, F, G, H, A, B, 14 );
        IROUND( B, C, D, E, F, G, H, A, 15 );

        // Remaining 64 rounds, 16 per iteration so that the window index stays constant.
        for( round=16; round<80; round += 16 )
        {
            FROUND( A, B, C, D, E, F, G, H, round + 0, 0 );
            FROUND( H, A, B, C, D, E, F, G, round + 1, 1 );
            FROUND( G, H, A, B, C, D, E, F, round + 2, 2 );
            FROUND( F, G, H, A, B, C, D, E, round + 3, 3 );
            FROUND( E, F, G, H, A, B, C, D, round + 4, 4 );
            FROUND( D, E, F, G, H, A, B, C, round + 5, 5 );
            FROUND( C, D, E, F, G, H, A, B, round + 6, 6 );
            FROUND( B, C, D, E, F, G, H, A, round + 7, 7 );
            FROUND( A, B, C, D, E, F, G, H, round + 8, 8 );
            FROUND( H, A, B, C, D, E, F, G, round + 9, 9 );
            FROUND( G, H, A, B, C, D, E, F, round + 10, 10 );
            FROUND( F, G, H, A, B, C, D, E, round + 11, 11 );
            FROUND( E, F, G, H, A, B, C, D, round + 12, 12 );
            FROUND( D, E, F, G, H, A, B, C, round + 13, 13 );
            FROUND( C, D, E, F, G, H, A, B, round + 14, 14 );
            FROUND( B, C, D, E, F, G, H, A, round + 15, 15 );
        }

        // Feed-forward: add the block's result to the previous chaining value.
        pChain->H[0] = A + pChain->H[0];
        pChain->H[1] = B + pChain->H[1];
        pChain->H[2] = C + pChain->H[2];
        pChain->H[3] = D + pChain->H[3];
        pChain->H[4] = E + pChain->H[4];
        pChain->H[5] = F + pChain->H[5];
        pChain->H[6] = G + pChain->H[6];
        pChain->H[7] = H + pChain->H[7];

        pbData += 128;
        cbData -= 128;
    }

    *pcbRemaining = cbData;

    //
    // Wipe the variables;
    //
    SymCryptWipeKnownSize( W, sizeof( W ) );
    SYMCRYPT_FORCE_WRITE64( &A, 0 );
    SYMCRYPT_FORCE_WRITE64( &B, 0 );
    SYMCRYPT_FORCE_WRITE64( &C, 0 );
    SYMCRYPT_FORCE_WRITE64( &D, 0 );
    SYMCRYPT_FORCE_WRITE64( &E, 0 );
    SYMCRYPT_FORCE_WRITE64( &F, 0 );
    SYMCRYPT_FORCE_WRITE64( &G, 0 );
    SYMCRYPT_FORCE_WRITE64( &H, 0 );
    SYMCRYPT_FORCE_WRITE64( &Wt, 0 );
}

//
// UINT64 based implementation that
// first computes the expanded message, and then the
// actual hash computation.
// It tries to use fewer registers; this is probably a good approach for CPUs with only 8
// 64-bit registers; which is what you would use on x86 XMM, but we have XMM code below.
// This uses more memory, but might allow better register re-use and thereby
// reduce the number of load/stores.
//
// Same contract as the other AppendBlocks variants: processes whole 128-byte
// blocks, updates pChain->H[0..7], and reports the unprocessed byte count in
// *pcbRemaining.
//
// Memory layout of buf:
//   buf[0..3]   saved copy of the chaining words d, c, b, a (for the feed-forward)
//   buf[4..11]  ha[0..7]: chaining words in order h, g, ..., b, a
//   buf[12..91] W[0..79]: expanded message schedule
// Because ha sits directly in front of W, the round loop addresses the state
// words e..h as W[r-5]..W[r-8] (negative indices for small r reach into ha) and
// stores each new e at W[r-4]. Those stores overwrite ha[4..7] during the first
// four rounds, which is why a..d are also kept in buf[0..3].
//

VOID
SYMCRYPT_CALL
SymCryptSha512AppendBlocks_ull2(
    _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain,
    _In_reads_(cbData) PCBYTE pbData,
    SIZE_T cbData,
    _Out_ SIZE_T * pcbRemaining )
{
    SYMCRYPT_ALIGN UINT64 buf[4 + 8 + 80]; // 4 saved chaining words (a..d), 8 chaining state words, and 80 expanded message words
    UINT64 * W = &buf[4 + 8];
    UINT64 * ha = &buf[4]; // initial state words, in order h, g, ..., b, a
    UINT64 A, B, C, D, T;
    int r;

    ha[7] = pChain->H[0]; buf[3] = ha[7];
    ha[6] = pChain->H[1]; buf[2] = ha[6];
    ha[5] = pChain->H[2]; buf[1] = ha[5];
    ha[4] = pChain->H[3]; buf[0] = ha[4];
    ha[3] = pChain->H[4];
    ha[2] = pChain->H[5];
    ha[1] = pChain->H[6];
    ha[0] = pChain->H[7];

    while( cbData >= 128 )
    {

        //
        // Capture the input into W[0..15]
        //
        for( r=0; r<16; r+= 2 )
        {
            W[r ] = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8* r ] );
            W[r+1] = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*(r+1) ] );
        }

        //
        // Expand the message
        //
        A = W[15];
        B = W[14];
        D = W[0];
        for( r=16; r<80; r+= 2 )
        {
            // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16]

            //
            // Macro for one word of message expansion.
            // Invariant:
            // on entry: a = W[r-1], b = W[r-2], d = W[r-16]
            // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15]
            //
            // Note: the macro definition ends with a line continuation, so the
            // blank line that follows it is part of the definition and must stay.
            //
            #define EXPAND( a, b, c, d, r ) \
                c = W[r-15]; \
                b = d + LSIGMA1( b ) + W[r-7] + LSIGMA0( c ); \
                W[r] = b; \

            EXPAND( A, B, C, D, r );
            EXPAND( B, A, D, C, (r+1));

            #undef EXPAND
        }

        A = ha[7];
        B = ha[6];
        C = ha[5];
        D = ha[4];

        for( r=0; r<80; r += 4 )
        {
            //
            // Loop invariant:
            // A, B, C, and D are the a,b,c,d values of the current state.
            // W[r] is the next expanded message word to be processed.
            // W[r-8 .. r-5] contain the current state words h, g, f, e.
            //

            //
            // Macro to compute one round
            // t receives h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r]; the new e
            // (t + d) is written to W[r-4], and d is reused to hold the new a.
            //
            #define DO_ROUND( a, b, c, d, t, r ) \
                t = W[r] + CSIGMA1( W[r-5] ) + W[r-8] + CH( W[r-5], W[r-6], W[r-7] ) + SymCryptSha512K[r]; \
                W[r-4] = t + d; \
                d = t + CSIGMA0( a ) + MAJ( c, b, a );

            DO_ROUND( A, B, C, D, T, r );
            DO_ROUND( D, A, B, C, T, (r+1) );
            DO_ROUND( C, D, A, B, T, (r+2) );
            DO_ROUND( B, C, D, A, T, (r+3) );
            #undef DO_ROUND
        }

        // Feed-forward. Here r == 80, so W[r-5..r-8] are the final e, f, g, h.
        buf[3] = ha[7] = buf[3] + A;
        buf[2] = ha[6] = buf[2] + B;
        buf[1] = ha[5] = buf[1] + C;
        buf[0] = ha[4] = buf[0] + D;
        ha[3] += W[r-5];
        ha[2] += W[r-6];
        ha[1] += W[r-7];
        ha[0] += W[r-8];

        pbData += 128;
        cbData -= 128;
    }

    pChain->H[0] = ha[7];
    pChain->H[1] = ha[6];
    pChain->H[2] = ha[5];
    pChain->H[3] = ha[4];
    pChain->H[4] = ha[3];
    pChain->H[5] = ha[2];
    pChain->H[6] = ha[1];
    pChain->H[7] = ha[0];

    *pcbRemaining = cbData;

    //
    // Wipe the variables;
    //
    SymCryptWipeKnownSize( buf, sizeof( buf ) );
    SYMCRYPT_FORCE_WRITE64( &A, 0 );
    SYMCRYPT_FORCE_WRITE64( &B, 0 );
    SYMCRYPT_FORCE_WRITE64( &C, 0 );
    SYMCRYPT_FORCE_WRITE64( &D, 0 );
    SYMCRYPT_FORCE_WRITE64( &T, 0 );

}

//
// UINT64 based implementation that
// first computes the expanded message, and then the
// actual hash computation.
// This one uses more registers than the previous one.
//
// Same contract as the other AppendBlocks variants: processes whole 128-byte
// blocks, updates pChain->H[0..7], and reports the unprocessed byte count in
// *pcbRemaining. The full 80-word schedule is expanded into W first; the rounds
// then keep all eight state words in local variables.
//

VOID
SYMCRYPT_CALL
SymCryptSha512AppendBlocks_ull3(
    _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain,
    _In_reads_(cbData) PCBYTE pbData,
    SIZE_T cbData,
    _Out_ SIZE_T * pcbRemaining )
{
    SYMCRYPT_ALIGN UINT64 W[80];    // expanded message schedule
    SYMCRYPT_ALIGN UINT64 ha[8];    // chaining words in order h, g, ..., b, a
    UINT64 A, B, C, D, E, F, G, H;
    int r;

    ha[7] = pChain->H[0];
    ha[6] = pChain->H[1];
    ha[5] = pChain->H[2];
    ha[4] = pChain->H[3];
    ha[3] = pChain->H[4];
    ha[2] = pChain->H[5];
    ha[1] = pChain->H[6];
    ha[0] = pChain->H[7];

    while( cbData >= 128 )
    {

        //
        // Capture the input into W[0..15]
        //
        for( r=0; r<16; r+= 2 )
        {
            W[r ] = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8* r ] );
            W[r+1] = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*(r+1) ] );
        }

        //
        // Expand the message
        //
        A = W[15];
        B = W[14];
        D = W[0];
        for( r=16; r<80; r+= 2 )
        {
            // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16]

            //
            // Macro for one word of message expansion.
            // Invariant:
            // on entry: a = W[r-1], b = W[r-2], d = W[r-16]
            // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15]
            //
            // Note: the macro definition ends with a line continuation, so the
            // blank line that follows it is part of the definition and must stay.
            //
            #define EXPAND( a, b, c, d, r ) \
                c = W[r-15]; \
                b = d + LSIGMA1( b ) + W[r-7] + LSIGMA0( c ); \
                W[r] = b; \

            EXPAND( A, B, C, D, r );
            EXPAND( B, A, D, C, (r+1));

            #undef EXPAND
        }

        A = ha[7];
        B = ha[6];
        C = ha[5];
        D = ha[4];
        E = ha[3];
        F = ha[2];
        G = ha[1];
        H = ha[0];

        for( r=0; r<80; r += 8 )
        {
            //
            // Loop invariant:
            // A, B, C, and D, E, F, G, H, are the values of the current state.
            // W[r] is the next expanded message word to be processed.
            //

            //
            // Macro to compute one round
            // Only h and d are modified; the invocations below rotate the
            // argument list by one position per round.
            //
            #define DO_ROUND( a, b, c, d, e, f, g, h, r ) \
                h += W[r] + CSIGMA1( e ) + CH( e, f, g ) + SymCryptSha512K[r]; \
                d += h; \
                h += CSIGMA0( a ) + MAJ( c, b, a );

            DO_ROUND( A, B, C, D, E, F, G, H, (r ) );
            DO_ROUND( H, A, B, C, D, E, F, G, (r+1) );
            DO_ROUND( G, H, A, B, C, D, E, F, (r+2) );
            DO_ROUND( F, G, H, A, B, C, D, E, (r+3) );
            DO_ROUND( E, F, G, H, A, B, C, D, (r+4) );
            DO_ROUND( D, E, F, G, H, A, B, C, (r+5) );
            DO_ROUND( C, D, E, F, G, H, A, B, (r+6) );
            DO_ROUND( B, C, D, E, F, G, H, A, (r+7) );
            #undef DO_ROUND
        }

        // Feed-forward into the running chaining value.
        ha[7] += A;
        ha[6] += B;
        ha[5] += C;
        ha[4] += D;
        ha[3] += E;
        ha[2] += F;
        ha[1] += G;
        ha[0] += H;

        pbData += 128;
        cbData -= 128;
    }

    pChain->H[0] = ha[7];
    pChain->H[1] = ha[6];
    pChain->H[2] = ha[5];
    pChain->H[3] = ha[4];
    pChain->H[4] = ha[3];
    pChain->H[5] = ha[2];
    pChain->H[6] = ha[1];
    pChain->H[7] = ha[0];

    *pcbRemaining = cbData;

    //
    // Wipe the variables;
    //
    SymCryptWipeKnownSize( W, sizeof( W ) );
    SymCryptWipeKnownSize( ha, sizeof( ha ) );
    SYMCRYPT_FORCE_WRITE64( &A, 0 );
    SYMCRYPT_FORCE_WRITE64( &B, 0 );
    SYMCRYPT_FORCE_WRITE64( &C, 0 );
    SYMCRYPT_FORCE_WRITE64( &D, 0 );
    SYMCRYPT_FORCE_WRITE64( &E, 0 );
    SYMCRYPT_FORCE_WRITE64( &F, 0 );
    SYMCRYPT_FORCE_WRITE64( &G, 0 );
    SYMCRYPT_FORCE_WRITE64( &H, 0 );
}

// The scalar helper macros are private to the UINT64 implementations above.
#undef MAJ
#undef CH
#undef CSIGMA0
#undef CSIGMA1
#undef LSIGMA0
#undef LSIGMA1
#undef CROUND
#undef IROUND
#undef FROUND

//======================================================================================
// Implementation using Xmm registers
//
#if SYMCRYPT_CPU_X86 // only on X86; AMD64 is faster when using UINT64s

// Compile the functions that follow with SSSE3 enabled.
// The matching pop is expected at the end of this section, outside this excerpt.
#ifdef __clang__
#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function)
#else
#pragma GCC push_options
#pragma GCC target("ssse3")
#endif

#if SYMCRYPT_MS_VC
#ifndef _mm_storeu_si64
    // Workaround missing intrinsic on some versions of MSVC
    #define _mm_storeu_si64(p, a) (_mm_storel_epi64((__m128i*)(p), (a)))
#endif
#endif

//
// Thin wrappers over the SSE intrinsics, operating on the 64-bit lanes of an
// __m128i. XMMROR builds a 64-bit rotate-right from a left and a right shift;
// the two shifted values have no overlapping bits, so combining them with XOR
// gives the same result as OR.
//
#define XMMADD( _a, _b ) _mm_add_epi64((_a), (_b))
#define XMMAND( _a, _b ) _mm_and_si128((_a), (_b))
#define XMMOR( _a, _b ) _mm_or_si128((_a), (_b))
#define XMMROR( _a, _n ) _mm_xor_si128( _mm_slli_epi64( (_a), 64-(_n)), _mm_srli_epi64( (_a), (_n)) )
#define XMMSHR( _a, _n ) _mm_srli_epi64((_a), (_n))
#define XMMXOR( _a, _b ) _mm_xor_si128((_a), (_b))
#define XMMSTORE_UINT64( _a, _addr ) _mm_storeu_si64((_addr), (_a))

// XMM versions of Maj, Ch and the four sigma functions (canonical rotation amounts).
#define XMMMAJ( x, y, z ) XMMOR( XMMAND( XMMOR( (z), (y)), (x)), XMMAND( (z), (y) ) )
#define XMMCH( x, y, z ) XMMXOR( XMMAND( XMMXOR( (z), (y) ), (x)), (z))
#define XMMCSIGMA0( x ) XMMXOR( XMMXOR( XMMROR((x), 28), XMMROR((x), 34)), XMMROR((x), 39))
#define XMMCSIGMA1( x ) XMMXOR( XMMXOR( XMMROR((x), 14), XMMROR((x), 18)), XMMROR((x), 41))
#define XMMLSIGMA0( x ) XMMXOR( XMMXOR( XMMROR((x), 1), XMMROR((x), 8)), XMMSHR((x), 7))
#define XMMLSIGMA1( x ) XMMXOR( XMMXOR( XMMROR((x), 19), XMMROR((x), 61)), XMMSHR((x), 6))

//
// Core round takes two arguments: r16 = round number modulo 16, r = round number - r16.
// On entry, Wt must be equal to the sum of the round constant and the expanded message word for this round.
// Only the lower word of each Xmm register is used.
// The macro body references only r16 (to index the ah array, which must be in
// scope at the expansion site); the r argument is not used.
//
#define XMMCROUND( r16, r ) {;\
    ah[r16 & 7] = XMMADD( XMMADD( XMMADD( ah[r16 & 7], XMMCSIGMA1(ah[(r16+3)&7]) ), XMMCH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) ), Wt );\
    ah[(r16+4)&7] = XMMADD( ah[(r16+4)&7], ah[r16 &7] );\
    ah[r16 & 7] = XMMADD( XMMADD( ah[r16 & 7], XMMCSIGMA0(ah[(r16+7)&7])), XMMMAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]) );\
}

#pragma warning( disable: 4127 ) // conditional expression is constant

//
// Initial round; reads data and performs a round.
// Data is read in 128-bit chunks every other round.
+//
+#define XMMIROUND( r ) {\
+    if( (r&1) == 0 ) \
+    { \
+        Wt = _mm_loadu_si128( (__m128i *)&pbData[ 8*r ] ); \
+        Wt = _mm_shuffle_epi8( Wt, BYTE_REVERSE_64 ); \
+        W[r/2] = Wt; \
+        Wt = XMMADD( Wt, _mm_load_si128( (__m128i *)&SymCryptSha512K[r] ) ); \
+        Ws = _mm_srli_si128( Wt, 8 ); \
+    } else {\
+        Wt = Ws;\
+    }\
+    XMMCROUND( r, r );\
+}
+
+//
+// Working version of XMMIROUND:
+//        Wt = XMMFROM_MSBF( &pbData[ 8*r ] );\
+//        W[r] = Wt;\
+//        Wt = XMMADD( XMMFROM_UINT64(SymCryptSha512K[r]), Wt );\
+//        XMMCROUND(r,r);\
+
+//
+// Subsequent round (rounds 16..79).
+// r16 = round number modulo 16, rb = base round number (a multiple of 16), so r16+rb is the round.
+// On an even r16 two message words are expanded at once: every W[] element holds two
+// consecutive 64-bit words, and _mm_alignr_epi8 builds the word pairs that straddle two elements.
+// The two round constants are then added; the low half (in Wt) feeds this round and the
+// high half is shifted down into Ws for the following odd round, which only copies Ws to Wt.
+//
+#define XMMFROUND(r16, rb) { \
+    if( (r16 & 1) == 0 ) \
+    {\
+        Wt = XMMADD( XMMADD( XMMADD( XMMLSIGMA1( W[((r16 - 2)&15)/2] ), \
+                _mm_alignr_epi8( W[((r16 - 6)&15)/2], W[((r16 - 7)&15)/2], 8 ) ), \
+                XMMLSIGMA0( _mm_alignr_epi8( W[((r16 - 14)&15)/2], W[((r16 - 15)&15)/2], 8 ) ) ), \
+                W[((r16 - 16)&15)/2] ); \
+        W[r16/2] = Wt;\
+        Ws = _mm_load_si128( (__m128i *)&SymCryptSha512K[r16 + rb] );\
+        Wt = XMMADD( Ws , Wt );\
+        Ws = _mm_srli_si128( Wt, 8 );\
+    } else {\
+        Wt = Ws;\
+    }\
+    XMMCROUND( r16, r16+rb ); \
+}
+
+//
+// SymCryptSha512AppendBlocks_xmm
+//
+// SHA-512 compression function built on the XMM macros above.
+// Consumes whole 128-byte blocks from pbData while cbData >= 128, updates pChain->H,
+// and reports the number of bytes left unprocessed in *pcbRemaining.
+//
+// State layout: ah[7] carries H[0] down to ah[0] carrying H[7]; the state is loaded two words
+// at a time and only the low 64 bits of each element are stored back at the end.
+// feedf[] holds the state at the start of the current block for the feed-forward addition.
+// All local working buffers are wiped before returning.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptSha512AppendBlocks_xmm(
+    _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain,
+    _In_reads_(cbData) PCBYTE pbData,
+    SIZE_T cbData,
+    _Out_ SIZE_T * pcbRemaining )
+{
+    SYMCRYPT_ALIGN __m128i W[8]; // message expansion buffer, 8 elements each storing 2 consecutive UINT64s
+    SYMCRYPT_ALIGN __m128i ah[8];
+    SYMCRYPT_ALIGN __m128i feedf[8];
+    int round;
+    __m128i Wt, Ws;
+    const __m128i BYTE_REVERSE_64 = _mm_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 );
+
+    // Each 128-bit load fetches two state words; the second one is shifted down into its own element.
+    Wt = _mm_loadu_si128( (__m128i *) &pChain->H[0] );
+    feedf[7] = ah[7] = Wt;
+    feedf[6] = ah[6] = _mm_srli_si128( Wt, 8 );
+    Wt = _mm_loadu_si128( (__m128i *) &pChain->H[2] );
+    feedf[5] = ah[5] = Wt;
+    feedf[4] = ah[4] = _mm_srli_si128( Wt, 8 );
+    Wt = _mm_loadu_si128( (__m128i *) &pChain->H[4] );
+    feedf[3] = ah[3] = Wt;
+    feedf[2] = ah[2] = _mm_srli_si128( Wt, 8 );
+    Wt = _mm_loadu_si128( (__m128i *) &pChain->H[6] );
+    feedf[1] = ah[1] = Wt;
+    feedf[0] = ah[0] = _mm_srli_si128( Wt, 8 );
+
+    while( cbData >= 128 )
+    {
+        //
+        // initial rounds 1 to 16
+        //
+
+        XMMIROUND( 0 );
+        XMMIROUND( 1 );
+        XMMIROUND( 2 );
+        XMMIROUND( 3 );
+        XMMIROUND( 4 );
+        XMMIROUND( 5 );
+        XMMIROUND( 6 );
+        XMMIROUND( 7 );
+        XMMIROUND( 8 );
+        XMMIROUND( 9 );
+        XMMIROUND( 10 );
+        XMMIROUND( 11 );
+        XMMIROUND( 12 );
+        XMMIROUND( 13 );
+        XMMIROUND( 14 );
+        XMMIROUND( 15 );
+
+        // Rounds 16..79, sixteen per iteration so the first macro argument stays a compile-time constant.
+        for( round=16; round<80; round += 16 )
+        {
+            XMMFROUND( 0, round );
+            XMMFROUND( 1, round );
+            XMMFROUND( 2, round );
+            XMMFROUND( 3, round );
+            XMMFROUND( 4, round );
+            XMMFROUND( 5, round );
+            XMMFROUND( 6, round );
+            XMMFROUND( 7, round );
+            XMMFROUND( 8, round );
+            XMMFROUND( 9, round );
+            XMMFROUND( 10, round );
+            XMMFROUND( 11, round );
+            XMMFROUND( 12, round );
+            XMMFROUND( 13, round );
+            XMMFROUND( 14, round );
+            XMMFROUND( 15, round );
+        }
+
+        // Feed-forward: add the state from the start of the block and remember the sum for the next block.
+        feedf[0] = ah[0] = XMMADD( ah[0], feedf[0] );
+        feedf[1] = ah[1] = XMMADD( ah[1], feedf[1] );
+        feedf[2] = ah[2] = XMMADD( ah[2], feedf[2] );
+        feedf[3] = ah[3] = XMMADD( ah[3], feedf[3] );
+        feedf[4] = ah[4] = XMMADD( ah[4], feedf[4] );
+        feedf[5] = ah[5] = XMMADD( ah[5], feedf[5] );
+        feedf[6] = ah[6] = XMMADD( ah[6], feedf[6] );
+        feedf[7] = ah[7] = XMMADD( ah[7], feedf[7] );
+
+        pbData += 128;
+        cbData -= 128;
+
+    }
+
+    // Write back the low 64 bits of each state element, reversing the ah[] order into H[0..7].
+    XMMSTORE_UINT64( ah[7], &(pChain->H[0]) );
+    XMMSTORE_UINT64( ah[6], &(pChain->H[1]) );
+    XMMSTORE_UINT64( ah[5], &(pChain->H[2]) );
+    XMMSTORE_UINT64( ah[4], &(pChain->H[3]) );
+    XMMSTORE_UINT64( ah[3], &(pChain->H[4]) );
+    XMMSTORE_UINT64( ah[2], &(pChain->H[5]) );
+    XMMSTORE_UINT64( ah[1], &(pChain->H[6]) );
+    XMMSTORE_UINT64( ah[0], &(pChain->H[7]) );
+
+    *pcbRemaining = cbData;
+
+    //
+    // Wipe the variables;
+    //
+    SymCryptWipeKnownSize( ah, sizeof( ah ) );
+    SymCryptWipeKnownSize( feedf, sizeof( feedf ) );
+    SymCryptWipeKnownSize( W, sizeof( W ) );
+    SymCryptWipeKnownSize( &Wt, sizeof( Wt ));
+    SymCryptWipeKnownSize( &Ws, sizeof( Ws ));
+}
+
+#ifdef __clang__
+#pragma clang attribute pop
+#else
+#pragma GCC pop_options
+#endif
+
+#endif
+
+
+
+//======================================================================================
+// Implementation using NEON registers
+//
+#if SYMCRYPT_CPU_ARM
+
+
+// 64-bit rotate-right and add on single-lane NEON values, followed by the SHA-512 logical functions.
+#define ROR( _a, _n ) vorr_u64( vshl_n_u64( _a, 64 - _n ), vshr_n_u64( _a, _n ) )
+#define ADD( x, y ) vadd_u64( (x), (y) )
+
+#define MAJ( x, y, z ) vorr_u64( vand_u64( vorr_u64( (z), (y)), (x)), vand_u64( (z), (y) ) )
+#define CH( x, y, z ) veor_u64( vand_u64( veor_u64( (z), (y) ), (x)), (z))
+#define CSIGMA0( x ) veor_u64( veor_u64( ROR((x), 28), ROR((x), 34)), ROR((x), 39))
+#define CSIGMA1( x ) veor_u64( veor_u64( ROR((x), 14), ROR((x), 18)), ROR((x), 41))
+#define LSIGMA0( x ) veor_u64( veor_u64( ROR((x), 1), ROR((x), 8)), vshr_n_u64((x), 7))
+#define LSIGMA1( x ) veor_u64( veor_u64( ROR((x), 19), ROR((x), 61)), vshr_n_u64((x), 6))
+
+//
+// r = round number, r16 = r mod 16 (often a compile-time constant when r is not)
+// On entry Wt holds the message word for this round; it is saved in W[r16] for later expansion.
+//
+#define CROUND( a, b, c, d, e, f, g, h, r, r16 ) {\
+    W[r16] = Wt; \
+    h = ADD( h, ADD( ADD( ADD( CSIGMA1(e), CH(e, f, g)), *(__n64 *)&SymCryptSha512K[r]), Wt ));\
+    d = ADD( d, h );\
+    h = ADD( h, ADD( CSIGMA0(a), MAJ(a, b, c)));\
+}
+
+//
+// Initial round that reads the message.
+// r is the round number 0..15
+//
+#define IROUND( a, b, c, d, e, f, g, h, r ) {\
+    Wt = vmov_n_u64( SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*r ] ) );\
+    CROUND( a, b, c, d, e, f, g, h, r, r);\
+    }
+//
+// Subsequent rounds.
+// r is the round number, r16 is the round number mod 16.
+// These are separate as typically r is run-time and r16 is compile time constant.
+//
+#define FROUND( a, b, c, d, e, f, g, h, r, r16 ) { \
+    Wt = ADD( ADD( LSIGMA1( W[(r16-2) & 15] ), LSIGMA0( W[(r16-15) & 15])) , ADD( W[(r16-7) & 15], W[r16 & 15])); \
+    CROUND( a, b, c, d, e, f, g, h, r, r16 ); \
+    }
+
+//
+// This is the core routine that does the actual hard work
+// This is based on the older one in RSA32LIB by Scott Field from 2001
+//
+// Consumes whole 128-byte blocks from pbData while cbData >= 128 and reports the number of
+// unprocessed bytes in *pcbRemaining. pH aliases pChain->H, so the chaining state is
+// updated in place at the end of every block. W[] is a 16-entry circular message schedule.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptSha512AppendBlocks_neon(
+    _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain,
+    _In_reads_(cbData) PCBYTE pbData,
+    SIZE_T cbData,
+    _Out_ SIZE_T * pcbRemaining )
+{
+    SYMCRYPT_ALIGN __n64 W[16];
+    __n64 A, B, C, D, E, F, G, H;
+    int round;
+    __n64 Wt;
+    __n64 * pH = (__n64 *) &pChain->H[0];
+
+    A = pH[0];
+    B = pH[1];
+    C = pH[2];
+    D = pH[3];
+    E = pH[4];
+    F = pH[5];
+    G = pH[6];
+    H = pH[7];
+
+    while( cbData >= 128 )
+    {
+        //
+        // initial rounds 1 to 16
+        //
+        // The register arguments are rotated by one position per round instead of moving values.
+        //
+
+        IROUND( A, B, C, D, E, F, G, H, 0 );
+        IROUND( H, A, B, C, D, E, F, G, 1 );
+        IROUND( G, H, A, B, C, D, E, F, 2 );
+        IROUND( F, G, H, A, B, C, D, E, 3 );
+        IROUND( E, F, G, H, A, B, C, D, 4 );
+        IROUND( D, E, F, G, H, A, B, C, 5 );
+        IROUND( C, D, E, F, G, H, A, B, 6 );
+        IROUND( B, C, D, E, F, G, H, A, 7 );
+        IROUND( A, B, C, D, E, F, G, H, 8 );
+        IROUND( H, A, B, C, D, E, F, G, 9 );
+        IROUND( G, H, A, B, C, D, E, F, 10 );
+        IROUND( F, G, H, A, B, C, D, E, 11 );
+        IROUND( E, F, G, H, A, B, C, D, 12 );
+        IROUND( D, E, F, G, H, A, B, C, 13 );
+        IROUND( C, D, E, F, G, H, A, B, 14 );
+        IROUND( B, C, D, E, F, G, H, A, 15 );
+
+        for( round=16; round<80; round += 16 )
+        {
+            FROUND( A, B, C, D, E, F, G, H, round + 0, 0 );
+            FROUND( H, A, B, C, D, E, F, G, round + 1, 1 );
+            FROUND( G, H, A, B, C, D, E, F, round + 2, 2 );
+            FROUND( F, G, H, A, B, C, D, E, round + 3, 3 );
+            FROUND( E, F, G, H, A, B, C, D, round + 4, 4 );
+            FROUND( D, E, F, G, H, A, B, C, round + 5, 5 );
+            FROUND( C, D, E, F, G, H, A, B, round + 6, 6 );
+            FROUND( B, C, D, E, F, G, H, A, round + 7, 7 );
+            FROUND( A, B, C, D, E, F, G, H, round + 8, 8 );
+            FROUND( H, A, B, C, D, E, F, G, round + 9, 9 );
+            FROUND( G, H, A, B, C, D, E, F, round + 10, 10 );
+            FROUND( F, G, H, A, B, C, D, E, round + 11, 11 );
+            FROUND( E, F, G, H, A, B, C, D, round + 12, 12 );
+            FROUND( D, E, F, G, H, A, B, C, round + 13, 13 );
+            FROUND( C, D, E, F, G, H, A, B, round + 14, 14 );
+            FROUND( B, C, D, E, F, G, H, A, round + 15, 15 );
+        }
+
+        // Feed-forward into the chaining state; the working registers keep the new state for the next block.
+        pH[0] = A = ADD( A, pH[0] );
+        pH[1] = B = ADD( B, pH[1] );
+        pH[2] = C = ADD( C, pH[2] );
+        pH[3] = D = ADD( D, pH[3] );
+        pH[4] = E = ADD( E, pH[4] );
+        pH[5] = F = ADD( F, pH[5] );
+        pH[6] = G = ADD( G, pH[6] );
+        pH[7] = H = ADD( H, pH[7] );
+
+        pbData += 128;
+        cbData -= 128;
+    }
+
+    *pcbRemaining = cbData;
+
+    //
+    // Wipe the variables;
+    //
+    SymCryptWipeKnownSize( W, sizeof( W ) );
+    SymCryptWipeKnownSize( &A, sizeof( A ) );
+    SymCryptWipeKnownSize( &B, sizeof( B ) );
+    SymCryptWipeKnownSize( &C, sizeof( C ) );
+    SymCryptWipeKnownSize( &D, sizeof( D ) );
+    SymCryptWipeKnownSize( &E, sizeof( E ) );
+    SymCryptWipeKnownSize( &F, sizeof( F ) );
+    SymCryptWipeKnownSize( &G, sizeof( G ) );
+    SymCryptWipeKnownSize( &H, sizeof( H ) );
+    SymCryptWipeKnownSize( &Wt, sizeof( Wt ) );
+}
+
+#endif
+
+//======================================================================================
+//
+// Switch between different implementations of compression function
+//
+// The implementation is chosen by compile-time CPU macro and, where applicable, run-time features:
+//   AMD64: always the _ull version (the Ymm paths below are commented out).
+//   ARM:   the _neon version when the NEON feature is present, otherwise _ull.
+//   X86:   the _xmm version when SSSE3 is present and SymCryptSaveXmm succeeds, otherwise _ull.
+//   other: the _ull version.
+//
+//FORCEINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512AppendBlocks(
+    _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain,
+    _In_reads_( cbData ) PCBYTE pbData,
+    SIZE_T cbData,
+    _Out_ SIZE_T * pcbRemaining )
+{
+#if SYMCRYPT_CPU_AMD64
+
+    // Temporarily disabling use of Ymm in SHA2
+    // SYMCRYPT_EXTENDED_SAVE_DATA SaveData;
+
+    // if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_AVX512 | SYMCRYPT_CPU_FEATURE_BMI2) &&
+    //     SymCryptSaveYmm(&SaveData) == SYMCRYPT_NO_ERROR)
+    // {
+    //     SymCryptSha512AppendBlocks_ymm_avx512vl_asm(pChain, pbData, cbData, pcbRemaining);
+
+    //     SymCryptRestoreYmm(&SaveData);
+    // }
+    // else if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_BMI2) &&
+    //     SymCryptSaveYmm(&SaveData) == SYMCRYPT_NO_ERROR)
+    // {
+    //     //SymCryptSha512AppendBlocks_ymm_1block(pChain, pbData, cbData, pcbRemaining);
+    //     //SymCryptSha512AppendBlocks_ymm_2blocks(pChain, pbData, cbData, pcbRemaining);
+    //     //SymCryptSha512AppendBlocks_ymm_4blocks(pChain, pbData, cbData, pcbRemaining);
+    //     SymCryptSha512AppendBlocks_ymm_avx2_asm(pChain, pbData, cbData, pcbRemaining);
+
+    //     SymCryptRestoreYmm(&SaveData);
+    // }
+    // else
+    {
+        SymCryptSha512AppendBlocks_ull( pChain, pbData, cbData, pcbRemaining );
+        //SymCryptSha512AppendBlocks_ull2( pChain, pbData, cbData, pcbRemaining );
+        //SymCryptSha512AppendBlocks_ull3( pChain, pbData, cbData, pcbRemaining );
+    }
+
+
+#elif SYMCRYPT_CPU_ARM
+
+    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) )
+    {
+        SymCryptSha512AppendBlocks_neon( pChain, pbData, cbData, pcbRemaining ); // Tegra T3: 48 c/B
+    } else {
+        SymCryptSha512AppendBlocks_ull( pChain, pbData, cbData, pcbRemaining ); // Tegra T3: 65.34 c/B
+        //SymCryptSha512AppendBlocks_ull2( pChain, pbData, cbData, pcbRemaining ); // Tegra T3: 77.4 c/B
+        //SymCryptSha512AppendBlocks_ull3( pChain, pbData, cbData, pcbRemaining ); // Tegra T3: 71.6 c/B
+    }
+
+#elif SYMCRYPT_CPU_X86
+
+    SYMCRYPT_EXTENDED_SAVE_DATA SaveData;
+
+    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSSE3 ) && SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR )
+    {
+        SymCryptSha512AppendBlocks_xmm( pChain, pbData, cbData, pcbRemaining );
+        SymCryptRestoreXmm( &SaveData );
+    } else {
+        SymCryptSha512AppendBlocks_ull( pChain, pbData, cbData, pcbRemaining ); // core2: 36.40 c/B
+        //SymCryptSha512AppendBlocks_ull2( pChain, pbData, cbData, pcbRemaining ); // core2: 49.09 c/B
+        //SymCryptSha512AppendBlocks_ull3( pChain, pbData, cbData, pcbRemaining ); // core2: 38.29 c/B
+    }
+
+#else
+
+    SymCryptSha512AppendBlocks_ull( pChain, pbData, cbData, pcbRemaining ); // need tuning...
+
+#endif
+}
diff --git a/libs/symcrypt/lib/sha512Par-ymm.c b/libs/symcrypt/lib/sha512Par-ymm.c
new file mode 100644
index 00000000000..c720569c929
--- /dev/null
+++ b/libs/symcrypt/lib/sha512Par-ymm.c
@@ -0,0 +1,243 @@
+//
+// Sha512Par-ymm.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+// All YMM code for SHA-512/SHA-384 Parallel operations
+// Requires compiler support for avx2
+//
+
+#include "precomp.h"
+
+extern SYMCRYPT_ALIGN_AT( 64 ) const UINT64 SymCryptSha512K[81];
+
+
+#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
+
+#ifdef __clang__
+#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
+#else
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#endif
+
+//
+// Code that uses the YMM registers.
+//
+
+//
+// ugly hack, there is no generic way to broadcast a 64-bit value between x86 & amd64
+//
+#if SYMCRYPT_CPU_X86
+#define M4x64broadcast_load(_p) _mm256_set_epi32( ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0], ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0], ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0], ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0] )
+#elif SYMCRYPT_CPU_AMD64
+#define M4x64broadcast_load(_p) _mm256_set1_epi64x( *(_p) )
+#endif
+
+#define MAJYMM( x, y, z ) _mm256_or_si256( _mm256_and_si256( _mm256_or_si256( z, y ), x ), _mm256_and_si256( z, y ))
+#define CHYMM( x, y, z ) _mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( z, y ), x ), z )
+
+// Each 64-bit rotation is spelled out as a left shift XORed with the complementary right shift.
+#define CSIGMA0YMM( x ) \
+    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
+    _mm256_slli_epi64(x,36) , _mm256_srli_epi64(x, 28) ),\
+    _mm256_slli_epi64(x,30) ), _mm256_srli_epi64(x, 34) ),\
+    _mm256_slli_epi64(x,25) ), _mm256_srli_epi64(x, 39) )
+#define CSIGMA1YMM( x ) \
+    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
+    _mm256_slli_epi64(x,50) , _mm256_srli_epi64(x, 14) ),\
+    _mm256_slli_epi64(x,46) ), _mm256_srli_epi64(x, 18) ),\
+    _mm256_slli_epi64(x,23) ), _mm256_srli_epi64(x, 41) )
+#define LSIGMA0YMM( x ) \
+    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
+    _mm256_slli_epi64(x,63) , _mm256_srli_epi64(x, 1) ),\
+    _mm256_slli_epi64(x,56) ), _mm256_srli_epi64(x, 8) ),\
+    _mm256_srli_epi64(x, 7) )
+#define LSIGMA1YMM( x ) \
+    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
+    _mm256_slli_epi64(x,45) , _mm256_srli_epi64(x, 19) ),\
+    _mm256_slli_epi64(x, 3) ), _mm256_srli_epi64(x, 61) ),\
+    _mm256_srli_epi64(x,6) )
+
+//
+// S0: 00 01 02 03
+// S1: 10 11 12 13
+// S2: 20 21 22 23
+// S3: 30 31 32 33
+//
+// T0: 00 10 02 12 unpacklo_epi64( S0, S1 ) note: unpacklo in AVX works in parallel on 2 128-bit values
+// T1: 01 11 03 13 unpackhi_epi64( S0, S1 )
+// T2: 20 30 22 32
+// T3: 21 31 23 33
+//
+// R0: 00 10 20 30
+// R1: 01 11 21 31
+// R2: 02 12 22 32
+// R3: 03 13 23 33
+
+
+#define YMM_TRANSPOSE_64( _R0, _R1, _R2, _R3, _S0, _S1, _S2, _S3 ) \
+    {\
+        __m256i _T0, _T1, _T2, _T3;\
+        _T0 = _mm256_unpacklo_epi64( _S0, _S1 ); _T1 = _mm256_unpackhi_epi64( _S0, _S1 );\
+        _T2 = _mm256_unpacklo_epi64( _S2, _S3 ); _T3 = _mm256_unpackhi_epi64( _S2, _S3 );\
+        \
+        _R0 = _mm256_permute2x128_si256( _T0, _T2, 0x20 ); _R1 = _mm256_permute2x128_si256( _T1, _T3, 0x20);\
+        _R2 = _mm256_permute2x128_si256( _T0, _T2, 0x31 ); _R3 = _mm256_permute2x128_si256( _T1, _T3, 0x31);\
+    }
+
+//
+// SymCryptParallelSha512AppendBlocks_ymm
+//
+// Runs the SHA-512 compression function on four independent states at once, one state per
+// 64-bit lane. While nBytes >= 128 it reads one 128-byte block from each of ppByte[0..3]
+// (advancing those pointers) and updates the four chaining states pChain[0..3]->H.
+// nBytes is only counted down locally; nothing is reported back about leftover bytes.
+//
+// Scratch layout (in __m256i elements of pScratch):
+//   buf[0..3]   copy of state words d, c, b, a kept for the feed-forward addition
+//   buf[4..11]  ha[0..7] = state words h, g, ..., b, a
+//   buf[12.. ]  W[0..79], the expanded message
+// The round loop stores each new 'e' value at W[r-4], so it overwrites ha[4..7] during the
+// first four rounds; that is why d, c, b, a are also kept in buf[0..3].
+// NOTE(review): this indexing needs at least 92 elements of scratch; confirm that
+// PAR_SCRATCH_ELEMENTS_512 provides that and that pScratch is suitably aligned for __m256i access.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512AppendBlocks_ymm(
+    _Inout_updates_( 4 ) PSYMCRYPT_SHA512_CHAINING_STATE * pChain,
+    _Inout_updates_( 4 ) PCBYTE * ppByte,
+    SIZE_T nBytes,
+    _Out_writes_( PAR_SCRATCH_ELEMENTS_512 * 32 ) PBYTE pScratch )
+{
+    __m256i * buf = (__m256i *)pScratch; // chaining state concatenated with the expanded input block
+    __m256i * W = &buf[4 + 8]; // W are the 80 words of the expanded input (W[0..79] are written below)
+    __m256i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a
+    __m256i A, B, C, D, T;
+    __m256i T0, T1, T2, T3;
+    int r;
+    __m256i BYTE_REVERSE_64;
+
+    _mm256_zeroupper();
+    BYTE_REVERSE_64 = _mm256_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 );
+
+    //
+    // The chaining state can be unaligned on x86, so we use unaligned loads
+    //
+
+    T0 = _mm256_loadu_si256( (__m256i *)&pChain[0]->H[0] );
+    T1 = _mm256_loadu_si256( (__m256i *)&pChain[1]->H[0] );
+    T2 = _mm256_loadu_si256( (__m256i *)&pChain[2]->H[0] );
+    T3 = _mm256_loadu_si256( (__m256i *)&pChain[3]->H[0] );
+
+    YMM_TRANSPOSE_64( ha[7], ha[6], ha[5], ha[4], T0, T1, T2, T3 );
+
+    T0 = _mm256_loadu_si256( (__m256i *)&pChain[0]->H[4] );
+    T1 = _mm256_loadu_si256( (__m256i *)&pChain[1]->H[4] );
+    T2 = _mm256_loadu_si256( (__m256i *)&pChain[2]->H[4] );
+    T3 = _mm256_loadu_si256( (__m256i *)&pChain[3]->H[4] );
+
+    YMM_TRANSPOSE_64( ha[3], ha[2], ha[1], ha[0], T0, T1, T2, T3 );
+
+    buf[0] = ha[4];
+    buf[1] = ha[5];
+    buf[2] = ha[6];
+    buf[3] = ha[7];
+
+    while( nBytes >= 128 )
+    {
+
+        //
+        // Capture the input into W[0..15]
+        //
+        for( r=0; r<16; r += 4 )
+        {
+            T0 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[0] ), BYTE_REVERSE_64 ); ppByte[0] += 32;
+            T1 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[1] ), BYTE_REVERSE_64 ); ppByte[1] += 32;
+            T2 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[2] ), BYTE_REVERSE_64 ); ppByte[2] += 32;
+            T3 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[3] ), BYTE_REVERSE_64 ); ppByte[3] += 32;
+
+            YMM_TRANSPOSE_64( W[r], W[r+1], W[r+2], W[r+3], T0, T1, T2, T3 );
+        }
+
+        //
+        // Expand the message
+        //
+        A = W[15];
+        B = W[14];
+        D = W[0];
+        for( r=16; r<80; r+= 2 )
+        {
+            // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16]
+
+            //
+            // Macro for one word of message expansion.
+            // Invariant:
+            // on entry: a = W[r-1], b = W[r-2], d = W[r-16]
+            // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15]
+            //
+            #define EXPAND( a, b, c, d, r ) \
+                c = W[r-15]; \
+                b = _mm256_add_epi64( _mm256_add_epi64( _mm256_add_epi64( d, LSIGMA1YMM( b ) ), W[r-7] ), LSIGMA0YMM( c ) ); \
+                W[r] = b; \
+
+            EXPAND( A, B, C, D, r );
+            EXPAND( B, A, D, C, (r+1));
+
+            #undef EXPAND
+        }
+
+        A = ha[7];
+        B = ha[6];
+        C = ha[5];
+        D = ha[4];
+
+        for( r=0; r<80; r += 4 )
+        {
+            //
+            // Loop invariant:
+            // A, B, C, and D are the a,b,c,d values of the current state.
+            // W[r] is the next expanded message word to be processed.
+            // W[r-8 .. r-5] contain the current state words h, g, f, e.
+            //
+
+            //
+            // Macro to compute one round
+            // The round constant is broadcast to all four 64-bit lanes by M4x64broadcast_load.
+            // NOTE(review): the original remark here mentioned a PSHUFD shuffle with immediate
+            // halves 0100; no shuffle is used in this YMM variant.
+            //
+            #define DO_ROUND( a, b, c, d, t, r ) \
+                t = W[r]; \
+                t = _mm256_add_epi64( t, CSIGMA1YMM( W[r-5] ) ); \
+                t = _mm256_add_epi64( t, W[r-8] ); \
+                t = _mm256_add_epi64( t, CHYMM( W[r-5], W[r-6], W[r-7] ) ); \
+                t = _mm256_add_epi64( t, M4x64broadcast_load( &SymCryptSha512K[r] )); \
+                W[r-4] = _mm256_add_epi64( t, d ); \
+                d = _mm256_add_epi64( t, CSIGMA0YMM( a ) ); \
+                d = _mm256_add_epi64( d, MAJYMM( c, b, a ) );
+
+            DO_ROUND( A, B, C, D, T, r );
+            DO_ROUND( D, A, B, C, T, (r+1) );
+            DO_ROUND( C, D, A, B, T, (r+2) );
+            DO_ROUND( B, C, D, A, T, (r+3) );
+            #undef DO_ROUND
+        }
+
+        // Feed-forward. r is 80 here, so W[r-5 .. r-8] are the final e, f, g, h values.
+        buf[3] = ha[7] = _mm256_add_epi64( buf[3], A );
+        buf[2] = ha[6] = _mm256_add_epi64( buf[2], B );
+        buf[1] = ha[5] = _mm256_add_epi64( buf[1], C );
+        buf[0] = ha[4] = _mm256_add_epi64( buf[0], D );
+        ha[3] = _mm256_add_epi64( ha[3], W[r-5] );
+        ha[2] = _mm256_add_epi64( ha[2], W[r-6] );
+        ha[1] = _mm256_add_epi64( ha[1], W[r-7] );
+        ha[0] = _mm256_add_epi64( ha[0], W[r-8] );
+
+        nBytes -= 128;
+    }
+
+    // Transpose back from one-word-per-register to one-state-per-register and store.
+    YMM_TRANSPOSE_64( T0, T1, T2, T3, ha[7], ha[6], ha[5], ha[4] );
+    _mm256_storeu_si256( (__m256i *)&pChain[0]->H[0], T0 );
+    _mm256_storeu_si256( (__m256i *)&pChain[1]->H[0], T1 );
+    _mm256_storeu_si256( (__m256i *)&pChain[2]->H[0], T2 );
+    _mm256_storeu_si256( (__m256i *)&pChain[3]->H[0], T3 );
+
+    YMM_TRANSPOSE_64( T0, T1, T2, T3, ha[3], ha[2], ha[1], ha[0] );
+    _mm256_storeu_si256( (__m256i *)&pChain[0]->H[4], T0 );
+    _mm256_storeu_si256( (__m256i *)&pChain[1]->H[4], T1 );
+    _mm256_storeu_si256( (__m256i *)&pChain[2]->H[4], T2 );
+    _mm256_storeu_si256( (__m256i *)&pChain[3]->H[4], T3 );
+
+    _mm256_zeroupper();
+}
+
+#ifdef __clang__
+#pragma clang attribute pop
+#else
+#pragma GCC pop_options
+#endif
+
+#endif // CPU_X86_X64
diff --git a/libs/symcrypt/lib/sha512Par.c b/libs/symcrypt/lib/sha512Par.c
new file mode 100644
index 00000000000..d85bdecbf63
--- /dev/null
+++ b/libs/symcrypt/lib/sha512Par.c
@@ -0,0 +1,798 @@
+//
+// Sha512Par.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+//
+// This module contains the routines to implement SHA-512/SHA-384 from FIPS 180-2 in parallel mode
+//
+
+#include "precomp.h"
+
+extern SYMCRYPT_ALIGN_AT( 64 ) const UINT64 SymCryptSha512K[81];
+
+
+//
+// Not all CPU architectures support parallel code.
+//
+#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
+
+#define SUPPORT_PARALLEL 1
+
+#define MIN_PARALLEL 2
+#define MAX_PARALLEL 4
+
+#elif SYMCRYPT_CPU_ARM
+
+#define SUPPORT_PARALLEL 0
+//#define MIN_PARALLEL 3
+//#define MAX_PARALLEL 3
+
+#else
+
+#define SUPPORT_PARALLEL 0
+
+#endif
+
+
+//
+// ugly hack, there is no generic way to broadcast a 64-bit value between x86 & amd64
+//
+#if SYMCRYPT_CPU_X86
+#define M2x64broadcast_load(_p) _mm_set_epi32( ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0], ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0] )
+#elif SYMCRYPT_CPU_AMD64
+#define M2x64broadcast_load(_p) _mm_shuffle_epi32( _mm_cvtsi64_si128( *(_p) ), 0x44 )
+#endif
+
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512AppendBytes_serial(
+    _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork,
+    _In_range_(1, MAX_PARALLEL) SIZE_T nPar,
+    SIZE_T nBytes );
+
+//
+// Currently these are the generic implementations in terms of the single hash code.
+//
+
+//
+// Initializes each of the nStates SHA-512 states in pStates with SymCryptSha512Init.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512Init(
+    _Out_writes_( nStates ) PSYMCRYPT_SHA512_STATE pStates,
+    SIZE_T nStates )
+{
+    SIZE_T i;
+
+    for( i=0; i<nStates; i++ )
+    {
+        SymCryptSha512Init( &pStates[i] );
+    }
+}
+
+//
+// Initializes each of the nStates SHA-384 states in pStates with SymCryptSha384Init.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha384Init(
+    _Out_writes_( nStates ) PSYMCRYPT_SHA384_STATE pStates,
+    SIZE_T nStates )
+{
+    SIZE_T i;
+
+    for( i=0; i<nStates; i++ )
+    {
+        SymCryptSha384Init( &pStates[i] );
+    }
+}
+
+#if !SUPPORT_PARALLEL
+//
+// No parallel support on this CPU
+//
+// Both Process entry points simply forward to the serial implementation.
+//
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptParallelSha512Process(
+    _Inout_updates_( nStates ) PSYMCRYPT_SHA512_STATE pStates,
+    SIZE_T nStates,
+    _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations,
+    SIZE_T nOperations,
+    _Out_writes_( cbScratch ) PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    return SymCryptParallelHashProcess_serial( SymCryptParallelSha512Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch );
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptParallelSha384Process(
+    _Inout_updates_( nStates ) PSYMCRYPT_SHA384_STATE pStates,
+    SIZE_T nStates,
+    _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations,
+    SIZE_T nOperations,
+    _Out_writes_( cbScratch ) PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    return SymCryptParallelHashProcess_serial( SymCryptParallelSha384Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch );
+}
+
+#endif
+
+
+#if SUPPORT_PARALLEL
+
+
+//
+// This function looks at a state and decides what to do.
+// If it returns FALSE, then this state is done and no further processing is required.
+// If it returns TRUE, the pbData/cbData have to be processed in parallel.
+// This function is called again on the same state after the pbData/cbData have been processed.
+//
+// Internally, it keeps track of the next step to be taken for this state.
+// the processingState keeps track of the next action to take.
+//
+
+
+//
+// First padding step for a Result operation.
+// Appends the 0x80 byte after the buffered data, wipes the rest of the block buffer, and
+// queues the buffer as one full input block. If fewer than 16 bytes remain after the 0x80
+// byte, the length cannot fit and a second padding block is scheduled (STATE_RESULT2);
+// otherwise the length field is written into the last 16 bytes and the state moves to
+// STATE_RESULT_DONE. The length is dataLengthH:dataLengthL shifted left by 3 and stored MSB first.
+//
+BOOLEAN
+SYMCRYPT_CALL
+SymCryptParallelSha512Result1(
+    _In_ PCSYMCRYPT_PARALLEL_HASH pParHash,
+    _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState,
+    _Inout_ PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch,
+    _Out_ BOOLEAN *pRes)
+{
+    UINT32 bytesInBuffer = pState->bytesInBuffer;
+
+    UNREFERENCED_PARAMETER( pParHash );
+    //
+    // Function is called when a Result is requested from a parallel hash state.
+    // Do the first step of the padding.
+    //
+    pState->buffer[bytesInBuffer++] = 0x80;
+    SymCryptWipe( &pState->buffer[bytesInBuffer], SYMCRYPT_SHA512_INPUT_BLOCK_SIZE - bytesInBuffer );
+
+    pScratch->pbData = &pState->buffer[0];
+    pScratch->cbData = SYMCRYPT_SHA512_INPUT_BLOCK_SIZE;
+
+    if( bytesInBuffer > SYMCRYPT_SHA512_INPUT_BLOCK_SIZE - 16 )
+    {
+        // We need 2 blocks for the padding
+        pScratch->processingState = STATE_RESULT2;
+    } else {
+        SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA512_INPUT_BLOCK_SIZE-16], (pState->dataLengthH << 3) + (pState->dataLengthL >> 61) );
+        SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA512_INPUT_BLOCK_SIZE- 8], (pState->dataLengthL << 3) );
+        pScratch->processingState = STATE_RESULT_DONE;
+    }
+
+    *pRes = TRUE; // return value from the SetWork function
+    return TRUE; // Return from the SetWork function
+}
+
+
+//
+// Second padding step, used only when the first padding block had no room for the length.
+// Builds a block that is all wiped bytes except for the length field in its last 16 bytes,
+// queues it for processing, and moves the state to STATE_RESULT_DONE.
+//
+BOOLEAN
+SYMCRYPT_CALL
+SymCryptParallelSha512Result2(
+    _In_ PCSYMCRYPT_PARALLEL_HASH pParHash,
+    _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState,
+    _Inout_ PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch,
+    _Out_ BOOLEAN *pRes)
+{
+    UNREFERENCED_PARAMETER( pParHash );
+    //
+    // Called for the 2nd block of a long padding
+    //
+    SymCryptWipe( &pState->buffer[0], SYMCRYPT_SHA512_INPUT_BLOCK_SIZE );
+    SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA512_INPUT_BLOCK_SIZE-16], (pState->dataLengthH << 3) + (pState->dataLengthL >> 61) );
+    SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA512_INPUT_BLOCK_SIZE- 8], (pState->dataLengthL << 3) );
+    pScratch->pbData = &pState->buffer[0];
+    pScratch->cbData = SYMCRYPT_SHA512_INPUT_BLOCK_SIZE;
+    pScratch->processingState = STATE_RESULT_DONE;
+    *pRes = TRUE;
+    return TRUE;
+}
+
+//
+// Final step of a SHA-512 Result operation.
+// Writes the eight chaining words MSB first into the operation's output buffer, then wipes
+// the state and re-initializes it so it can be used for a new hash computation.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512ResultDone(
+    _In_ PCSYMCRYPT_PARALLEL_HASH pParHash,
+    _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState,
+    _In_ PCSYMRYPT_PARALLEL_HASH_OPERATION pOp)
+{
+    PSYMCRYPT_SHA512_STATE pSha512State = (PSYMCRYPT_SHA512_STATE) pState;
+
+    UNREFERENCED_PARAMETER( pParHash );
+
+    SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT );
+    SYMCRYPT_ASSERT( pOp->cbBuffer == SYMCRYPT_SHA512_RESULT_SIZE );
+
+    SymCryptUint64ToMsbFirst( &pSha512State->chain.H[0], pOp->pbBuffer, 8 );
+    SymCryptWipeKnownSize( pSha512State, sizeof( *pSha512State ));
+    SymCryptSha512Init( pSha512State );
+}
+
+//
+// Final step of a SHA-384 Result operation.
+// Same as the SHA-512 variant, but only the first six chaining words are written out and
+// the state is re-initialized with SymCryptSha384Init.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha384ResultDone(
+    _In_ PCSYMCRYPT_PARALLEL_HASH pParHash,
+    _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState,
+    _In_ PCSYMRYPT_PARALLEL_HASH_OPERATION pOp)
+{
+    PSYMCRYPT_SHA384_STATE pSha384State = (PSYMCRYPT_SHA384_STATE) pState;
+
+    UNREFERENCED_PARAMETER( pParHash );
+
+    SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT );
+    SYMCRYPT_ASSERT( pOp->cbBuffer == SYMCRYPT_SHA384_RESULT_SIZE );
+
+    SymCryptUint64ToMsbFirst( &pSha384State->chain.H[0], pOp->pbBuffer, 6 );
+    SymCryptWipeKnownSize( pSha384State, sizeof( *pSha384State ));
+    SymCryptSha384Init( pSha384State );
+}
+
+
+C_ASSERT( (SYMCRYPT_SIMD_ELEMENT_SIZE & (SYMCRYPT_SIMD_ELEMENT_SIZE - 1 )) == 0 ); // check that it is a power of 2
+
+
+
+//
+// Common implementation behind the SHA-512 and SHA-384 parallel Process entry points.
+// On x86/AMD64 it uses 4-way processing when AVX2 and SSSE3 are present and SymCryptSaveYmm
+// succeeds, 2-way processing when SSSE3 is present and SymCryptSaveXmm succeeds, and the
+// serial implementation otherwise. The saved register state is restored after the call.
+// Returns the error code of whichever implementation was run.
+//
+// Note: this function is only compiled when SUPPORT_PARALLEL is nonzero, and the ARM
+// configuration at the top of this file sets it to 0 (leaving MAX_PARALLEL undefined there),
+// so the ARM branch below is not built with the current settings.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptParallelSha512Sha384Process(
+    _In_ PCSYMCRYPT_PARALLEL_HASH pParHash,
+    _Inout_ PVOID pStates,
+    SIZE_T nStates,
+    _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations,
+    SIZE_T nOperations,
+    _Out_writes_( cbScratch ) PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    UINT32 maxParallel;
+
+#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86
+    SYMCRYPT_EXTENDED_SAVE_DATA SaveState;
+
+    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_SSSE3 ) && SymCryptSaveYmm( &SaveState ) == SYMCRYPT_NO_ERROR )
+    {
+        maxParallel = 4;
+        scError = SymCryptParallelHashProcess( pParHash,
+                                               pStates,
+                                               nStates,
+                                               pOperations,
+                                               nOperations,
+                                               pbScratch,
+                                               cbScratch,
+                                               maxParallel );
+
+        SymCryptRestoreYmm( &SaveState );
+    } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSSE3 ) && SymCryptSaveXmm( &SaveState ) == SYMCRYPT_NO_ERROR )
+    {
+        maxParallel = 2;
+        scError = SymCryptParallelHashProcess( pParHash,
+                                               pStates,
+                                               nStates,
+                                               pOperations,
+                                               nOperations,
+                                               pbScratch,
+                                               cbScratch,
+                                               maxParallel );
+        SymCryptRestoreXmm( &SaveState );
+    } else {
+        scError = SymCryptParallelHashProcess_serial( pParHash, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch );
+    }
+
+#elif SYMCRYPT_CPU_ARM
+    maxParallel = MAX_PARALLEL;
+
+    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) )
+    {
+        scError = SymCryptParallelHashProcess( pParHash,
+                                               pStates,
+                                               nStates,
+                                               pOperations,
+                                               nOperations,
+                                               pbScratch,
+                                               cbScratch,
+                                               maxParallel );
+    } else {
+        scError = SymCryptParallelHashProcess_serial( pParHash, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch );
+    }
+#else
+    scError = SymCryptParallelHashProcess_serial( pParHash, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch );
+#endif
+    return scError;
+}
+
+//
+// Parallel SHA-512 Process entry point: forwards to the common implementation with the SHA-512 algorithm descriptor.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptParallelSha512Process(
+    _Inout_updates_( nStates ) PSYMCRYPT_SHA512_STATE pStates,
+    SIZE_T nStates,
+    _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations,
+    SIZE_T nOperations,
+    _Out_writes_( cbScratch ) PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    return SymCryptParallelSha512Sha384Process( SymCryptParallelSha512Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch );
+}
+
+
+//
+// Parallel SHA-384 Process entry point: forwards to the common implementation with the SHA-384 algorithm descriptor.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptParallelSha384Process(
+    _Inout_updates_( nStates ) PSYMCRYPT_SHA384_STATE pStates,
+    SIZE_T nStates,
+    _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations,
+    SIZE_T nOperations,
+    _Out_writes_( cbScratch ) PBYTE pbScratch,
+    SIZE_T cbScratch )
+{
+    return SymCryptParallelSha512Sha384Process( SymCryptParallelSha384Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch );
+}
+
+
+#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
+//
+// Code that uses the XMM registers.
+//
+
+#ifdef __clang__
+#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function)
+#else
+#pragma GCC push_options
+#pragma GCC target("ssse3")
+#endif
+
+//
+// SHA-512 helper functions on __m128i values. Every macro acts on the two 64-bit
+// lanes independently. A rotate-right by n is written as the XOR of a left shift
+// by (64-n) and a right shift by n.
+//   MAJXMM, CHXMM : the Maj and Ch bitwise selection functions
+//   CSIGMA0XMM    : ROTR28 ^ ROTR34 ^ ROTR39   (used in the round function)
+//   CSIGMA1XMM    : ROTR14 ^ ROTR18 ^ ROTR41   (used in the round function)
+//   LSIGMA0XMM    : ROTR1  ^ ROTR8  ^ SHR7     (used in the message expansion)
+//   LSIGMA1XMM    : ROTR19 ^ ROTR61 ^ SHR6     (used in the message expansion)
+//
+#define MAJXMM( x, y, z ) _mm_or_si128( _mm_and_si128( _mm_or_si128( z, y ), x ), _mm_and_si128( z, y ))
+#define CHXMM( x, y, z ) _mm_xor_si128( _mm_and_si128( _mm_xor_si128( z, y ), x ), z )
+
+#define CSIGMA0XMM( x ) \
+    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
+    _mm_slli_epi64(x,36) , _mm_srli_epi64(x, 28) ),\
+    _mm_slli_epi64(x,30) ), _mm_srli_epi64(x, 34) ),\
+    _mm_slli_epi64(x,25) ), _mm_srli_epi64(x, 39) )
+#define CSIGMA1XMM( x ) \
+    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
+    _mm_slli_epi64(x,50) , _mm_srli_epi64(x, 14) ),\
+    _mm_slli_epi64(x,46) ), _mm_srli_epi64(x, 18) ),\
+    _mm_slli_epi64(x,23) ), _mm_srli_epi64(x, 41) )
+#define LSIGMA0XMM( x ) \
+    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
+    _mm_slli_epi64(x,63) , _mm_srli_epi64(x, 1) ),\
+    _mm_slli_epi64(x,56) ), _mm_srli_epi64(x, 8) ),\
+    _mm_srli_epi64(x, 7) )
+#define LSIGMA1XMM( x ) \
+    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
+    _mm_slli_epi64(x,45) , _mm_srli_epi64(x, 19) ),\
+    _mm_slli_epi64(x, 3) ), _mm_srli_epi64(x, 61) ),\
+    _mm_srli_epi64(x,6) )
+
+//
+// 2x2 transpose of 64-bit elements: _R0 receives the low lanes of (_S0, _S1) and
+// _R1 receives their high lanes.
+//
+#define XMM_TRANSPOSE_64( _R0, _R1, _S0, _S1 ) \
+    {\
+        _R0 = _mm_unpacklo_epi64( _S0, _S1 );\
+        _R1 = _mm_unpackhi_epi64( _S0, _S1 );\
+    }
+
+//
+// SymCryptParallelSha512AppendBlocks_xmm
+//
+// Runs the SHA-512 block function over two message streams at the same time,
+// one stream in each 64-bit lane of an __m128i.
+//
+//  pChain   - the two chaining states; loaded on entry and stored back on exit
+//  ppByte   - the two input pointers; each is advanced as its data is consumed
+//  nBytes   - bytes to take from each stream; only whole 128-byte blocks are processed
+//  pScratch - work area; see the layout described on the locals below
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512AppendBlocks_xmm(
+    _Inout_updates_( 2 ) PSYMCRYPT_SHA512_CHAINING_STATE * pChain,
+    _Inout_updates_( 2 ) PCBYTE * ppByte,
+    SIZE_T nBytes,
+    _Out_writes_( PAR_SCRATCH_ELEMENTS_512 ) __m128i * pScratch )
+{
+    //
+    // Scratch layout: buf[0..3] = saved copies of ha[4..7], buf[4..11] = ha[0..7],
+    // buf[12..] = W. Because ha sits directly below W, W[-8..-1] aliases ha[0..7];
+    // the round macro relies on that, and its W[r-4] stores overwrite ha[4..7]
+    // during the first four rounds - which is why buf[0..3] keeps a second copy.
+    //
+    __m128i * buf = pScratch; // chaining state concatenated with the expanded input block
+    __m128i * W = &buf[4 + 8]; // W are the 80 words of the expanded input
+    __m128i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a
+    __m128i A, B, C, D, T;
+    __m128i T0, T1;
+    // Shuffle mask that reverses the byte order inside each 64-bit lane.
+    const __m128i BYTE_REVERSE_64 = _mm_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 );
+    int r;
+
+
+    //
+    // The chaining state can be unaligned on x86, so we use unaligned loads
+    //
+
+    // Each load fetches two consecutive H words of one stream; the transpose
+    // regroups them so that every ha[] element holds the same word of both streams.
+    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[0] );
+    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[0] );
+
+    XMM_TRANSPOSE_64( ha[7], ha[6], T0, T1 );
+
+    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[2] );
+    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[2] );
+    XMM_TRANSPOSE_64( ha[5], ha[4], T0, T1 );
+
+    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[4] );
+    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[4] );
+    XMM_TRANSPOSE_64( ha[3], ha[2], T0, T1 );
+
+    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[6] );
+    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[6] );
+    XMM_TRANSPOSE_64( ha[1], ha[0], T0, T1 );
+
+    // Keep a copy of ha[4..7]; the rounds overwrite those slots (see layout note).
+    buf[0] = ha[4];
+    buf[1] = ha[5];
+    buf[2] = ha[6];
+    buf[3] = ha[7];
+
+    while( nBytes >= 128 )
+    {
+
+        //
+        // Capture the input into W[0..15]
+        //
+        for( r=0; r<16; r += 2 )
+        {
+            T0 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[0] ), BYTE_REVERSE_64 ); ppByte[0] += 16;
+            T1 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[1] ), BYTE_REVERSE_64 ); ppByte[1] += 16;
+
+            XMM_TRANSPOSE_64( W[r], W[r+1], T0, T1 );
+        }
+
+        //
+        // Expand the message
+        //
+        A = W[15];
+        B = W[14];
+        D = W[0];
+        for( r=16; r<80; r+= 2 )
+        {
+            // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16]
+
+            //
+            // Macro for one word of message expansion.
+            // Invariant:
+            //      on entry: a = W[r-1], b = W[r-2], d = W[r-16]
+            //      on exit:  W[r] computed, a = W[r-1], b = W[r], c = W[r-15]
+            //
+            #define EXPAND( a, b, c, d, r ) \
+                        c = W[r-15]; \
+                        b = _mm_add_epi64( _mm_add_epi64( _mm_add_epi64( d, LSIGMA1XMM( b ) ), W[r-7] ), LSIGMA0XMM( c ) ); \
+                        W[r] = b; \
+
+            EXPAND( A, B, C, D, r );
+            EXPAND( B, A, D, C, (r+1));
+
+            #undef EXPAND
+        }
+
+        A = ha[7];
+        B = ha[6];
+        C = ha[5];
+        D = ha[4];
+
+        for( r=0; r<80; r += 4 )
+        {
+            //
+            // Loop invariant:
+            // A, B, C, and D are the a,b,c,d values of the current state.
+            // W[r] is the next expanded message word to be processed.
+            // W[r-8 .. r-5] contain the current state words h, g, f, e.
+            //
+
+            //
+            // Macro to compute one round
+            // The shuffle is to duplicate the 64-bit value to both lanes.
+            // Each half of the immediate is 0100. See the documentation of the
+            // PSHUFD instruction.
+            //
+
+            #define DO_ROUND( a, b, c, d, t, r ) \
+                t = W[r]; \
+                t = _mm_add_epi64( t, CSIGMA1XMM( W[r-5] ) ); \
+                t = _mm_add_epi64( t, W[r-8] ); \
+                t = _mm_add_epi64( t, CHXMM( W[r-5], W[r-6], W[r-7] ) ); \
+                t = _mm_add_epi64( t, M2x64broadcast_load( &SymCryptSha512K[r] )); \
+                W[r-4] = _mm_add_epi64( t, d ); \
+                d = _mm_add_epi64( t, CSIGMA0XMM( a ) ); \
+                d = _mm_add_epi64( d, MAJXMM( c, b, a ) );
+
+            // The argument rotation lets the register that held d receive the new a
+            // each round, so no values have to be moved between rounds.
+            DO_ROUND( A, B, C, D, T, r );
+            DO_ROUND( D, A, B, C, T, (r+1) );
+            DO_ROUND( C, D, A, B, T, (r+2) );
+            DO_ROUND( B, C, D, A, T, (r+3) );
+            #undef DO_ROUND
+        }
+
+        // Feed-forward. r == 80 here, so W[r-8 .. r-5] are the final h, g, f, e.
+        // a..d are added to the saved copies in buf[0..3] and written to both places.
+        buf[3] = ha[7] = _mm_add_epi64( buf[3], A );
+        buf[2] = ha[6] = _mm_add_epi64( buf[2], B );
+        buf[1] = ha[5] = _mm_add_epi64( buf[1], C );
+        buf[0] = ha[4] = _mm_add_epi64( buf[0], D );
+        ha[3] = _mm_add_epi64( ha[3], W[r-5] );
+        ha[2] = _mm_add_epi64( ha[2], W[r-6] );
+        ha[1] = _mm_add_epi64( ha[1], W[r-7] );
+        ha[0] = _mm_add_epi64( ha[0], W[r-8] );
+
+        nBytes -= 128;
+    }
+
+
+    // Transpose back to one-stream-per-register form and store both chaining states.
+    XMM_TRANSPOSE_64( T0, T1, ha[7], ha[6] );
+    _mm_storeu_si128( (__m128i *)&pChain[0]->H[0], T0 );
+    _mm_storeu_si128( (__m128i *)&pChain[1]->H[0], T1 );
+
+    XMM_TRANSPOSE_64( T0, T1, ha[5], ha[4] );
+    _mm_storeu_si128( (__m128i *)&pChain[0]->H[2], T0 );
+    _mm_storeu_si128( (__m128i *)&pChain[1]->H[2], T1 );
+
+    XMM_TRANSPOSE_64( T0, T1, ha[3], ha[2] );
+    _mm_storeu_si128( (__m128i *)&pChain[0]->H[4], T0 );
+    _mm_storeu_si128( (__m128i *)&pChain[1]->H[4], T1 );
+
+    XMM_TRANSPOSE_64( T0, T1, ha[1], ha[0] );
+    _mm_storeu_si128( (__m128i *)&pChain[0]->H[6], T0 );
+    _mm_storeu_si128( (__m128i *)&pChain[1]->H[6], T1 );
+
+}
+
+#ifdef __clang__
+#pragma clang attribute pop
+#else
+#pragma GCC pop_options
+#endif
+
+#endif // CPU_X86_X64
+
+#if SYMCRYPT_CPU_ARM
+
+
+#endif // CPU_ARM
+
+
+
+//
+// SymCryptParallelSha512AppendBytes_serial
+//
+// Non-SIMD path: feeds nBytes of each of the nPar work items, one after the other,
+// through the single-stream SHA-512 append-blocks routine and then advances that
+// item's pbData / cbData. nBytes must be a multiple of the SHA-512 input block size.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512AppendBytes_serial(
+    _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork,
+    _In_range_(1, MAX_PARALLEL) SIZE_T nPar,
+    SIZE_T nBytes )
+{
+    SIZE_T i;
+    SIZE_T tmp;     // receives the callee's last output argument; the value is not used here
+
+    SYMCRYPT_ASSERT( nBytes % SYMCRYPT_SHA512_INPUT_BLOCK_SIZE == 0 );
+    SYMCRYPT_ASSERT( nPar >= 1 && nPar <= MAX_PARALLEL );
+
+    for( i=0; i < nPar; i++ )
+    {
+        SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes );
+#if SYMCRYPT_CPU_X86
+        //
+        // On X86 the Sha512 append blocks function saves the XMM registers again, which is not allowed at DISPATCH level.
+        // We call the internal function that assumes the XMM registers are already saved.
+        // This function is only called when we are doing parallel hashing, which means that at a minimum we have SSSE3 and
+        // the XMM registers are saved.
+        //
+        SymCryptSha512AppendBlocks_xmm( & ((PSYMCRYPT_SHA512_STATE)(pWork[i]->hashState))->chain, pWork[i]->pbData, nBytes, &tmp );
+#else
+        SymCryptSha512AppendBlocks( & ((PSYMCRYPT_SHA512_STATE)(pWork[i]->hashState))->chain, pWork[i]->pbData, nBytes, &tmp );
+#endif
+        pWork[i]->pbData += nBytes;
+        pWork[i]->cbData -= nBytes;
+    }
+    return;
+}
+
+//
+// SymCryptParallelSha512Append
+//
+// Appends nBytes from each of the nPar work items to its SHA-512 state.
+// Fewer than MIN_PARALLEL items are handled by the serial routine above. Otherwise
+// the items are padded to 2 or 4 parameter sets (by repeating item 0) and handed to
+// the two-lane XMM or four-lane YMM block function. pbSimdScratch must be aligned
+// to SYMCRYPT_SIMD_ELEMENT_SIZE; cbSimdScratch is only checked by an assert.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512Append(
+    _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork,
+    _In_range_(1, MAX_PARALLEL) SIZE_T nPar,
+    SIZE_T nBytes,
+    _Inout_updates_( SYMCRYPT_SIMD_ELEMENT_SIZE * PAR_SCRATCH_ELEMENTS_512 )
+    PBYTE pbSimdScratch,
+    SIZE_T cbSimdScratch )
+{
+    PSYMCRYPT_SHA512_CHAINING_STATE apChain[MAX_PARALLEL];
+    PCBYTE apData[MAX_PARALLEL];
+    SIZE_T i;
+    UINT32 maxParallel;
+
+    UNREFERENCED_PARAMETER( cbSimdScratch ); // not referenced on FRE builds
+    SYMCRYPT_ASSERT( cbSimdScratch >= PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE );
+    SYMCRYPT_ASSERT( ((SIZE_T)pbSimdScratch & (SYMCRYPT_SIMD_ELEMENT_SIZE - 1)) == 0 );
+
+    //
+    // Compute maxParallel; this is 2 if nPar <= 2, and 4 if nPar = 3,4.
+    // This is how many parameter sets we have to set up.
+    //
+#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
+
+    maxParallel = (nPar + 1) & ~1;      // round nPar up to the next even number
+    SYMCRYPT_ASSERT( maxParallel == 2 || (maxParallel == 4 && SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 )) );
+
+#elif SYMCRYPT_CPU_ARM
+
+    maxParallel = 2;
+
+#endif
+
+    SYMCRYPT_ASSERT( nPar >= 1 && nPar <= maxParallel );
+
+    if( nPar < MIN_PARALLEL )
+    {
+        SymCryptParallelSha512AppendBytes_serial( pWork, nPar, nBytes );
+
+        // Done with this function.
+        goto cleanup;
+    }
+
+    //
+    // Our parallel code expects exactly 2 or 4 parallel computations.
+    // We simply duplicate the first one if we get fewer parallel ones.
+    // That means we write the result multiple times, but it saves a lot of
+    // extra if()s in the main codeline.
+    //
+
+    // Record chain and data pointers for the real items; the work items are
+    // advanced here, before the block function runs on the recorded pointers.
+    i = 0;
+    while( i < nPar )
+    {
+        SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes );
+        apChain[i] = & ((PSYMCRYPT_SHA512_STATE)(pWork[i]->hashState))->chain;
+        apData[i] = pWork[i]->pbData;
+        pWork[i]->pbData += nBytes;
+        pWork[i]->cbData -= nBytes;
+        i++;
+    }
+
+    // Fill the unused lanes with duplicates of item 0.
+    while( i < maxParallel )
+    {
+        apChain[i] = apChain[0];
+        apData[i] = apData[0];
+        i++;
+    }
+
+#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
+    if( maxParallel == 4 )
+    {
+        SymCryptParallelSha512AppendBlocks_ymm( &apChain[0], &apData[0], nBytes, (PBYTE)((__m256i *)pbSimdScratch) );
+    } else {
+        SymCryptParallelSha512AppendBlocks_xmm( &apChain[0], &apData[0], nBytes, (__m128i *)pbSimdScratch );
+    }
+#elif SYMCRYPT_CPU_ARM
+    // NOTE(review): the NEON call below is commented out, so as written this branch
+    // advances the work items without hashing anything. Presumably SUPPORT_PARALLEL
+    // is never enabled for ARM with this file - confirm.
+    UNREFERENCED_PARAMETER( pbSimdScratch );
+    //SymCryptParallelSha512AppendBlocks_neon( &apChain[0], &apData[0], nBytes, (__n128 *) pbSimdScratch );
+#else
+#error Unknown CPU
+#endif
+
+cleanup:
+    ;// no cleanup at this moment.
+}
+
+
+#endif // SUPPORT_PARALLEL
+
+#if SUPPORT_PARALLEL
+
+//
+// Parallel-hash descriptors for SHA-512 and SHA-384. They differ only in the
+// underlying hash algorithm and in the ResultDone callback.
+//
+const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha512Algorithm_default = {
+    &SymCryptSha512Algorithm_default,
+    PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE,
+    &SymCryptParallelSha512Result1,
+    &SymCryptParallelSha512Result2,
+    &SymCryptParallelSha512ResultDone,
+    &SymCryptParallelSha512Append,
+};
+
+const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha384Algorithm_default = {
+    &SymCryptSha384Algorithm_default,
+    PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE,
+    &SymCryptParallelSha512Result1,
+    &SymCryptParallelSha512Result2,
+    &SymCryptParallelSha384ResultDone,
+    &SymCryptParallelSha512Append,
+};
+
+#else
+
+//
+// For platforms that do not have a parallel hash implementation
+// we use this structure to provide the necessary data to the _serial
+// implementation of the function.
+//
+const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha512Algorithm_default = {
+    &SymCryptSha512Algorithm_default,
+    PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+};
+
+const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha384Algorithm_default = {
+    &SymCryptSha384Algorithm_default,
+    PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+};
+
+#endif
+
+// Public handles to whichever descriptor variant was compiled in above.
+const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha384Algorithm = &SymCryptParallelSha384Algorithm_default;
+const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha512Algorithm = &SymCryptParallelSha512Algorithm_default;
+
+
+#define N_SELFTEST_STATES 3 // Just enough to trigger YMM usage
+
+//
+// SymCryptParallelSha384Selftest
+//
+// Known-answer test: hashes SymCryptTestMsg3 in N_SELFTEST_STATES states with a
+// single SymCryptParallelSha384Process call (one APPEND and one RESULT operation
+// per state). Calls SymCryptFatal( 'PS38' ) if the call returns an error or if
+// any digest differs from SymCryptSha384KATAnswer.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha384Selftest(void)
+{
+    SYMCRYPT_ERROR scError;
+    SYMCRYPT_SHA384_STATE states[N_SELFTEST_STATES];
+    BYTE result[N_SELFTEST_STATES][SYMCRYPT_SHA384_RESULT_SIZE];
+    SYMCRYPT_PARALLEL_HASH_OPERATION op[2*N_SELFTEST_STATES];
+    BYTE scratch[SYMCRYPT_PARALLEL_SHA384_FIXED_SCRATCH + N_SELFTEST_STATES * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH];
+    int i;
+
+    SymCryptParallelSha384Init( &states[0], N_SELFTEST_STATES );
+
+    // op[2*i] appends the test message to state i; op[2*i + 1] extracts its digest.
+    for( i=0; i<N_SELFTEST_STATES; i++ )
+    {
+        op[2*i    ].iHash = i;
+        op[2*i    ].hashOperation = SYMCRYPT_HASH_OPERATION_APPEND;
+        op[2*i    ].pbBuffer = (PBYTE) SymCryptTestMsg3;
+        op[2*i    ].cbBuffer = sizeof(SymCryptTestMsg3);
+        op[2*i + 1].iHash = i;
+        op[2*i + 1].hashOperation = SYMCRYPT_HASH_OPERATION_RESULT;
+        op[2*i + 1].pbBuffer = &result[i][0];
+        op[2*i + 1].cbBuffer = SYMCRYPT_SHA384_RESULT_SIZE;
+    }
+
+    scError = SymCryptParallelSha384Process( &states[0], N_SELFTEST_STATES, op, 2*N_SELFTEST_STATES, scratch, sizeof( scratch ) );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        SymCryptFatal( 'PS38' );
+    }
+
+    for( i=0; i<N_SELFTEST_STATES; i++ )
+    {
+        SymCryptInjectError( &result[i][0], SYMCRYPT_SHA384_RESULT_SIZE );
+
+        if( memcmp( &result[i][0], SymCryptSha384KATAnswer, SYMCRYPT_SHA384_RESULT_SIZE ) != 0 ) {
+            SymCryptFatal( 'PS38' );
+        }
+    }
+}
+
+//
+// SymCryptParallelSha512Selftest
+//
+// Same known-answer test as above for SHA-512: compares every digest against
+// SymCryptSha512KATAnswer and calls SymCryptFatal( 'PS51' ) on any failure.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512Selftest(void)
+{
+    SYMCRYPT_ERROR scError;
+    SYMCRYPT_SHA512_STATE states[N_SELFTEST_STATES];
+    BYTE result[N_SELFTEST_STATES][SYMCRYPT_SHA512_RESULT_SIZE];
+    SYMCRYPT_PARALLEL_HASH_OPERATION op[2*N_SELFTEST_STATES];
+    BYTE scratch[SYMCRYPT_PARALLEL_SHA512_FIXED_SCRATCH + N_SELFTEST_STATES * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH];
+    int i;
+
+    SymCryptParallelSha512Init( &states[0], N_SELFTEST_STATES );
+
+    // op[2*i] appends the test message to state i; op[2*i + 1] extracts its digest.
+    for( i=0; i<N_SELFTEST_STATES; i++ )
+    {
+        op[2*i    ].iHash = i;
+        op[2*i    ].hashOperation = SYMCRYPT_HASH_OPERATION_APPEND;
+        op[2*i    ].pbBuffer = (PBYTE) SymCryptTestMsg3;
+        op[2*i    ].cbBuffer = sizeof(SymCryptTestMsg3);
+        op[2*i + 1].iHash = i;
+        op[2*i + 1].hashOperation = SYMCRYPT_HASH_OPERATION_RESULT;
+        op[2*i + 1].pbBuffer = &result[i][0];
+        op[2*i + 1].cbBuffer = SYMCRYPT_SHA512_RESULT_SIZE;
+    }
+
+    scError = SymCryptParallelSha512Process( &states[0], N_SELFTEST_STATES, op, 2*N_SELFTEST_STATES, scratch, sizeof( scratch ) );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        SymCryptFatal( 'PS51' );
+    }
+
+    for( i=0; i<N_SELFTEST_STATES; i++ )
+    {
+        SymCryptInjectError( &result[i][0], SYMCRYPT_SHA512_RESULT_SIZE );
+
+        if( memcmp( &result[i][0], SymCryptSha512KATAnswer, SYMCRYPT_SHA512_RESULT_SIZE ) != 0 ) {
+            SymCryptFatal( 'PS51' );
+        }
+    }
+}
diff --git a/libs/symcrypt/lib/shake.c b/libs/symcrypt/lib/shake.c
new file mode 100644
index 00000000000..1850bc7be04
--- /dev/null
+++ b/libs/symcrypt/lib/shake.c
@@ -0,0 +1,337 @@
+//
+// Shake.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// See the symcrypt.h file for documentation on what the various functions do.
+//
+
+
+
+//
+// SHAKE128
+//
+// The function bodies come from shake_pattern.c, which is included once per
+// algorithm with the macros below selecting the names and sizes.
+//
+#define Alg Shake128
+#define ALG SHAKE128
+#define SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE SYMCRYPT_SHAKE128_INPUT_BLOCK_SIZE
+#define SYMCRYPT_SHAKEXXX_RESULT_SIZE SYMCRYPT_SHAKE128_RESULT_SIZE
+#include "shake_pattern.c"
+#undef SYMCRYPT_SHAKEXXX_RESULT_SIZE
+#undef SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE
+#undef ALG
+#undef Alg
+
+const SYMCRYPT_HASH SymCryptShake128HashAlgorithm_default = {
+    &SymCryptShake128Init,
+    &SymCryptShake128Append,
+    &SymCryptShake128Result,
+    NULL, // AppendBlocks function is not implemented for SHA-3
+    &SymCryptShake128StateCopy,
+    sizeof(SYMCRYPT_SHAKE128_STATE),
+    SYMCRYPT_SHAKE128_RESULT_SIZE,
+    SYMCRYPT_SHAKE128_INPUT_BLOCK_SIZE,
+    SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHAKE128_STATE, ks.state),
+    SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHAKE128_STATE, ks.state),
+};
+
+const PCSYMCRYPT_HASH SymCryptShake128HashAlgorithm = &SymCryptShake128HashAlgorithm_default;
+
+// Expected SHAKE128 output for SymCryptTestMsg3 at the default result size.
+static const BYTE shake128KATAnswer[SYMCRYPT_SHAKE128_RESULT_SIZE] = {
+    0x58, 0x81, 0x09, 0x2d, 0xd8, 0x18, 0xbf, 0x5c,
+    0xf8, 0xa3, 0xdd, 0xb7, 0x93, 0xfb, 0xcb, 0xa7,
+    0x40, 0x97, 0xd5, 0xc5, 0x26, 0xa6, 0xd3, 0x5f,
+    0x97, 0xb8, 0x33, 0x51, 0x94, 0x0f, 0x2c ,0xc8
+};
+
+// Known-answer test; calls SymCryptFatal('shk1') on a mismatch.
+VOID
+SYMCRYPT_CALL
+SymCryptShake128Selftest(void)
+{
+    BYTE result[SYMCRYPT_SHAKE128_RESULT_SIZE];
+
+    SymCryptShake128(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result, sizeof(result));
+
+    SymCryptInjectError(result, sizeof(result));
+
+    if (memcmp(result, shake128KATAnswer, sizeof(result)) != 0)
+    {
+        SymCryptFatal('shk1');
+    }
+}
+
+
+//
+// SHAKE256
+//
+#define Alg Shake256
+#define ALG SHAKE256
+#define SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE SYMCRYPT_SHAKE256_INPUT_BLOCK_SIZE
+#define SYMCRYPT_SHAKEXXX_RESULT_SIZE SYMCRYPT_SHAKE256_RESULT_SIZE
+#include "shake_pattern.c"
+#undef SYMCRYPT_SHAKEXXX_RESULT_SIZE
+#undef SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE
+#undef ALG
+#undef Alg
+
+const SYMCRYPT_HASH SymCryptShake256HashAlgorithm_default = {
+    &SymCryptShake256Init,
+    &SymCryptShake256Append,
+    &SymCryptShake256Result,
+    NULL, // AppendBlocks function is not implemented for SHA-3
+    &SymCryptShake256StateCopy,
+    sizeof(SYMCRYPT_SHAKE256_STATE),
+    SYMCRYPT_SHAKE256_RESULT_SIZE,
+    SYMCRYPT_SHAKE256_INPUT_BLOCK_SIZE,
+    SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHAKE256_STATE, ks.state),
+    SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHAKE256_STATE, ks.state),
+};
+
+const PCSYMCRYPT_HASH SymCryptShake256HashAlgorithm = &SymCryptShake256HashAlgorithm_default;
+
+// Expected SHAKE256 output for SymCryptTestMsg3 at the default result size.
+static const BYTE shake256KATAnswer[SYMCRYPT_SHAKE256_RESULT_SIZE] = {
+    0x48, 0x33, 0x66, 0x60, 0x13, 0x60, 0xa8, 0x77, 0x1c, 0x68, 0x63, 0x08, 0x0c, 0xc4, 0x11, 0x4d,
+    0x8d, 0xb4, 0x45, 0x30, 0xf8, 0xf1, 0xe1, 0xee, 0x4f, 0x94, 0xea, 0x37, 0xe7, 0x8b, 0x57, 0x39,
+    0xd5, 0xa1, 0x5b, 0xef, 0x18, 0x6a, 0x53, 0x86, 0xc7, 0x57, 0x44, 0xc0, 0x52, 0x7e, 0x1f, 0xaa,
+    0x9f, 0x87, 0x26, 0xe4, 0x62, 0xa1, 0x2a, 0x4f, 0xeb, 0x06, 0xbd, 0x88, 0x01, 0xe7, 0x51, 0xe4
+};
+
+// Known-answer test; calls SymCryptFatal('shk2') on a mismatch.
+VOID
+SYMCRYPT_CALL
+SymCryptShake256Selftest(void)
+{
+    BYTE result[SYMCRYPT_SHAKE256_RESULT_SIZE];
+
+    SymCryptShake256(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result, sizeof(result));
+
+    SymCryptInjectError(result, sizeof(result));
+
+    if (memcmp(result, shake256KATAnswer, sizeof(result)) != 0)
+    {
+        SymCryptFatal('shk2');
+    }
+}
+
+
+//
+// CSHAKE128
+//
+#define Alg CShake128
+#define ALG CSHAKE128
+#define SYMCRYPT_SHAKEXXX_INIT SymCryptShake128Init
+#define SYMCRYPT_SHAKEXXX_STATE SYMCRYPT_SHAKE128_STATE
+#define SYMCRYPT_CSHAKEXXX_INPUT_BLOCK_SIZE SYMCRYPT_CSHAKE128_INPUT_BLOCK_SIZE
+#define SYMCRYPT_CSHAKEXXX_RESULT_SIZE SYMCRYPT_CSHAKE128_RESULT_SIZE
+#include "cshake_pattern.c"
+#undef SYMCRYPT_CSHAKEXXX_RESULT_SIZE
+#undef SYMCRYPT_CSHAKEXXX_INPUT_BLOCK_SIZE
+#undef SYMCRYPT_SHAKEXXX_STATE
+#undef SYMCRYPT_SHAKEXXX_INIT
+#undef ALG
+#undef Alg
+
+
+// Expected cSHAKE128 output for SymCryptTestMsg3 with N = "N" and S = "S".
+static const BYTE cshake128KATAnswer[SYMCRYPT_CSHAKE128_RESULT_SIZE] = {
+    0x14, 0xe5, 0xdf, 0xf3, 0xae, 0xfd, 0xfe, 0x8e,
+    0xa6, 0xae, 0xed, 0xfd, 0x99, 0xe6, 0x84, 0x74,
+    0xbc, 0x61, 0xb9, 0xd6, 0x17, 0x4e, 0x9f, 0x4a,
+    0xe3, 0xbd, 0x87, 0xdf, 0x0e, 0xf2, 0x16, 0xdb,
+};
+
+// Known-answer test; calls SymCryptFatal('cshk') on a mismatch.
+VOID
+SYMCRYPT_CALL
+SymCryptCShake128Selftest(void)
+{
+    BYTE result[SYMCRYPT_CSHAKE128_RESULT_SIZE];
+    static const unsigned char Nstr[] = { 'N' };
+    static const unsigned char Sstr[] = { 'S' };
+
+    SymCryptCShake128( Nstr, sizeof(Nstr),
+                        Sstr, sizeof(Sstr),
+                        SymCryptTestMsg3, sizeof(SymCryptTestMsg3),
+                        result, sizeof(result));
+
+    SymCryptInjectError(result, sizeof(result));
+
+    if (memcmp(result, cshake128KATAnswer, sizeof(result)) != 0)
+    {
+        SymCryptFatal('cshk');
+    }
+}
+
+
+//
+// CSHAKE256
+//
+#define Alg CShake256
+#define ALG CSHAKE256
+#define SYMCRYPT_SHAKEXXX_INIT SymCryptShake256Init
+#define SYMCRYPT_SHAKEXXX_STATE SYMCRYPT_SHAKE256_STATE
+#define SYMCRYPT_CSHAKEXXX_INPUT_BLOCK_SIZE SYMCRYPT_CSHAKE256_INPUT_BLOCK_SIZE
+#define SYMCRYPT_CSHAKEXXX_RESULT_SIZE SYMCRYPT_CSHAKE256_RESULT_SIZE
+#include "cshake_pattern.c"
+#undef SYMCRYPT_CSHAKEXXX_RESULT_SIZE
+#undef SYMCRYPT_CSHAKEXXX_INPUT_BLOCK_SIZE
+#undef SYMCRYPT_SHAKEXXX_STATE
+#undef SYMCRYPT_SHAKEXXX_INIT
+#undef ALG
+#undef Alg
+
+
+// Expected cSHAKE256 output for SymCryptTestMsg3 with N = "N" and S = "S".
+static const BYTE cshake256KATAnswer[SYMCRYPT_CSHAKE256_RESULT_SIZE] = {
+    0x4d, 0xe8, 0x71, 0x6c, 0x4a, 0x16, 0x7e, 0x28, 0x2c, 0x18, 0xc5, 0x1e, 0xed, 0xa6, 0x00, 0xb8,
+    0x91, 0x92, 0x4f, 0xea, 0x2e, 0x20, 0x7f, 0x71, 0x2c, 0xfd, 0xe2, 0x95, 0xfd, 0x1c, 0x67, 0x32,
+    0x31, 0x49, 0x98, 0x23, 0xc0, 0x5e, 0x6a, 0xe3, 0x89, 0xad, 0x4d, 0xa2, 0x32, 0x9c, 0xc9, 0x2e,
+    0x0f, 0xd6, 0x90, 0xb9, 0xee, 0x91, 0x0e, 0x86, 0xf7, 0x1d, 0x03, 0x88, 0xb5, 0x95, 0x61, 0x95
+};
+
+// Known-answer test. Note that it reports the same fatal code, 'cshk', as the
+// cSHAKE128 test above.
+VOID
+SYMCRYPT_CALL
+SymCryptCShake256Selftest(void)
+{
+    BYTE result[SYMCRYPT_CSHAKE256_RESULT_SIZE];
+    static const unsigned char Nstr[] = { 'N' };
+    static const unsigned char Sstr[] = { 'S' };
+
+    SymCryptCShake256(Nstr, sizeof(Nstr),
+                        Sstr, sizeof(Sstr),
+                        SymCryptTestMsg3, sizeof(SymCryptTestMsg3),
+                        result, sizeof(result));
+
+    SymCryptInjectError(result, sizeof(result));
+
+    if (memcmp(result, cshake256KATAnswer, sizeof(result)) != 0)
+    {
+        SymCryptFatal('cshk');
+    }
+}
+
+//
+// CShake helper functions
+//
+
+//
+// SymCryptCShakeEncodeInputStrings
+//
+// Absorbs the cSHAKE prefix into pState: the left-encoded input block size, the
+// encoded function-name string, the encoded customization string, and then zero
+// padding up to the next input-block boundary if the state is not already on one.
+// At least one of the two strings must be non-empty (assert only).
+//
+VOID
+SYMCRYPT_CALL
+SymCryptCShakeEncodeInputStrings(
+    _Inout_ PSYMCRYPT_KECCAK_STATE pState,
+    _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString,
+    SIZE_T cbFunctionNameString,
+    _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString,
+    SIZE_T cbCustomizationString)
+{
+    SYMCRYPT_ASSERT((cbFunctionNameString > 0) || (cbCustomizationString > 0));
+
+    // left_encode( inputBlockSize ) for byte_pad function
+    //
+    // SymCryptKeccakEncodeTimes8 function encodes 8 times the value passed to
+    // it. Here, we want the actual value of pState->inputBlockSize to be encoded,
+    // hence the division by 8.
+    SymCryptKeccakAppendEncodeTimes8(pState, pState->inputBlockSize / 8, TRUE);
+
+    SymCryptKeccakAppendEncodedString(pState, pbFunctionNameString, cbFunctionNameString);
+    SymCryptKeccakAppendEncodedString(pState, pbCustomizationString, cbCustomizationString);
+
+    // Appending of Customization String may have already called the permutation
+    // if the appended data is aligned to input block size, in which case the zero
+    // padding has been done.
+    if (pState->stateIndex != 0)
+    {
+        SymCryptKeccakZeroAppendBlock(pState);
+    }
+}
+
+//
+// SymCryptKeccakEncodeTimes8
+//
+// Writes the left_encode (bLeftEncode != 0) or right_encode (bLeftEncode == 0)
+// form of the value (uInput * 8) to pbOutput. The value is emitted big-endian in
+// the fewest bytes that hold it (at least one), with a single byte giving that
+// byte count placed before it (left) or after it (right).
+// Returns the number of bytes written. cbOutput is only checked by an assert.
+//
+SIZE_T
+SYMCRYPT_CALL
+SymCryptKeccakEncodeTimes8(
+    UINT64 uInput,
+    _Out_writes_(cbOutput) PBYTE pbOutput,
+    SIZE_T cbOutput,
+    BOOLEAN bLeftEncode)
+{
+    BYTE encoding[1 + sizeof(UINT64)];
+    SIZE_T ret = 0;
+
+    // longest encoding is 1 byte for length + 9 bytes for uInput * 8
+    SYMCRYPT_ASSERT(cbOutput >= (1 + sizeof(encoding)));
+    UNREFERENCED_PARAMETER(cbOutput);
+
+    //
+    // encoding[0] .. encoding[8] will contain (uInput * 8) in big endian form
+    // encoding[0] takes the top three bits of uInput, which the 64-bit multiply drops.
+    encoding[0] = (BYTE)(uInput >> 61);
+    SYMCRYPT_STORE_MSBFIRST64(&encoding[1], uInput * 8);
+
+    SIZE_T length = 1; // number of bytes required to encode uInput
+    PCBYTE pbMsb = &encoding[sizeof(encoding) - 1]; // pointer to the most significant byte
+
+    // Locate the most significant non-zero byte
+    // (length doubles as the "found" flag: it stays 1 until the first non-zero byte is seen)
+    for (int i = 0; i < sizeof(encoding); i++)
+    {
+        // Do not early terminate on the most significant byte
+        if (encoding[i] != 0 && length == 1)
+        {
+            length = sizeof(encoding) - i;
+            pbMsb = &encoding[i];
+        }
+    }
+
+    ret = 1 + length;
+
+    if (bLeftEncode)
+    {
+        // length for left_encode
+        *pbOutput++ = (BYTE)length;
+    }
+
+    memcpy(pbOutput, pbMsb, length);
+
+    if(!bLeftEncode)
+    {
+        // length for right_encode
+        pbOutput[length] = (BYTE)length;
+    }
+
+    return ret; // total number of bytes written to pbOutput
+}
+
+//
+// SymCryptKeccakAppendEncodeTimes8
+//
+// Encodes (uValue * 8) with SymCryptKeccakEncodeTimes8 into a local buffer and
+// appends the resulting bytes to pState.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptKeccakAppendEncodeTimes8(
+    _Inout_ SYMCRYPT_KECCAK_STATE *pState,
+    UINT64 uValue,
+    BOOLEAN bLeftEncode)
+
+{
+    BYTE encoding[1 + (1 + sizeof(UINT64))];
+    SIZE_T ret;
+
+    ret = SymCryptKeccakEncodeTimes8(uValue, encoding, sizeof(encoding), bLeftEncode);
+
+    SymCryptKeccakAppend(pState, encoding, ret);
+}
+
+
+//
+// SymCryptKeccakAppendEncodedString
+//
+// Appends the left-encoded bit length of the string (cbString * 8) followed by
+// the cbString bytes of the string itself.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptKeccakAppendEncodedString(
+    _Inout_ PSYMCRYPT_KECCAK_STATE pState,
+    _In_reads_(cbString) PCBYTE pbString,
+    SIZE_T cbString)
+{
+    SymCryptKeccakAppendEncodeTimes8(pState, cbString, TRUE);
+    SymCryptKeccakAppend(pState, pbString, cbString);
+}
diff --git a/libs/symcrypt/lib/shake_pattern.c b/libs/symcrypt/lib/shake_pattern.c
new file mode 100644
index 00000000000..59551a391f8
--- /dev/null
+++ b/libs/symcrypt/lib/shake_pattern.c
@@ -0,0 +1,111 @@
+//
+// shake_pattern.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#if 0
+#pragma makedep header
+#endif
+
+//
+// This source file implements SHAKE128 and SHAKE256
+//
+// See the symcrypt.h file for documentation on what the various functions do.
+//
+// NOTE(review): the SYMCRYPT_Xxx* / SYMCRYPT_XXX_STATE names are not defined in
+// this file; presumably they are macros that build the real identifiers from the
+// Alg / ALG macros set by the including file - confirm in the shared headers.
+//
+
+//
+// SymCryptShake
+//
+// One-shot hash producing the default result size; forwards to SYMCRYPT_Xxx.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxDefault(
+    _In_reads_( cbData ) PCBYTE pbData,
+    SIZE_T cbData,
+    _Out_writes_( SYMCRYPT_SHAKEXXX_RESULT_SIZE ) PBYTE pbResult)
+{
+    SYMCRYPT_Xxx(pbData, cbData, pbResult, SYMCRYPT_SHAKEXXX_RESULT_SIZE);
+}
+
+//
+// SymCryptShakeEx
+//
+// One-shot hash with a caller-chosen output length: init, append, then extract
+// cbResult bytes with the wipe flag set.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_Xxx(
+    _In_reads_( cbData ) PCBYTE pbData,
+    SIZE_T cbData,
+    _Out_writes_( cbResult ) PBYTE pbResult,
+    SIZE_T cbResult)
+{
+    SYMCRYPT_XXX_STATE state;
+
+    SYMCRYPT_XxxInit(&state);
+    SYMCRYPT_XxxAppend(&state, pbData, cbData);
+    SYMCRYPT_XxxExtract(&state, pbResult, cbResult, TRUE);
+}
+
+//
+// SymCryptShakeStateCopy
+//
+// Checks the magic of the source, copies the whole state by value, and then
+// sets the magic on the destination.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxStateCopy(_In_ const SYMCRYPT_XXX_STATE* pSrc, _Out_ SYMCRYPT_XXX_STATE* pDst)
+{
+    SYMCRYPT_CHECK_MAGIC(pSrc);
+    *pDst = *pSrc;
+    SYMCRYPT_SET_MAGIC(pDst);
+}
+
+//
+// SymCryptShakeInit
+//
+// Initializes the Keccak state with this algorithm's input block size and the
+// SHAKE padding value, then sets the state magic.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxInit(_Out_ PSYMCRYPT_XXX_STATE pState)
+{
+    SymCryptKeccakInit(&pState->ks,
+                        SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE,
+                        SYMCRYPT_SHAKE_PADDING_VALUE);
+
+    SYMCRYPT_SET_MAGIC(pState);
+}
+
+//
+// SymCryptShakeAppend
+//
+// Forwards to SymCryptKeccakAppend on the embedded Keccak state.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxAppend(
+    _Inout_ PSYMCRYPT_XXX_STATE pState,
+    _In_reads_(cbData) PCBYTE pbData,
+    SIZE_T cbData)
+{
+    SymCryptKeccakAppend(&pState->ks, pbData, cbData);
+}
+
+//
+// SymCryptShakeExtract
+//
+// Forwards to SymCryptKeccakExtract, passing the caller's bWipe flag through.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxExtract(
+    _Inout_ PSYMCRYPT_XXX_STATE pState,
+    _Out_writes_(cbResult) PBYTE pbResult,
+    SIZE_T cbResult,
+    BOOLEAN bWipe)
+{
+    SymCryptKeccakExtract(&pState->ks, pbResult, cbResult, bWipe);
+}
+
+//
+// SymCryptShakeResult
+//
+// Extracts the default result size with the wipe flag set.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxResult(
+    _Inout_ PSYMCRYPT_XXX_STATE pState,
+    _Out_writes_(SYMCRYPT_SHAKEXXX_RESULT_SIZE) PBYTE pbResult)
+{
+    SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHAKEXXX_RESULT_SIZE, TRUE);
+}
diff --git a/libs/symcrypt/lib/smallPrimes32.h b/libs/symcrypt/lib/smallPrimes32.h
new file mode 100644
index 00000000000..8afad4c9380
--- /dev/null
+++ b/libs/symcrypt/lib/smallPrimes32.h
@@ -0,0 +1,29 @@
+//
+// Parameters for trial division mechanism
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// GENERATED FILE, DO NOT EDIT.
+//
+
+
+//
+// The primes are put into groups of consecutive primes (skipping 2, 3, 5, and 17).
+// Each group has a product less than SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT which is
+// chosen to avoid overflows in the modular reduction computation.
+//
+
+typedef struct _SYMCRYPT_SMALL_PRIME_GROUPS_SPEC {
+    UINT16 nGroups; // # groups of this size
+    UINT8 nPrimes; // # primes in the group
+    UINT32 maxPrime; // largest prime in the last group
+} SYMCRYPT_SMALL_PRIME_GROUPS_SPEC;
+
+#define SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT (0x1c71c71cU)
+
+const SYMCRYPT_SMALL_PRIME_GROUPS_SPEC g_SymCryptSmallPrimeGroupsSpec[] = {
+    { 1, 7, 31 },
+    { 1, 5, 53 },
+    { 5, 4, 151 },
+    { 34, 3, 787 },
+    { 1156, 2, 21841 },
+    { 0, 1, 0xffffffff },
+};
diff --git a/libs/symcrypt/lib/sp800_108.c b/libs/symcrypt/lib/sp800_108.c
new file mode 100644
index 00000000000..56690caa1a4
--- /dev/null
+++ b/libs/symcrypt/lib/sp800_108.c
@@ -0,0 +1,143 @@
+//
+// sp800_108.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+//
+// This module contains the routines to implement the SP800-108 CTR KDF function
+//
+//
+
+#include "precomp.h"
+
+//
+// SymCryptSp800_108Derive
+//
+// Fills pbResult with cbResult bytes of derived key material. Output block i
+// (i = 1, 2, ...) is the MAC, under the expanded key, of
+//      i (4 bytes, MSB first) || Label || 0x00 || Context || 8*cbResult (4 bytes, MSB first)
+// and the last block is truncated as needed. The Label and the zero byte are left
+// out when cbLabel == (SIZE_T) -1.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT when cbResult > UINT32_MAX/8, otherwise
+// SYMCRYPT_NO_ERROR. cbResult == 0 is only rejected by an assert.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSp800_108Derive(
+    _In_ PCSYMCRYPT_SP800_108_EXPANDED_KEY pExpandedKey,
+    _In_reads_opt_(cbLabel) PCBYTE pbLabel,
+    SIZE_T cbLabel,
+    _In_reads_opt_(cbContext) PCBYTE pbContext,
+    SIZE_T cbContext,
+    _Out_writes_(cbResult) PBYTE pbResult,
+    SIZE_T cbResult)
+{
+    SYMCRYPT_MAC_STATE macState;
+    UINT32 iBlock;
+    SIZE_T bytes;
+    SIZE_T blockSize = pExpandedKey->macAlg->resultSize;
+    SIZE_T bytesRemaining = cbResult;
+    BYTE buf[4];
+    SYMCRYPT_ALIGN BYTE rbBlockResult[SYMCRYPT_MAC_MAX_RESULT_SIZE];
+
+    SYMCRYPT_ASSERT(
+        blockSize <= SYMCRYPT_MAC_MAX_RESULT_SIZE &&
+        bytesRemaining > 0 );
+
+    if( cbResult > UINT32_MAX/8 )
+    {
+        // SP800-108 requires the output size in bits to be encoded in a 32-bit value.
+        // cbResults that are too large are impossible.
+        return SYMCRYPT_INVALID_ARGUMENT;
+    }
+
+    iBlock = 0;
+    while( bytesRemaining > 0 )
+    {
+        iBlock += 1;
+        pExpandedKey->macAlg->initFunc ( &macState, &pExpandedKey->macKey);
+
+        //
+        // We append the pieces into the MAC function. This is inefficient but works always.
+        // If we need more speed for large outputs, we could use a fixed-size stack buffer to build the
+        // concatenation & do a single append. This reduces the # calls in the loop, but adds one memcpy to
+        // the parameters. For small output sizes this is probably a wash.
+        //
+
+        SYMCRYPT_STORE_MSBFIRST32( &buf[0], iBlock );
+        pExpandedKey->macAlg->appendFunc( &macState, &buf[0], 4 ); // block count encoded in 4 bytes
+
+        if( cbLabel != (SIZE_T) -1 )
+        {
+            //
+            // cbLabel == -1 signals a generic input in the Context field.
+            //
+            pExpandedKey->macAlg->appendFunc( &macState, pbLabel, cbLabel ); // label
+
+            buf[0] = 0;
+            pExpandedKey->macAlg->appendFunc( &macState, &buf[0], 1 ); // zero byte
+        }
+
+        pExpandedKey->macAlg->appendFunc( &macState, pbContext, cbContext); // Context
+
+        // The cast cannot truncate: cbResult <= UINT32_MAX/8 was checked above.
+        SYMCRYPT_STORE_MSBFIRST32( &buf[0], 8 * (UINT32)cbResult );
+        pExpandedKey->macAlg->appendFunc( &macState, &buf[0], 4 ); // output length, in bits
+
+        pExpandedKey->macAlg->resultFunc( &macState, rbBlockResult );
+
+        bytes = SYMCRYPT_MIN( bytesRemaining, blockSize );
+        memcpy( pbResult, rbBlockResult, bytes );
+        pbResult += bytes;
+        bytesRemaining -= bytes;
+    }
+
+    // Do not leave the last MAC output (derived key material) on the stack.
+    SymCryptWipeKnownSize( rbBlockResult, sizeof( rbBlockResult ) );
+    return SYMCRYPT_NO_ERROR;
+}
+
+//
+// SymCryptSp800_108ExpandKey
+//
+// Records the MAC algorithm in pExpandedKey and expands pbKey with that
+// algorithm's key-expansion function, whose status is returned.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSp800_108ExpandKey(
+    _Out_ PSYMCRYPT_SP800_108_EXPANDED_KEY pExpandedKey,
+    _In_ PCSYMCRYPT_MAC macAlgorithm,
+    _In_reads_(cbKey) PCBYTE pbKey,
+    SIZE_T cbKey )
+{
+    SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) );
+
+    pExpandedKey->macAlg = macAlgorithm;
+    return macAlgorithm->expandKeyFunc(&pExpandedKey->macKey, pbKey, cbKey );
+}
+
+//
+// SymCryptSp800_108
+//
+// One-shot form: expands the key, derives cbResult bytes, and wipes the local
+// expanded key on every path. Returns the first error encountered.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSp800_108(
+    PCSYMCRYPT_MAC macAlgorithm,
+    _In_reads_(cbKey) PCBYTE pbKey,
+    SIZE_T cbKey,
+    _In_reads_opt_(cbLabel) PCBYTE pbLabel,
+    SIZE_T cbLabel,
+    _In_reads_opt_(cbContext) PCBYTE pbContext,
+    SIZE_T cbContext,
+    _Out_writes_(cbResult) PBYTE pbResult,
+    SIZE_T cbResult)
+{
+    SYMCRYPT_SP800_108_EXPANDED_KEY key;
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    scError = SymCryptSp800_108ExpandKey( &key, macAlgorithm, pbKey, cbKey );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptSp800_108Derive( &key, pbLabel, cbLabel, pbContext, cbContext, pbResult, cbResult );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+cleanup:
+
+    SymCryptWipeKnownSize( &key, sizeof( key ) );
+
+    return scError;
+
+}
+
+
+//
+// Self tests are in sp800_108_*.c files
+// to
avoid pulling in SHA-1 when only SP800-108-SHA256 is used and
+similar scenarios.
+//
diff --git a/libs/symcrypt/lib/sp800_108_hmacsha1.c b/libs/symcrypt/lib/sp800_108_hmacsha1.c
new file mode 100644
index 00000000000..691a6b2a73b
--- /dev/null
+++ b/libs/symcrypt/lib/sp800_108_hmacsha1.c
@@ -0,0 +1,39 @@
+//
+// sp800_108_hmacsha1.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// The SP800-108 SHA-1 test
+// This is in a separate module to avoid pulling in SHA-1 whenever we use SP800_108
+//
+
+// Expected first 8 bytes of KDF output for the fixed inputs used below.
+static const BYTE sp800_108_sha1Answer[] =
+{
+    0xcf, 0x4b, 0xfe, 0x4f, 0x85, 0xa1, 0x0b, 0xad,
+};
+
+//
+// Known-answer test for SP800-108 with HMAC-SHA1. Derives sizeof(res) bytes from
+// fixed key / label / context inputs and calls SymCryptFatal('8108') if they do
+// not match sp800_108_sha1Answer. The status returned by SymCryptSp800_108 is not
+// checked; a failure is expected to show up as a mismatch.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptSp800_108_HmacSha1SelfTest(void)
+{
+    BYTE res[sizeof(sp800_108_sha1Answer)];
+
+    SymCryptSp800_108(
+        SymCryptHmacSha1Algorithm,
+        &SymCryptTestKey32[0], 8,           // key
+        (PCBYTE)"Label", 5,                 // label
+        &SymCryptTestKey32[16], 16,         // context
+        res,
+        sizeof(res));
+
+    SymCryptInjectError( res, sizeof( res ) );
+
+    if (memcmp(res, sp800_108_sha1Answer, sizeof(res)) !=0)
+    {
+        SymCryptFatal('8108');
+    }
+}
diff --git a/libs/symcrypt/lib/sp800_108_hmacsha256.c b/libs/symcrypt/lib/sp800_108_hmacsha256.c
new file mode 100644
index 00000000000..69ecc59669c
--- /dev/null
+++ b/libs/symcrypt/lib/sp800_108_hmacsha256.c
@@ -0,0 +1,39 @@
+//
+// sp800_108_hmacsha256.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// The SP800-108 SHA-256 test
+// This is in a separate module to avoid pulling in SHA-256 whenever we use SP800_108
+//
+
+// Expected first 8 bytes of KDF output for the fixed inputs used below.
+static const BYTE sp800_108_sha256Answer[] =
+{
+    0x00, 0x26, 0x4b, 0xbb, 0x14, 0x97, 0x40, 0x54,
+};
+
+//
+// Known-answer test for SP800-108 with HMAC-SHA256. Derives sizeof(res) bytes from
+// fixed key / label / context inputs and calls SymCryptFatal('8108') if they do
+// not match sp800_108_sha256Answer. The status returned by SymCryptSp800_108 is
+// not checked; a failure is expected to show up as a mismatch.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptSp800_108_HmacSha256SelfTest(void)
+{
+    BYTE res[sizeof(sp800_108_sha256Answer)];
+
+    SymCryptSp800_108(
+        SymCryptHmacSha256Algorithm,
+        &SymCryptTestKey32[0], 8,           // key
+        (PCBYTE)"Label", 5,                 // label
+        &SymCryptTestKey32[16], 16,         // context
+        res,
+        sizeof(res));
+
+    SymCryptInjectError( res, sizeof( res ) );
+
+    if (memcmp(res, sp800_108_sha256Answer, sizeof(res)) !=0)
+    {
+        SymCryptFatal('8108');
+    }
+}
diff --git a/libs/symcrypt/lib/sp800_108_hmacsha512.c b/libs/symcrypt/lib/sp800_108_hmacsha512.c
new file mode 100644
index 00000000000..bbb9fa0fdae
--- /dev/null
+++ b/libs/symcrypt/lib/sp800_108_hmacsha512.c
@@ -0,0 +1,66 @@
+//
+// sp800_108_hmacsha512.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// The SP800-108 SHA-384 / SHA-512 self tests
+// This is in a separate module to avoid pulling in SHA-512 whenever we use SP800_108
+//
+
+// Expected first 8 bytes of HMAC-SHA384 KDF output for the fixed inputs used below.
+static const BYTE sp800_108_sha384Answer[] =
+{
+    0xc7, 0x10, 0x27, 0x87, 0xd8, 0x96, 0xbc, 0x89,
+};
+
+//
+// Known-answer test for SP800-108 with HMAC-SHA384. Calls SymCryptFatal('8108')
+// if the derived bytes do not match sp800_108_sha384Answer. The status returned
+// by SymCryptSp800_108 is not checked.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptSp800_108_HmacSha384SelfTest(void)
+{
+    BYTE res[sizeof(sp800_108_sha384Answer)];
+
+    SymCryptSp800_108(
+        SymCryptHmacSha384Algorithm,
+        &SymCryptTestKey32[0], 8,           // key
+        (PCBYTE)"Label", 5,                 // label
+        &SymCryptTestKey32[16], 16,         // context
+        res,
+        sizeof(res));
+
+    SymCryptInjectError( res, sizeof( res ) );
+
+    if (memcmp(res, sp800_108_sha384Answer, sizeof(res)) !=0)
+    {
+        SymCryptFatal('8108');
+    }
+}
+
+// Expected first 8 bytes of HMAC-SHA512 KDF output for the fixed inputs used below.
+static const BYTE sp800_108_sha512Answer[] =
+{
+    0xdb, 0x3a, 0x18, 0xd9, 0x6c, 0x4a, 0xd4, 0x1e,
+};
+
+//
+// Known-answer test for SP800-108 with HMAC-SHA512. Calls SymCryptFatal('8108')
+// if the derived bytes do not match sp800_108_sha512Answer. The status returned
+// by SymCryptSp800_108 is not checked.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptSp800_108_HmacSha512SelfTest(void)
+{
+    BYTE res[sizeof(sp800_108_sha512Answer)];
+
+    SymCryptSp800_108(
+        SymCryptHmacSha512Algorithm,
+        &SymCryptTestKey32[0], 8,           // key
+        (PCBYTE)"Label", 5,                 // label
+        &SymCryptTestKey32[16], 16,         // context
+        res,
+        sizeof(res));
+
+    SymCryptInjectError( res, sizeof( res ) );
+
+    if (memcmp(res, sp800_108_sha512Answer, sizeof(res)) !=0)
+    {
+        SymCryptFatal('8108');
+    }
+}
diff --git a/libs/symcrypt/lib/srtp_kdf.c b/libs/symcrypt/lib/srtp_kdf.c
new file mode 100644
index 00000000000..34c7f4837d7
--- /dev/null
+++ b/libs/symcrypt/lib/srtp_kdf.c
@@ -0,0 +1,175 @@
+//
+// srtp_kdf.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+//
+// This module implements SRTP-KDF specified in RFC 3711 Section 4.3.1.
+// + +#include "precomp.h" + + +#define SYMCRYPT_SRTP_KDF_SALT_SIZE (112 / 8) + + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSrtpKdfExpandKey( + _Out_ PSYMCRYPT_SRTPKDF_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SymCryptAesExpandKeyEncryptOnly(&pExpandedKey->aesExpandedKey, pbKey, cbKey); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSrtpKdfDerive( + _In_ PCSYMCRYPT_SRTPKDF_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT32 uKeyDerivationRate, + UINT64 uIndex, + UINT32 uIndexWidth, + BYTE label, + _Out_writes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE X[16] = { 0 }; + + // + // uIndexWidth must be one of 0, 32 or 48. RFC 3711 defines SRTP indices to be + // 48-bits. SRTCP indices were first specified as 32-bit values and then updated to + // 48-bits by Errata ID 3712. uIndexWidth parameter allows specifying the width of + // the uIndex parameter for both SRTP and SRTCP indices. The test vectors use + // 32-bit SRTCP index values. + // + // The default value of 0 is equivalent to setting uIndexWidth to 48. + if (uIndexWidth == 0) + { + uIndexWidth = 48; + } + else if (uIndexWidth != 32 && uIndexWidth != 48) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbSalt != SYMCRYPT_SRTP_KDF_SALT_SIZE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // uKeyDerivationRate must be zero or 2^i for 0 <= i <= 24. + // This is verified by checking both it is not greater than 2^24 and it is either zero or a power of two. + if( (uKeyDerivationRate > (1 << 24)) || ((uKeyDerivationRate & (uKeyDerivationRate - 1)) != 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Initialize X to Salt || 0 + memcpy(X, pbSalt, cbSalt); + + // (uIndex DIV uKeyDerivationRate) operation can be performed with a right shift as + // uKeyDerivationRate is either zero or a power of 2. 
When uKeyDerivationRate is zero, + // DIV operation should evaluate to zero, which can be performed by shifting uIndex by 48 bits, + // i.e., maximum value it may have. + UINT32 kdrShift = 48; + if (uKeyDerivationRate) + { + for (UINT32 i = 0; i <= 24; i++) + { + if (uKeyDerivationRate == (1UL << i)) + { + kdrShift = i; + break; + } + } + } + + UINT64 r = uIndex >> kdrShift; + + UINT64 key_id = ((UINT64)label << uIndexWidth) | r; + + // XOR key_id into salt + // + // X = S0 ... |S6 ... S13| 0 0 + // | key_id | + // + PBYTE pbXorPos = &X[SYMCRYPT_SRTP_KDF_SALT_SIZE - sizeof(key_id)]; + UINT64 uSaltLsb = SYMCRYPT_LOAD_MSBFIRST64(pbXorPos); + SYMCRYPT_STORE_MSBFIRST64(pbXorPos, uSaltLsb ^ key_id); + + // + // We break the read-once/write once rule here by writing to the pbOutput buffer twice. + // The first write wipes the buffer so that we get the raw keystream bytes from AES-CTR encryption. + // The second write to pbOutput occurs with the SymCryptAesCtrMsb64() call that produces the keystream bytes. + // + // Modification of pbOutput between the two calls does not leak any information, it just results in flipping of the + // corresponding bits of the correct output. + SymCryptWipe(pbOutput, cbOutput); + SymCryptAesCtrMsb64(&pExpandedKey->aesExpandedKey, X, pbOutput, pbOutput, cbOutput & ~0xf); + + // SymCryptAesCtrMsb64 only processes full blocks. If cbOutput is not a multiple of 16 we generate the last block of + // keystream to local buffer and copy the necessary number of bytes to output. 
+ if (cbOutput & 0xf) + { + BYTE lastBlockBytes[16] = { 0 }; + + SymCryptAesCtrMsb64(&pExpandedKey->aesExpandedKey, X, lastBlockBytes, lastBlockBytes, 16); + + memcpy(pbOutput + 16 * (cbOutput / 16), lastBlockBytes, cbOutput & 0xf); + + SymCryptWipeKnownSize(lastBlockBytes, sizeof(lastBlockBytes)); + } + +cleanup: + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSrtpKdf( + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT32 uKeyDerivationRate, + UINT64 uIndex, + UINT32 uIndexWidth, + BYTE label, + _Out_writes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_SRTPKDF_EXPANDED_KEY expandedKey; + + scError = SymCryptSrtpKdfExpandKey(&expandedKey, pbKey, cbKey); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptSrtpKdfDerive(&expandedKey, + pbSalt, cbSalt, + uKeyDerivationRate, + uIndex, uIndexWidth, + label, + pbOutput, cbOutput); + +cleanup: + + SymCryptWipeKnownSize(&expandedKey, sizeof(expandedKey)); + + return scError; +} diff --git a/libs/symcrypt/lib/ssh_kdf.c b/libs/symcrypt/lib/ssh_kdf.c new file mode 100644 index 00000000000..b1d961107d0 --- /dev/null +++ b/libs/symcrypt/lib/ssh_kdf.c @@ -0,0 +1,122 @@ +// +// ssh_kdf.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module implements SSH-KDF specified in RFC 4253 Section 7.2. 
+// + +#include "precomp.h" + + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSshKdfExpandKey( + _Out_ PSYMCRYPT_SSHKDF_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_HASH pHashFunc, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + pExpandedKey->pHashFunc = pHashFunc; + + SymCryptHashInit(pHashFunc, &pExpandedKey->hashState); + SymCryptHashAppend(pHashFunc, &pExpandedKey->hashState, pbKey, cbKey); + + return SYMCRYPT_NO_ERROR; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSshKdfDerive( + _In_ PCSYMCRYPT_SSHKDF_EXPANDED_KEY pExpandedKey, + _In_reads_(cbHashValue) PCBYTE pbHashValue, + SIZE_T cbHashValue, + BYTE label, + _In_reads_(cbSessionId) PCBYTE pbSessionId, + SIZE_T cbSessionId, + _Inout_updates_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_HASH_STATE hashState; + + PCBYTE pcbOutputSave = pbOutput; + PCSYMCRYPT_HASH pHashFunc = pExpandedKey->pHashFunc; + SIZE_T cbHashResultSize = SymCryptHashResultSize(pHashFunc); + + + while (cbOutput > 0) + { + SIZE_T cbGeneratedOutput = pbOutput - pcbOutputSave; + + SymCryptHashStateCopy(pHashFunc, &pExpandedKey->hashState, &hashState); + SymCryptHashAppend(pHashFunc, &hashState, pbHashValue, cbHashValue); // hashState has (K || H) + + // label and session ID are appended only in the first iteration + if (cbGeneratedOutput == 0) + { + SymCryptHashAppend(pHashFunc, &hashState, &label, 1); + SymCryptHashAppend(pHashFunc, &hashState, pbSessionId, cbSessionId); + } + else + { + // We break the read-once write-once rule here by appending data to a + // hash computation from pbOutput that was written by SymCryptHashResult() + // below. + // Modification of data in pbOutput buffer after it's written and before + // used again will have uncontrolled disturbances in the hash output and cannot + // be used to gain knowledge about the secret key. + SymCryptHashAppend(pHashFunc, &hashState, pcbOutputSave, cbGeneratedOutput); // hashState has (K || H || K1 .. 
Ki) + } + + SymCryptHashResult(pHashFunc, &hashState, pbOutput, cbOutput); + + SIZE_T bytesCopied = SYMCRYPT_MIN(cbOutput, cbHashResultSize); + + pbOutput += bytesCopied; + cbOutput -= bytesCopied; + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSshKdf( + _In_ PCSYMCRYPT_HASH pHashFunc, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_(cbHashValue) PCBYTE pbHashValue, + SIZE_T cbHashValue, + BYTE label, + _In_reads_(cbSessionId) PCBYTE pbSessionId, + SIZE_T cbSessionId, + _Out_writes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput) +{ + SYMCRYPT_SSHKDF_EXPANDED_KEY expandedKey; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + scError = SymCryptSshKdfExpandKey(&expandedKey, pHashFunc, pbKey, cbKey); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptSshKdfDerive(&expandedKey, + pbHashValue, cbHashValue, + label, + pbSessionId, cbSessionId, + pbOutput, cbOutput); + + cleanup: + + SymCryptWipeKnownSize(&expandedKey, sizeof(expandedKey)); + + return scError; +} diff --git a/libs/symcrypt/lib/ssh_kdf_sha256.c b/libs/symcrypt/lib/ssh_kdf_sha256.c new file mode 100644 index 00000000000..f324dda3ae2 --- /dev/null +++ b/libs/symcrypt/lib/ssh_kdf_sha256.c @@ -0,0 +1,65 @@ +// +// ssh_kdf_sha256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
//

#include "precomp.h"



// Key input for the SHA-256 self test.
// The first four bytes (0x00000081 = 129) equal the number of bytes that follow,
// i.e. the value is stored with a 4-byte big-endian length prefix.
static const BYTE pbKey[] =
{
    0x00, 0x00, 0x00, 0x81, 0x00, 0x87, 0x5c, 0x55, 0x1c, 0xef, 0x52, 0x6a, 0x4a, 0x8b, 0xe1, 0xa7,
    0xdf, 0x27, 0xe9, 0xed, 0x35, 0x4b, 0xac, 0x9a, 0xfb, 0x71, 0xf5, 0x3d, 0xba, 0xe9, 0x05, 0x67,
    0x9d, 0x14, 0xf9, 0xfa, 0xf2, 0x46, 0x9c, 0x53, 0x45, 0x7c, 0xf8, 0x0a, 0x36, 0x6b, 0xe2, 0x78,
    0x96, 0x5b, 0xa6, 0x25, 0x52, 0x76, 0xca, 0x2d, 0x9f, 0x4a, 0x97, 0xd2, 0x71, 0xf7, 0x1e, 0x50,
    0xd8, 0xa9, 0xec, 0x46, 0x25, 0x3a, 0x6a, 0x90, 0x6a, 0xc2, 0xc5, 0xe4, 0xf4, 0x8b, 0x27, 0xa6,
    0x3c, 0xe0, 0x8d, 0x80, 0x39, 0x0a, 0x49, 0x2a, 0xa4, 0x3b, 0xad, 0x9d, 0x88, 0x2c, 0xca, 0xc2,
    0x3d, 0xac, 0x88, 0xbc, 0xad, 0xa4, 0xb4, 0xd4, 0x26, 0xa3, 0x62, 0x08, 0x3d, 0xab, 0x65, 0x69,
    0xc5, 0x4c, 0x22, 0x4d, 0xd2, 0xd8, 0x76, 0x43, 0xaa, 0x22, 0x76, 0x93, 0xe1, 0x41, 0xad, 0x16,
    0x30, 0xce, 0x13, 0x14, 0x4e
};

// Hash-value input (32 bytes) passed as pbHashValue.
static const BYTE pbHash[] =
{
    0x0e, 0x68, 0x3f, 0xc8, 0xa9, 0xed, 0x7c, 0x2f, 0xf0, 0x2d, 0xef, 0x23, 0xb2, 0x74, 0x5e, 0xbc,
    0x99, 0xb2, 0x67, 0xda, 0xa8, 0x6a, 0x4a, 0xa7, 0x69, 0x72, 0x39, 0x08, 0x82, 0x53, 0xf6, 0x42
};

// Session-id input; in this vector it is byte-for-byte the same as pbHash.
static const BYTE pbSessionId[] =
{
    0x0e, 0x68, 0x3f, 0xc8, 0xa9, 0xed, 0x7c, 0x2f, 0xf0, 0x2d, 0xef, 0x23, 0xb2, 0x74, 0x5e, 0xbc,
    0x99, 0xb2, 0x67, 0xda, 0xa8, 0x6a, 0x4a, 0xa7, 0x69, 0x72, 0x39, 0x08, 0x82, 0x53, 0xf6, 0x42
};

// Label selecting which key is derived.
static const BYTE label = SYMCRYPT_SSHKDF_ENCRYPTION_KEY_CLIENT_TO_SERVER;

// Expected 24 bytes of derived output.
static const BYTE pbResult[] =
{
    0x4a, 0x63, 0x14, 0xd2, 0xf7, 0x51, 0x1b, 0xf8, 0x8f, 0xad, 0x39, 0xfb, 0x68, 0x92, 0xf3, 0xf2, 0x18, 0xca, 0xfd, 0x53, 0x0e, 0x72, 0xfe, 0x43
};

//
// SymCryptSshKdfSha256SelfTest
//
// Derives sizeof(pbResult) bytes with SSH-KDF over SHA-256 from the vectors
// above and calls SymCryptFatal('sshk') if they differ from pbResult.
//
VOID
SYMCRYPT_CALL
SymCryptSshKdfSha256SelfTest(void)
{
    SYMCRYPT_SSHKDF_EXPANDED_KEY expandedKey;
    SYMCRYPT_ALIGN BYTE rbResult[sizeof(pbResult)];

    // Return values of both KDF calls are not checked here; the memcmp below is the test.
    SymCryptSshKdfExpandKey(&expandedKey, SymCryptSha256Algorithm, pbKey, sizeof(pbKey));

    SymCryptSshKdfDerive(&expandedKey,
                        pbHash, sizeof(pbHash),
                        label,
                        pbSessionId, sizeof(pbSessionId),
                        rbResult, sizeof(rbResult)
                        );

    // NOTE(review): presumably a fault-injection hook used by the test harness - confirm.
    SymCryptInjectError(rbResult, sizeof(rbResult));

    if (memcmp(rbResult, pbResult, sizeof(pbResult)) != 0)
    {
        SymCryptFatal('sshk');
    }
}

// [patch boundary] diff --git a/libs/symcrypt/lib/ssh_kdf_sha512.c b/libs/symcrypt/lib/ssh_kdf_sha512.c | new file mode 100644 | index 00000000000..909403753c1 | --- /dev/null | +++ b/libs/symcrypt/lib/ssh_kdf_sha512.c | @@ -0,0 +1,70 @@
// Everything below belongs to a separate translation unit; the static names
// pbKey, pbHash, pbSessionId, label and pbResult are redefined there.
//
// ssh_kdf_sha512.c
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//

#include "precomp.h"



// Key input for the SHA-512 self test.
// The first four bytes (0x00000080 = 128) equal the number of bytes that follow.
static const BYTE pbKey[] =
{
    0x00, 0x00, 0x00, 0x80, 0x57, 0x53, 0x08, 0xca, 0x39, 0x57, 0x98, 0xbb, 0x21, 0xec, 0x54, 0x38,
    0xc4, 0x6a, 0x88, 0xff, 0xa3, 0xf7, 0xf7, 0x67, 0x1c, 0x06, 0xf9, 0x24, 0xab, 0xf7, 0xc3, 0xcf,
    0xb4, 0x6c, 0x78, 0xc0, 0x25, 0x59, 0x6e, 0x4a, 0xba, 0x50, 0xc3, 0x27, 0x10, 0x89, 0x18, 0x4a,
    0x44, 0x7a, 0x57, 0x1a, 0xbb, 0x7f, 0x4a, 0x1b, 0x1c, 0x41, 0xf5, 0xd5, 0xca, 0x80, 0x62, 0x94,
    0x0d, 0x43, 0x69, 0x77, 0x85, 0x89, 0xfd, 0xe8, 0x1a, 0x71, 0xb2, 0x22, 0x8f, 0x01, 0x8c, 0x4c,
    0x83, 0x6c, 0xf3, 0x89, 0xf8, 0x54, 0xf8, 0x6d, 0xe7, 0x1a, 0x68, 0xb1, 0x69, 0x3f, 0xe8, 0xff,
    0xa1, 0xc5, 0x9c, 0xe7, 0xe9, 0xf9, 0x22, 0x3d, 0xeb, 0xad, 0xa2, 0x56, 0x6d, 0x2b, 0x0e, 0x56,
    0x78, 0xa4, 0x8b, 0xfb, 0x53, 0x0e, 0x7b, 0xee, 0x42, 0xbd, 0x2a, 0xc7, 0x30, 0x4a, 0x0a, 0x5a,
    0xe3, 0x39, 0xa2, 0xcd
};

// Hash-value input (64 bytes) passed as pbHashValue.
static const BYTE pbHash[] =
{
    0xa4, 0x12, 0x5a, 0xa9, 0x89, 0x80, 0x92, 0xca, 0x50, 0xc3, 0xc1, 0x63, 0x1c, 0x03, 0xdc, 0xbc,
    0x9d, 0xf9, 0x5c, 0xeb, 0xb4, 0x09, 0x88, 0x1e, 0x58, 0x01, 0x08, 0xb6, 0xcc, 0x47, 0x04, 0xb7,
    0x6c, 0xc7, 0x7b, 0x87, 0x95, 0xfd, 0x59, 0x40, 0x56, 0x1e, 0x32, 0x24, 0xcc, 0x75, 0x84, 0x85,
    0x18, 0x99, 0x2b, 0xd8, 0xd9, 0xb7, 0x0f, 0xe0, 0xfc, 0x97, 0x7a, 0x47, 0x60, 0x63, 0xc8, 0xbf
};

// Session-id input; in this vector it is byte-for-byte the same as pbHash.
static const BYTE pbSessionId[] =
{
    0xa4, 0x12, 0x5a, 0xa9, 0x89, 0x80, 0x92, 0xca, 0x50, 0xc3, 0xc1, 0x63, 0x1c, 0x03, 0xdc, 0xbc,
    0x9d, 0xf9, 0x5c, 0xeb, 0xb4, 0x09, 0x88, 0x1e, 0x58, 0x01, 0x08, 0xb6, 0xcc, 0x47, 0x04, 0xb7,
    0x6c, 0xc7, 0x7b, 0x87, 0x95, 0xfd, 0x59, 0x40, 0x56, 0x1e, 0x32, 0x24, 0xcc, 0x75, 0x84, 0x85,
    0x18, 0x99, 0x2b, 0xd8, 0xd9, 0xb7, 0x0f, 0xe0, 0xfc, 0x97, 0x7a, 0x47, 0x60, 0x63, 0xc8, 0xbf
};

// Label selecting which key is derived.
static const BYTE label = SYMCRYPT_SSHKDF_ENCRYPTION_KEY_CLIENT_TO_SERVER;

// Expected 24 bytes of derived output.
static const BYTE pbResult[] =
{
    0x7e, 0x4a, 0x72, 0x1f, 0xb7, 0x37, 0x9e, 0xbb, 0x42, 0x33, 0x06, 0x46, 0x4d, 0x57, 0xdb, 0x46,
    0xaf, 0xa3, 0xcc, 0xa1, 0x0a, 0x1d, 0x7f, 0xeb
};

//
// SymCryptSshKdfSha512SelfTest
//
// Derives sizeof(pbResult) bytes with SSH-KDF over SHA-512 from the vectors
// above and calls SymCryptFatal('sshk') if they differ from pbResult.
//
VOID
SYMCRYPT_CALL
SymCryptSshKdfSha512SelfTest(void)
{
    SYMCRYPT_SSHKDF_EXPANDED_KEY expandedKey;
    SYMCRYPT_ALIGN BYTE rbResult[sizeof(pbResult)];

    // Return values of both KDF calls are not checked here; the memcmp below is the test.
    SymCryptSshKdfExpandKey(&expandedKey, SymCryptSha512Algorithm, pbKey, sizeof(pbKey));

    SymCryptSshKdfDerive(&expandedKey,
                        pbHash, sizeof(pbHash),
                        label,
                        pbSessionId, sizeof(pbSessionId),
                        rbResult, sizeof(rbResult)
                        );

    SymCryptInjectError(rbResult, sizeof(rbResult));

    if (memcmp(rbResult, pbResult, sizeof(pbResult)) != 0)
    {
        SymCryptFatal('sshk');
    }
}

// [patch boundary] diff --git a/libs/symcrypt/lib/sskdf.c b/libs/symcrypt/lib/sskdf.c | new file mode 100644 | index 00000000000..65aebaf24dd | --- /dev/null | +++ b/libs/symcrypt/lib/sskdf.c | @@ -0,0 +1,266 @@
//
// sskdf.c
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//

//
// This module implements Single-Step KDF as specified in SP800-56C section 4.
//

#include "precomp.h"

//
// See the symcrypt.h file for documentation on what the various functions do.
+// + + +#define SYMCRYPT_SSKDF_KMAC_128_DEFAULT_SALT_SIZE (164) +#define SYMCRYPT_SSKDF_KMAC_256_DEFAULT_SALT_SIZE (132) +#define SYMCRYPT_SSKDF_DEFAULT_SALT_MAX SYMCRYPT_SSKDF_KMAC_128_DEFAULT_SALT_SIZE + + +static const BYTE pbKmacCustomizationString[3] = { 'K', 'D', 'F' }; + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfMacExpandSalt( + _Out_ PSYMCRYPT_SSKDF_MAC_EXPANDED_SALT pExpandedSalt, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + const BYTE pbSaltDefault[SYMCRYPT_SSKDF_DEFAULT_SALT_MAX] = { 0 }; + + SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedSalt->macKey ) ); + + pExpandedSalt->macAlg = macAlgorithm; + + if ( pbSalt == NULL ) + { + if ( macAlgorithm == SymCryptKmac128Algorithm ) + { + cbSalt = SYMCRYPT_SSKDF_KMAC_128_DEFAULT_SALT_SIZE; + } + else if ( macAlgorithm == SymCryptKmac256Algorithm ) + { + cbSalt = SYMCRYPT_SSKDF_KMAC_256_DEFAULT_SALT_SIZE; + } + else + { + cbSalt = SymCryptHashInputBlockSize( *(macAlgorithm->ppHashAlgorithm) ); + } + + pbSalt = pbSaltDefault; + } + + if ( macAlgorithm == SymCryptKmac128Algorithm ) + { + scError = SymCryptKmac128ExpandKeyEx( + &pExpandedSalt->macKey.kmac128Key, + pbSalt, + cbSalt, + pbKmacCustomizationString, + sizeof( pbKmacCustomizationString ) ); + } + else if ( macAlgorithm == SymCryptKmac256Algorithm ) + { + scError = SymCryptKmac256ExpandKeyEx( + &pExpandedSalt->macKey.kmac256Key, + pbSalt, + cbSalt, + pbKmacCustomizationString, + sizeof( pbKmacCustomizationString ) ); + } + else + { + scError = macAlgorithm->expandKeyFunc( &pExpandedSalt->macKey, pbSalt, cbSalt ); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfMacDerive( + _In_ PCSYMCRYPT_SSKDF_MAC_EXPANDED_SALT pExpandedSalt, + SIZE_T cbMacOutputSize, + _In_reads_(cbSecret) PCBYTE pbSecret, + SIZE_T cbSecret, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + 
SIZE_T cbResult) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_MAC_STATE state; + PCSYMCRYPT_MAC pMacAlgorithm = pExpandedSalt->macAlg; + PSYMCRYPT_MAC_RESULT_EX resultFuncEx = NULL; + + SYMCRYPT_ALIGN BYTE rbPartialResult[SYMCRYPT_MAC_MAX_RESULT_SIZE]; + PBYTE pbCurr = pbResult; + + SIZE_T cbMacResultSize = pMacAlgorithm->resultSize; + SIZE_T cbBlock; + UINT32 cntr = 1; + BYTE ctrBuf[4]; + + if ( cbMacOutputSize > 64 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ( pMacAlgorithm == SymCryptKmac128Algorithm ) + { + cbMacResultSize = cbMacOutputSize; + resultFuncEx = SymCryptKmac128ResultEx; + } + else if ( pMacAlgorithm == SymCryptKmac256Algorithm ) + { + cbMacResultSize = cbMacOutputSize; + resultFuncEx = SymCryptKmac256ResultEx; + } + else if ( cbMacOutputSize > 0 && cbMacOutputSize != pMacAlgorithm->resultSize ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + while ( cbResult > 0 ) + { + SYMCRYPT_STORE_MSBFIRST32( ctrBuf, cntr ); + + // Calculate K(i) = H(counter || Z || FixedInfo) + pMacAlgorithm->initFunc( &state, &pExpandedSalt->macKey ); + pMacAlgorithm->appendFunc( &state, ctrBuf, sizeof( ctrBuf ) ); + pMacAlgorithm->appendFunc( &state, pbSecret, cbSecret ); + pMacAlgorithm->appendFunc( &state, pbInfo, cbInfo ); + + cbBlock = SYMCRYPT_MIN( cbResult, cbMacResultSize ); + + if ( resultFuncEx != NULL ) + { + if ( cbMacOutputSize > 0 ) + { + resultFuncEx( &state, rbPartialResult, cbMacOutputSize ); + } + else + { + // If the output size is not specified, calculate the full result + resultFuncEx( &state, pbResult, cbResult ); + break; + } + } + else + { + pMacAlgorithm->resultFunc( &state, rbPartialResult ); + } + + // Store the result in the output buffer + memcpy( pbCurr, rbPartialResult, cbBlock ); + + // Update counters + cntr++; + pbCurr += cbBlock; + cbResult -= cbBlock; + } + +cleanup: + + SymCryptWipeKnownSize( &rbPartialResult[0], sizeof( rbPartialResult ) ); + + return scError; +} + 
+SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfMac( + _In_ PCSYMCRYPT_MAC macAlgorithm, + SIZE_T cbMacOutputSize, + _In_reads_(cbSecret) PCBYTE pbSecret, + SIZE_T cbSecret, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_SSKDF_MAC_EXPANDED_SALT expandedSalt; + + scError = SymCryptSskdfMacExpandSalt( &expandedSalt, macAlgorithm, pbSalt, cbSalt ); + + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptSskdfMacDerive( + &expandedSalt, + cbMacOutputSize, + pbSecret, + cbSecret, + pbInfo, + cbInfo, + pbResult, + cbResult ); + +cleanup: + + SymCryptWipeKnownSize( &expandedSalt, sizeof( expandedSalt ) ); + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfHash( + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbHashOutputSize, + _In_reads_(cbSecret) PCBYTE pbSecret, + SIZE_T cbSecret, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_HASH_STATE state; + + PBYTE pbCurr = pbResult; + + SIZE_T cbHashResultSize = hashAlgorithm->resultSize; + SIZE_T cbPartialResult; + UINT32 cntr = 1; + BYTE ctrBuf[4]; + + if ( cbHashOutputSize > 64 || + cbHashOutputSize > 0 && cbHashOutputSize != hashAlgorithm->resultSize ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + while ( cbResult > 0 ) + { + SYMCRYPT_STORE_MSBFIRST32( ctrBuf, cntr ); + + cbPartialResult = SYMCRYPT_MIN( cbResult, cbHashResultSize ); + + // Calculate K(i) = H(counter || Z || FixedInfo) + SymCryptHashInit( hashAlgorithm, &state ); + SymCryptHashAppend( hashAlgorithm, &state, ctrBuf, sizeof( ctrBuf ) ); + SymCryptHashAppend( hashAlgorithm, &state, pbSecret, cbSecret ); + SymCryptHashAppend( hashAlgorithm, &state, pbInfo, cbInfo ); + SymCryptHashResult( hashAlgorithm, &state, pbCurr, cbPartialResult ); + + // Update 
counters + cntr++; + pbCurr += cbPartialResult; + cbResult -= cbPartialResult; + } + + return SYMCRYPT_NO_ERROR; +} diff --git a/libs/symcrypt/lib/tlsCbcVerify.c b/libs/symcrypt/lib/tlsCbcVerify.c new file mode 100644 index 00000000000..26a1a173a07 --- /dev/null +++ b/libs/symcrypt/lib/tlsCbcVerify.c @@ -0,0 +1,458 @@ +// +// tlsCbcVerify.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// This code needs to process data in words, and we'd like to use 32-bit words on 32-bit +// architectures and 64-bit words on 64-bit architectures. So we use NATIVE_UINT & friends. +// +// We don't want to use 64-bit words on 32-bit architectures because the 64-bit shift/rotate +// code might not be constant-time, and it puts further register pressure on the x86 that can only +// use 6 registers in C code. +// + +#if NATIVE_BYTES == 8 +#define NATIVE_01 (0x0101010101010101) +#elif NATIVE_BYTES == 4 +#define NATIVE_01 (0x01010101) +#else +#error Unexpected NATIVE_BYTES value +#endif + +// +// MASK32 macros return UINT32 values based on conditions of the inputs +// + +// MASK32_LT returns a UINT32 that is -1 if _a < _b, 0 otherwise. +#define MASK32_LT( _a, _b ) ((UINT32)( ( (INT32)((_a)-(_b)) ) >> 31 ) ) + +// MASK32_EQ returns a UINT32 that is -1 if _a == _b, 0 otherwise. +#define MASK32_EQ( _a, _b ) (~(UINT32)(-(INT32)((_a) ^ (_b)) >> 31)) + + +// +// Native Byte mask generation is done in inlined functions, as that makes them much more readable +// These are mask values that are computed per byte in the word. +// + +// Relevant bits to look at when determining whether an index is in the word +// Difference of index & word start must be in 0..NATIVE_BYTES - 1 +// This mask defines the relevant bits we look at. +// We avoid using the highest bits as we use fact that the result after the mask +// is positive. This works as all our positions are < 2^16. 
+#define MASKNB_INWORD_RELEVANTBITS (~(NATIVE_BYTES - 1) & 0x0fffffff) + +#define MASKNB_BROADCAST( _b ) ((NATIVE_UINT)(_b) * NATIVE_01) + +FORCEINLINE +NATIVE_UINT +SymCryptNMaskGe( UINT32 wordStart, UINT32 boundary ) +// Return a word starting at byte wordStart from an array with a[i] = 0xff if i>=boundary, 0 otherwise +{ + INT32 diff32; + NATIVE_INT anySet; + UINT32 shift; + + // Mask that is -1 if boundary < wordStart + 8 + anySet = ((NATIVE_INT) boundary - (NATIVE_INT) wordStart - NATIVE_BYTES) >> (NATIVE_BITS - 1); + + // Compute the index of boundary into the word, possibly negative + diff32 = (INT32)boundary - (INT32)wordStart; + // Compute the necessary shift when the result will be partially set + shift = 8 * (diff32 & (NATIVE_BYTES - 1)); + + // Mask the shift to 0 if the word is to be all set as boundary < wordStart + shift &= (INT32)~diff32 >> 31; + + return (NATIVE_UINT) anySet << shift; +} + +FORCEINLINE +NATIVE_UINT +SymCryptNMaskEq( UINT32 wordStart, UINT32 boundary ) +// Return a word starting at byte wordStart from an array with a[i] = (i == boundary) ? 0xff : 0 +{ + INT32 diff32; + NATIVE_UINT inWord; + + // 32-bit signed difference + diff32 = (INT32)boundary - (INT32)wordStart; + + // inWord = (-1) if boundary is within the word, 0 otherwise + // Cast to NATIVE_UINT is free on AMD64, as is subsequent cast to NATIVE_INT + // A direct cast from INT32 to NATIVE_INT requires a sign extension instruction, so this is faster. + inWord = ~ ((-(NATIVE_INT)(NATIVE_UINT)(diff32 & MASKNB_INWORD_RELEVANTBITS)) >> (NATIVE_BITS -1)); + + return inWord & ((NATIVE_UINT)0xff << 8 * (diff32 & (NATIVE_BYTES - 1)) ); +} + +FORCEINLINE +NATIVE_UINT +SymCryptNMaskEq80( UINT32 wordStart, UINT32 boundary ) +// Same as SymcryptNMaskEq except the 0xff is replaced with 0x80. 
+{ + INT32 diff32; + NATIVE_UINT inWord; + + // 32-bit signed difference + diff32 = (INT32)boundary - (INT32)wordStart; + + // inWord = (-1) if boundary is within the word, 0 otherwise + inWord = ~ ((-(NATIVE_INT)(NATIVE_UINT)(diff32 & MASKNB_INWORD_RELEVANTBITS)) >> (NATIVE_BITS -1)); + + return inWord & ((NATIVE_UINT)0x80 << 8 * (diff32 & (NATIVE_BYTES - 1)) ); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsCbcHmacVerifyCore( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_reads_bytes_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Inout_updates_( pHash->inputBlockSize / 2) PBYTE pbMacValue, + _Inout_updates_( pHash->resultSize ) PBYTE pbHashResult, + _Out_ PUINT32 pu32PaddingError ) +// +// The core of the constant-time TLS record validation. +// This appends the data part of the record to the hash state, returns the intermediate hash value, +// and extracts the MAC value out of the record and returns it. +// +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + UINT32 cbPad; + UINT32 maxPadLength; + UINT32 u32; + UINT32 i; + UINT32 iPaddingStart; // using 'i' for index + UINT32 iMacStart; + NATIVE_UINT mInData; + NATIVE_UINT mInMac; + NATIVE_UINT mInPadding; + NATIVE_UINT m; + NATIVE_UINT nPaddingError = 0; // nonzero if a padding byte value is wrong. + UINT32 next; + UINT32 cbHashPrefix; + UINT32 cbExtendedData; + UINT32 totalBytesHashed; + UINT32 hashPaddingFinal; + UINT32 resultHashBlockIndex; + UINT32 lastHashBlockIndex; + NATIVE_UINT w; + NATIVE_UINT data; + UINT32 backOffset; + NATIVE_UINT * bufferLocation; + NATIVE_UINT padBytes; + UINT32 m32ResultBlock; + NATIVE_UINT mResultBlock; + SIZE_T tmp; + const UINT32 cbMacValue = pHash->inputBlockSize / 2; + + SYMCRYPT_ASSERT( cbMacValue == SymCryptRoundUpPow2Sizet(pHash->resultSize) ); + + // Process all the data up to the part where the MAC value might appear + // The if() is safe as both cbData and u32 are public values. 
+ u32 = pHash->resultSize + 256; + if( cbData > u32 ) + { + (*pHash->appendFunc)(pState, pbData, cbData-u32 ); + pbData += cbData - u32; + cbData = u32; + } + + // Check that we have enough data for a valid record. + // We need one MAC value plus one padding_length byte + if (cbData < pHash->resultSize + 1) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + // We OR our results into the result buffers, so we must init them to zero + SymCryptWipe( pbMacValue, cbMacValue ); + SymCryptWipe( pbHashResult, pHash->resultSize ); + + // Pick up the padding_length. Note that this is the value we have to keep secret from + // side-channel attacks. + cbPad = pbData[cbData - 1]; + + // We reduce cbData so that the padding_length byte is no longer under consideration. + cbData -= 1; + + // Bound the padding length to cbData - mac_length + // This doesn't reveal data as we treat all cbPad values the same, but it makes our + // further computations easier + maxPadLength = (UINT32)cbData - pHash->resultSize; // We checked this is >= 0 + u32 = MASK32_LT( maxPadLength, cbPad ); // mask: maxPadLength < cbPad + cbPad = cbPad + ((maxPadLength - cbPad) & u32); + nPaddingError |= u32; // mark as padding error + + // From here on out we maintain indices into a conceptual extended buffer with length cbExtendedData, + // and index 0 at the start of the hash computation. + // This aligns us with the hash input block, and simplifies hash padding computations and word + // accesses. + // However, we must always subtract cbHashPrefix from indices before using them to access bytes + // in pbData, as the HashPrefix was already hashed into the MAC state. 
+ + cbHashPrefix = (UINT32)pState->dataLengthL; // # bytes already hashed into the MAC state + cbExtendedData = (UINT32)cbData + cbHashPrefix; // total # bytes that the conceptual extended buffer has + next = cbHashPrefix; // next index we will consider for processing (start of pbData) + + totalBytesHashed = cbExtendedData - pHash->resultSize - cbPad; + SYMCRYPT_STORE_MSBFIRST32( &hashPaddingFinal, totalBytesHashed * 8 ); // Length padding for result hash block + + // We need to figure out what the index is of the last hash input block in the computation + // (including any phantom blocks after the actual hash is done) and the index of the result + // hash block of the actual hash computation. + // We've limited the max input for simplicity (everything fits in 32 bits) + // We also avoid 64-bit operations as their implementation on 32-bit architectures might not + // always be constant-time. + // This computation works for SHA-1, SHA-256, and SHA-384 which is all we care about + // We avoid using % as the runtime isn't constant and our inputs are secret. + + // First the actual # bytes in the real and phantom computation + resultHashBlockIndex = totalBytesHashed + 1 + pHash->inputBlockSize / 8; // 1 byte 0x80 + length padding in last block + lastHashBlockIndex = resultHashBlockIndex + cbPad; // The furthest any hash could go + + // round up to a whole # blocks + resultHashBlockIndex = (resultHashBlockIndex + pHash->inputBlockSize - 1) & ~(pHash->inputBlockSize - 1); + lastHashBlockIndex = (lastHashBlockIndex + pHash->inputBlockSize - 1) & ~(pHash->inputBlockSize - 1); + + // Compute the indices where the MAC and padding start + iPaddingStart = cbExtendedData - cbPad; + iMacStart = iPaddingStart - pHash->resultSize; + + SYMCRYPT_ASSERT( iMacStart < cbExtendedData ); // Fail if the last computation underflowed. 
+ + // Align our handling to the native word size so that we can safely use native words + if( (next & (NATIVE_BYTES - 1)) != 0 ) + { + backOffset = next & (NATIVE_BYTES - 1); + + // Process a partial word + SYMCRYPT_ASSERT( ( (next ^ pState->bytesInBuffer) & (NATIVE_BYTES - 1) ) == 0 ); + + // Read a word; as the MAC value is > 8 bytes this won't overflow the buffer + w = *(NATIVE_UINT *) &pbData[0]; + m = SymCryptNMaskGe( next, iMacStart ); + mInData = ~m; + mInMac = m; + + data = w & mInData; + data |= SymCryptNMaskEq80( next, iMacStart ); // add 0x80 byte @ iMacStart + + // Now we put the data into the hash buffer + bufferLocation = (NATIVE_UINT *)&pState->buffer[ pState->bytesInBuffer - backOffset ]; + *bufferLocation = (*bufferLocation & (((NATIVE_UINT)1 << 8*backOffset) - 1)) | (data << 8*backOffset); + pState->bytesInBuffer += NATIVE_BYTES - backOffset; + + // And the MAC data in the mac buffer + *(NATIVE_UINT *)&pbMacValue[(next - backOffset) & (cbMacValue - 1)] |= (w & mInMac) << 8*backOffset; + + if( pState->bytesInBuffer == pHash->inputBlockSize ) + { + // Block is full. This can't be the result block as we didn't have room for the padding yet. 
+ (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, &pState->buffer[0], pHash->inputBlockSize, &tmp ); + pState->bytesInBuffer = 0; + } + + next += NATIVE_BYTES - backOffset; + } + + padBytes = MASKNB_BROADCAST( cbPad ); + + // Now we can loop over the data in whole words + while( next <= cbExtendedData - NATIVE_BYTES ) + { + w = *(NATIVE_UINT *) &pbData[next - cbHashPrefix]; + + m = SymCryptNMaskGe( next, iMacStart ); + mInMac = m; + mInData = ~m; + + m = SymCryptNMaskGe( next, iPaddingStart ); + mInPadding = m; + mInMac &= ~m; + + data = w & mInData; + data |= SymCryptNMaskEq80( next, iMacStart ); // add 0x80 byte @ iMacStart + + *(NATIVE_UINT *)(&pState->buffer[ pState->bytesInBuffer ]) = data; + pState->bytesInBuffer += NATIVE_BYTES; + + if (pState->bytesInBuffer == pHash->inputBlockSize) + { + // Insert the length component of the hash padding (only in result block) + m32ResultBlock = MASK32_EQ( next, resultHashBlockIndex - NATIVE_BYTES ); + *(UINT32*) &pState->buffer[ pHash->inputBlockSize - 4 ] |= hashPaddingFinal & m32ResultBlock; + + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, &pState->buffer[0], pHash->inputBlockSize, &tmp ); + SYMCRYPT_ASSERT( tmp == 0 ); + + mResultBlock = (NATIVE_UINT)(NATIVE_INT)(INT32) m32ResultBlock; // Convert 32-bit mask to native mask + + // Masked copy of result to result buffer + // We do whole words, and then an optional UINT32 to handle the 20-byte SHA-1 result on AMD64. + // The for() and if() are side-channel safe as the resultSize and NATIVE_BYTES values are public. 
+ for ( i = 0; i < pHash->resultSize / NATIVE_BYTES; i++) + { + ((NATIVE_UINT *)pbHashResult)[i] |= ((NATIVE_UINT *)((PBYTE)pState + pHash->chainOffset))[i] & mResultBlock; + } + if( (pHash->resultSize & (NATIVE_BYTES - 1)) != 0 ) + { + *(UINT32 *) (&pbHashResult[ pHash->resultSize - 4 ]) |= *(UINT32 *) ((PBYTE) pState + pHash->chainOffset + pHash->resultSize - 4) & (UINT32) mResultBlock; + } + pState->bytesInBuffer = 0; + } + + *(NATIVE_UINT *)&pbMacValue[next & (cbMacValue - 1)] |= w & mInMac; + + nPaddingError |= (w ^ padBytes) & mInPadding; + + next += NATIVE_BYTES; + } + + if( next < cbExtendedData ) + { + // Process the remaining bytes. This can't be data so we only do the MAC and padding... + // The main difference is that we read the last full word in pbData and then align it + // as if we read the next word starting at pbData[next - cbHashPrefix] + w = *(NATIVE_UINT *) &pbData[ cbData - NATIVE_BYTES ]; // last word + w >>= 8 * (next - cbExtendedData + NATIVE_BYTES ); // Shift to right location + padBytes >>= 8 * (next - cbExtendedData + NATIVE_BYTES ); // Zero padBytes that are never read + + m = SymCryptNMaskGe( next, iPaddingStart ); + mInPadding = m; + mInMac = ~m; + + *(NATIVE_UINT *)&pbMacValue[next & (cbMacValue - 1)] |= w & mInMac; + + nPaddingError |= (w ^ padBytes) & mInPadding; + next = cbExtendedData; + } + + // At this point we still have to potentially do one more hash block. + // The data is all copied into the hash input buffer, as is the 0x80 padding byte. + + if (next < lastHashBlockIndex) + { + // there is still one more hash block to compute. This could either be the actual last block of the hash + // computation, or a phantom block for side-channel hiding. + // This IF depends only on the cbData, the # bytes hashed before this final pbData buffer, and the hash algorithm + // properties. + // We never need to compute more than 1 additional hash block as we are at least pHash->resultSize bytes beyond the + // actual data. 
+ SymCryptWipe( &pState->buffer[ pState->bytesInBuffer], pHash->inputBlockSize - pState->bytesInBuffer ); + + // Just put in the padding, no need to mask this + *(UINT32*) &pState->buffer[ pHash->inputBlockSize - 4 ] = hashPaddingFinal; + + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, &pState->buffer[0], pHash->inputBlockSize, &tmp ); + SYMCRYPT_ASSERT( tmp == 0 ); + + // Masked copy of the result + mResultBlock = (NATIVE_UINT)(NATIVE_INT)(INT32) MASK32_EQ( lastHashBlockIndex, resultHashBlockIndex ); + + // Masked copy of result to result buffer + // We do whole words, and then an optional UINT32 to handle the 20-byte SHA-1 result on AMD64. + for ( i = 0; i < pHash->resultSize / NATIVE_BYTES; i++) + { + ((NATIVE_UINT *)pbHashResult)[i] |= ((NATIVE_UINT *)((PBYTE)pState + pHash->chainOffset))[i] & mResultBlock; + } + if( (pHash->resultSize & (NATIVE_BYTES - 1)) != 0 ) + { + *(UINT32 *) (&pbHashResult[ pHash->resultSize - 4 ]) |= *(UINT32 *) ((PBYTE) pState + pHash->chainOffset + pHash->resultSize - 4) & (UINT32) mResultBlock; + } + pState->bytesInBuffer = 0; + } + + // Now we have the hash result, and the Mac value buffer is filled with a rotated copy of the Mac value. + // We have to un-rotate the Mac value. + + // Check that we have the right hash result + //for( SIZE_T t=0; t < cbMacValue; t++ ) + //{ + // SYMCRYPT_ASSERT( pbMacValue[ (iMacStart + t) & (cbMacValue - 1 ) ] == (t >= pHash->resultSize ? 0 : pbData[iMacStart - cbHashPrefix + t] )); + //} + + SymCryptScsRotateBuffer( pbMacValue, cbMacValue, iMacStart & (cbMacValue - 1) ); + + //for( SIZE_T t=0; t < cbMacValue; t++ ) + //{ + // SYMCRYPT_ASSERT( pbMacValue[ t ] == (t >= pHash->resultSize ? 
0 : pbData[iMacStart - cbHashPrefix + t] )); + //} + +cleanup: + + nPaddingError |= nPaddingError >> (NATIVE_BITS/2); // Map possibly 64 bits down to 32 + + *pu32PaddingError = (UINT32) nPaddingError; + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsCbcHmacVerify( + _In_ PCSYMCRYPT_MAC pMacAlgorithm, + _In_ PVOID pExpandedKey, + _Inout_ PVOID pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + BYTE abMacValue[64]; + BYTE abHashResult[48]; + UINT32 u32PaddingError; + PSYMCRYPT_COMMON_HASH_STATE pHashState = (PSYMCRYPT_COMMON_HASH_STATE) pState; + PCSYMCRYPT_HASH pHashAlgorithm = *(pMacAlgorithm->ppHashAlgorithm); + UINT32 i; + + SYMCRYPT_ASSERT(pMacAlgorithm == SymCryptHmacSha1Algorithm || + pMacAlgorithm == SymCryptHmacSha256Algorithm || + pMacAlgorithm == SymCryptHmacSha384Algorithm ); + SYMCRYPT_ASSERT(((*(pMacAlgorithm->ppHashAlgorithm))->inputBlockSize)/2 <= 64); + SYMCRYPT_ASSERT((*(pMacAlgorithm->ppHashAlgorithm))->resultSize <= 48); + + SymCryptTlsCbcHmacVerifyCore( + pHashAlgorithm, + pHashState, + pbData, + cbData, + abMacValue, + abHashResult, + &u32PaddingError ); + + // We have the hash value, convert it to a MAC value + // First we set up the chaining value + memcpy( ((PBYTE)pHashState + pHashAlgorithm->chainOffset), + (PBYTE)pExpandedKey + pMacAlgorithm->outerChainingStateOffset, + pHashAlgorithm->chainSize ); + // Then copy the data & set the length + // The hash result wasn't BSWAPPED yet... 
+ if( pMacAlgorithm->resultSize <= 32 ) + { + SymCryptUint32ToMsbFirst( (UINT32 *) abHashResult, pHashState->buffer, pHashAlgorithm->resultSize / 4 ); + } else { + SymCryptUint64ToMsbFirst( (UINT64 *) abHashResult, pHashState->buffer, pHashAlgorithm->resultSize / 8 ); + } + pHashState->bytesInBuffer = pHashAlgorithm->resultSize; + pHashState->dataLengthL = pHashAlgorithm->resultSize + pHashAlgorithm->inputBlockSize; + + (*pHashAlgorithm->resultFunc)( pHashState, abHashResult ); + + // Verify in 32-bit chunks to support SHA-1 without further problems + for( i=0; i<pHashAlgorithm->resultSize / 4; i++ ) + { + u32PaddingError |= *(PUINT32)&abHashResult[4*i] ^ *(PUINT32)&abMacValue[4*i]; + } + + // We may reveal the final error-or-no-error as that will be visible anyway + return u32PaddingError == 0 ? SYMCRYPT_NO_ERROR : SYMCRYPT_AUTHENTICATION_FAILURE; +} diff --git a/libs/symcrypt/lib/tlsprf.c b/libs/symcrypt/lib/tlsprf.c new file mode 100644 index 00000000000..4a3167ec65e --- /dev/null +++ b/libs/symcrypt/lib/tlsprf.c @@ -0,0 +1,569 @@ +// +// tlsprf.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement the two PRF +// functions for the TLS protocols 1.1 and 1.2. These are used in +// the protocol's key derivation function. +// +// + +#include "precomp.h" + +// +// TLS PRF Constants +// +#define SYMCRYPT_TLS_MAX_LABEL_AND_SEED_SIZE (SYMCRYPT_TLS_MAX_LABEL_SIZE + SYMCRYPT_TLS_MAX_SEED_SIZE) + +// This **MUST** be a common multiple of MD5 +// output size and SHA1 output size. +#define SYMCRYPT_TLS_1_1_CHUNK_SIZE 80 + +// +// SymCryptTlsPrf1_1ExpandKey is the key expansion function for versions 1.0 +// and 1.1 of the TLS protocol. It takes as inputs a pointer to the expanded TLSPRF1.1 +// key, and the key material in pbKey. 
Regarding the treatment of the key +// material (the "secret"), the following is defined in RFCs 2246 and 4346: +// +// TLS's PRF is created by splitting the secret into two halves and +// using one half to generate data with P_MD5 and the other half to +// generate data with P_SHA-1, then exclusive-or'ing the outputs of +// these two expansion functions together. +// +// S1 and S2 are the two halves of the secret and each is the same +// length. S1 is taken from the first half of the secret, S2 from the +// second half. Their length is created by rounding up the length of the +// overall secret divided by two; thus, if the original secret is an odd +// number of bytes long, the last byte of S1 will be the same as the +// first byte of S2. +// +// L_S = length in bytes of secret; +// L_S1 = L_S2 = ceil(L_S / 2); +// +// The secret is partitioned into two halves (with the possibility of +// one shared byte) as described above, S1 taking the first L_S1 bytes +// and S2 the last L_S2 bytes. +// +// Note: In pre-RS1 Windows if the length of the key material of each half +// exceeded HMAC_K_PADSIZE = 64, we truncated the key. This does not comply +// with RFC 2104 (HMAC). However, as of April 2016 several cipher suites +// used keys (pre-master secret) longer than 128 bytes. To achieve interop +// with servers complying with the RFC we use the entire key for the HMAC calculation. 
+// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_1ExpandKey( + _Out_ PSYMCRYPT_TLSPRF1_1_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SIZE_T cbKeySize; + SIZE_T cbHalfSecret; + SIZE_T cbOdd; + + // Calculating the two halves + cbHalfSecret = cbKey / 2; + cbOdd = cbKey % 2; + cbKeySize = cbHalfSecret + cbOdd; + + // + // The bytes of the key material are split as following: + // cbOdd == 0 => cbKeySize == cbHalfSecret + // + // ******************************************** + // <----cbHalfSecret----><----cbHalfSecret----> + // <----cbKeySize-------><----cbKeySize-------> + // + // + // cbOdd == 1 => cbKeySize == cbHalfSecret + 1 + // + // **********************$********************** + // <----cbHalfSecret----> <----cbHalfSecret----> + // <----cbKeySize--------> + // <----cbKeySize--------> + // + // Note that the middle byte of the key input might be + // read twice (when the key length is odd). This violates + // the standard rule that input data should only be read + // once. In this case, we do this for the following reasons: + // - Avoiding the dual-read is difficult; we'd have to buffer + // an arbitrary-size input, and SymCrypt avoids memory + // allocations for symmetric algorithms. + // - The dual-reading of inputs is a problem when the + // memory is double-mapped to a different (less trusted) + // security context. (E.g. a kernel-mode operation on + // memory that is also mapped into a user address space.) + // This PRF is used by TLS in LSA where that situation + // does not occur. + // - This is used for TLS 1.0 and TLS 1.1, both of which + // are on the deprecation path. + // - In the dual-read attack, the input is typically provided + // by the attacker, and then changed whilst the code is + // accessing it. 
But if the attacker is providing the input, + // she could just as well have provided an even-length key + // input that provides full freedom for choosing both HMAC + // keys; there is simply no reason to try and perform the + // dual-read attack. + // - Even if the dual-read problem were to occur, it does not + // seem to help an attacker in any way. + + // MD5 Key Expansion + scError = SymCryptHmacMd5ExpandKey(&pExpandedKey->macMd5Key, pbKey, cbKeySize); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // SHA1 Key Expansion + scError = SymCryptHmacSha1ExpandKey(&pExpandedKey->macSha1Key, pbKey + cbHalfSecret, cbKeySize); + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipeKnownSize(&pExpandedKey->macMd5Key, sizeof(pExpandedKey->macMd5Key)); + + goto cleanup; + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_2ExpandKey( + _Out_ PSYMCRYPT_TLSPRF1_2_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) ); + + pExpandedKey->macAlg = macAlgorithm; + return macAlgorithm->expandKeyFunc( &pExpandedKey->macKey, pbKey, cbKey ); +} + +// +// SymCryptTlsPrfMac uses the expanded key and hashes the concatenated +// inputs pbAi and pbSeed. It is used by all the TLS versions per +// RFCs 2246, 4346, and 5246. +// Remark: +// - cbSeed can be 0 and pbSeed NULL. 
+// - pbResult should be of size at least pMacAlgorithm->resultSize +// + +VOID +SYMCRYPT_CALL +SymCryptTlsPrfMac( + _In_ PCSYMCRYPT_MAC pMacAlgorithm, + _In_ PCSYMCRYPT_MAC_EXPANDED_KEY pMacExpandedKey, + _In_reads_(cbAi) PCBYTE pbAi, + _In_ SIZE_T cbAi, + _In_reads_opt_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_ PBYTE pbResult) +{ + SYMCRYPT_MAC_STATE macState; + + pMacAlgorithm->initFunc( &macState, pMacExpandedKey ); + pMacAlgorithm->appendFunc(&macState, pbAi, cbAi); + + if (cbSeed > 0) + { + pMacAlgorithm->appendFunc( &macState, pbSeed, cbSeed ); + } + + pMacAlgorithm->resultFunc( &macState, pbResult ); + + // No need to wipe the state. The resultFunc wipes it. +} + +// +// SymCryptTlsPrfPHash is defined in RFCs 2246, 4346, +// and 5246 as follows: +// +// First, we define a data expansion function, P_hash(secret, data) +// which uses a single hash function to expand a secret and seed into +// an arbitrary quantity of output: +// +// P_hash(secret, seed) = HMAC_hash(secret, A(1) + seed) + +// HMAC_hash(secret, A(2) + seed) + +// HMAC_hash(secret, A(3) + seed) + ... +// +// Where + indicates concatenation. 
+// A() is defined as: +// A(0) = seed +// A(i) = HMAC_hash(secret, A(i-1)) +// + +VOID +SYMCRYPT_CALL +SymCryptTlsPrfPHash( + _In_ PCSYMCRYPT_MAC pMacAlgorithm, + _In_ PCSYMCRYPT_MAC_EXPANDED_KEY pMacExpandedKey, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _In_reads_opt_(cbAiIn) PCBYTE pbAiIn, // Buffer for the previous Ai (used in 1.1) + _In_ SIZE_T cbAiIn, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + _Out_writes_opt_(cbAiOut) PBYTE pbAiOut, // Buffer for the next Ai (only with AiIn) + SIZE_T cbAiOut) +{ + SYMCRYPT_ALIGN BYTE rbAi[SYMCRYPT_MAC_MAX_RESULT_SIZE]; + SYMCRYPT_ALIGN BYTE rbPartialResult[SYMCRYPT_MAC_MAX_RESULT_SIZE]; + BYTE * pbTmp = pbResult; + + SIZE_T cbMacResultSize = pMacAlgorithm->resultSize; + SIZE_T cbBytesToWrite = cbResult; + + if (cbAiIn == 0) + { + // Build A(1) + SymCryptTlsPrfMac( + pMacAlgorithm, + pMacExpandedKey, + pbSeed, // This is A(0) + cbSeed, + NULL, // No "seed" part for A(i)'s + 0, + rbAi); + } + else + { + // Get the previous Ai + memcpy(rbAi, pbAiIn, SYMCRYPT_MIN(SYMCRYPT_MAC_MAX_RESULT_SIZE, cbAiIn)); + } + + while (cbBytesToWrite > 0) + { + // Build HMAC( secret, A(i) + seed) + SymCryptTlsPrfMac( + pMacAlgorithm, + pMacExpandedKey, + rbAi, // this is A(i) + cbMacResultSize, + pbSeed, // the "seed" part + cbSeed, + rbPartialResult); + + // Store it in the output buffer + memcpy(pbTmp, rbPartialResult, SYMCRYPT_MIN(cbBytesToWrite, cbMacResultSize)); + + // Build A(i+1) + SymCryptTlsPrfMac( + pMacAlgorithm, + pMacExpandedKey, + rbAi, // This is A(i) + cbMacResultSize, + NULL, // No "seed" part for A(i)'s + 0, + rbAi); + + if (cbBytesToWrite <= cbMacResultSize) + { + break; + } + + pbTmp += cbMacResultSize; + cbBytesToWrite -= cbMacResultSize; + } + + // Store the next A(i) if needed + if (cbAiOut > 0) + { + memcpy(pbAiOut, rbAi, SYMCRYPT_MIN(cbAiOut,cbMacResultSize)); + } + + SymCryptWipeKnownSize(rbAi, sizeof(rbAi)); + SymCryptWipeKnownSize(rbPartialResult, sizeof(rbPartialResult)); +} + + 
+// +// The following PRF is defined in RFC 2246 and 4346: +// +// The PRF is then defined as the result of mixing the two pseudorandom +// streams by exclusive - or'ing them together. +// +// PRF(secret, label, seed) = P_MD5(S1, label + seed) XOR +// P_SHA-1(S2, label + seed); +// +// Remark: We will do the do the two P_hash computations in parallel +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_1Derive( + _In_ PCSYMCRYPT_TLSPRF1_1_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_ALIGN BYTE rbLabelAndSeed[SYMCRYPT_TLS_MAX_LABEL_AND_SEED_SIZE]; + SIZE_T cbLabelAndSeed = 0; + + SYMCRYPT_ALIGN BYTE rbAiMd5[SYMCRYPT_HMAC_MD5_RESULT_SIZE]; + SYMCRYPT_ALIGN BYTE rbPartialResultMd5[SYMCRYPT_TLS_1_1_CHUNK_SIZE]; + + SYMCRYPT_ALIGN BYTE rbAiSha1[SYMCRYPT_HMAC_SHA1_RESULT_SIZE]; + SYMCRYPT_ALIGN BYTE rbPartialResultSha1[SYMCRYPT_TLS_1_1_CHUNK_SIZE]; + + BYTE * pbTmp = pbResult; + SIZE_T cbBytesToWrite = cbResult; + + // Size checks + if ((cbLabel > SYMCRYPT_TLS_MAX_LABEL_SIZE) || (cbSeed > SYMCRYPT_TLS_MAX_SEED_SIZE)) + { + scError = SYMCRYPT_WRONG_DATA_SIZE; + goto cleanup; + } + + // Concatenating the label and the seed + pbTmp = rbLabelAndSeed; + if( cbLabel > 0 ) + { + memcpy(pbTmp, pbLabel, cbLabel); + pbTmp += cbLabel; + } + memcpy(pbTmp, pbSeed, cbSeed); + cbLabelAndSeed = cbLabel + cbSeed; + + // Build A(1)'s + SymCryptTlsPrfMac( + SymCryptHmacMd5Algorithm, + (PCSYMCRYPT_MAC_EXPANDED_KEY)&pExpandedKey->macMd5Key, + rbLabelAndSeed, // This is A(0) + cbLabelAndSeed, + NULL, // No "seed" part for A(i)'s + 0, + rbAiMd5); + + SymCryptTlsPrfMac( + SymCryptHmacSha1Algorithm, + (PCSYMCRYPT_MAC_EXPANDED_KEY)&pExpandedKey->macSha1Key, + rbLabelAndSeed, // This is A(0) + cbLabelAndSeed, + NULL, // No "seed" part for A(i)'s + 0, + rbAiSha1); + + // 
Calculate the output + pbTmp = pbResult; + while (cbBytesToWrite > 0) + { + // Calculate the two P_Hashes up to SYMCRYPT_TLS_1_1_CHUNK_SIZE bytes + + // P_MD5 + SymCryptTlsPrfPHash( + SymCryptHmacMd5Algorithm, + (PCSYMCRYPT_MAC_EXPANDED_KEY)&pExpandedKey->macMd5Key, + rbLabelAndSeed, + cbLabelAndSeed, + rbAiMd5, + SYMCRYPT_HMAC_MD5_RESULT_SIZE, + rbPartialResultMd5, + SYMCRYPT_MIN(cbBytesToWrite, SYMCRYPT_TLS_1_1_CHUNK_SIZE), + rbAiMd5, + SYMCRYPT_HMAC_MD5_RESULT_SIZE); + + // P_SHA1 + SymCryptTlsPrfPHash( + SymCryptHmacSha1Algorithm, + (PCSYMCRYPT_MAC_EXPANDED_KEY)&pExpandedKey->macSha1Key, + rbLabelAndSeed, + cbLabelAndSeed, + rbAiSha1, + SYMCRYPT_HMAC_SHA1_RESULT_SIZE, + rbPartialResultSha1, + SYMCRYPT_MIN(cbBytesToWrite, SYMCRYPT_TLS_1_1_CHUNK_SIZE), + rbAiSha1, + SYMCRYPT_HMAC_SHA1_RESULT_SIZE); + + // XOR the two into the output + SymCryptXorBytes( + rbPartialResultMd5, + rbPartialResultSha1, + pbTmp, + SYMCRYPT_MIN(cbBytesToWrite, SYMCRYPT_TLS_1_1_CHUNK_SIZE)); + + if (cbBytesToWrite <= SYMCRYPT_TLS_1_1_CHUNK_SIZE) + { + break; + } + + cbBytesToWrite -= SYMCRYPT_TLS_1_1_CHUNK_SIZE; + pbTmp += SYMCRYPT_TLS_1_1_CHUNK_SIZE; + + } + +cleanup: + SymCryptWipeKnownSize(rbLabelAndSeed, sizeof(rbLabelAndSeed)); + SymCryptWipeKnownSize(rbAiMd5, sizeof(rbAiMd5)); + SymCryptWipeKnownSize(rbPartialResultMd5, sizeof(rbPartialResultMd5)); + SymCryptWipeKnownSize(rbAiSha1, sizeof(rbAiSha1)); + SymCryptWipeKnownSize(rbPartialResultSha1, sizeof(rbPartialResultSha1)); + + return scError; +} + +// +// The following PRF is defined in RFC 5246: +// +// TLS's PRF is created by applying P_hash to the secret as: +// +// PRF(secret, label, seed) = P_<hash>(secret, label + seed) +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_2Derive( + _In_ PCSYMCRYPT_TLSPRF1_2_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + 
SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_ALIGN BYTE rbLabelAndSeed[SYMCRYPT_TLS_MAX_LABEL_AND_SEED_SIZE]; + BYTE * pbTmp; + + // Size checks + if ((cbLabel > SYMCRYPT_TLS_MAX_LABEL_SIZE) || (cbSeed > SYMCRYPT_TLS_MAX_SEED_SIZE)) + { + scError = SYMCRYPT_WRONG_DATA_SIZE; + goto cleanup; + } + + // Concatenating the label and the seed + pbTmp = rbLabelAndSeed; + if( cbLabel > 0 ) + { + memcpy(pbTmp, pbLabel, cbLabel); + pbTmp += cbLabel; + } + memcpy(pbTmp, pbSeed, cbSeed); + + // + // According to RFC 2104 (HMAC), hash the secret if its length + // exceeds the basic compression block length. This is taken + // care by the specific HMAC inside SymCryptTlsPrfPHash. + // + SymCryptTlsPrfPHash( + pExpandedKey->macAlg, + &pExpandedKey->macKey, + rbLabelAndSeed, + cbLabel + cbSeed, + NULL, + 0, + pbResult, + cbResult, + NULL, + 0); + +cleanup: + SymCryptWipeKnownSize(rbLabelAndSeed, sizeof(rbLabelAndSeed)); + + return scError; +} + +// +// The full TLS 1.0/1.1 Key Derivation Function +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_1( + _In_reads_(cbKey) PCBYTE pbKey, + _In_ SIZE_T cbKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_TLSPRF1_1_EXPANDED_KEY key; + + // Create the expanded key + scError = SymCryptTlsPrf1_1ExpandKey(&key, pbKey, cbKey); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Derive the key + scError = SymCryptTlsPrf1_1Derive( + &key, + pbLabel, + cbLabel, + pbSeed, + cbSeed, + pbResult, + cbResult); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + SymCryptWipeKnownSize(&key, sizeof(key)); + + return scError; +} + + +// +// The full TLS 1.2 Key Derivation Function +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_2( + _In_ PCSYMCRYPT_MAC pMacAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + _In_ 
SIZE_T cbKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_TLSPRF1_2_EXPANDED_KEY key; + + // Create the expanded key + scError = SymCryptTlsPrf1_2ExpandKey(&key, pMacAlgorithm, pbKey, cbKey); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Derive the key + scError = SymCryptTlsPrf1_2Derive( + &key, + pbLabel, + cbLabel, + pbSeed, + cbSeed, + pbResult, + cbResult); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + SymCryptWipeKnownSize(&key, sizeof(key)); + + return scError; +} diff --git a/libs/symcrypt/lib/xmss.c b/libs/symcrypt/lib/xmss.c new file mode 100644 index 00000000000..1bd9b116c96 --- /dev/null +++ b/libs/symcrypt/lib/xmss.c @@ -0,0 +1,2129 @@ +// +// xmss.c XMSS and XMSS^MT implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + + +// Maximum size of the domain separator prefix used in PRFs +#define SYMCRYPT_XMSS_MAX_PREFIX_SIZE SYMCRYPT_HASH_MAX_RESULT_SIZE + +// PRF domain separators +#define SYMCRYPT_XMSS_F 0x00 +#define SYMCRYPT_XMSS_H 0x01 +#define SYMCRYPT_XMSS_H_MSG 0x02 +#define SYMCRYPT_XMSS_PRF 0x03 +#define SYMCRYPT_XMSS_PRF_KEYGEN 0x04 + + +static const PCSYMCRYPT_HASH* XmssHashArray[] = { + &SymCryptSha256Algorithm, // 0 + &SymCryptSha512Algorithm, // 1 + &SymCryptShake128HashAlgorithm, // 2 + &SymCryptShake256HashAlgorithm, // 3 +}; + + +typedef enum _SYMCRYPT_XMSS_WOTSP_ALGID +{ + // Hash Fn. 
RFC-8391 SP800-208 + SYMCRYPT_XMSS_WOTSP_SHA2_256 = 0x00000001, // SHA-256 X X + SYMCRYPT_XMSS_WOTSP_SHA2_512 = 0x00000002, // SHA-512 X + SYMCRYPT_XMSS_WOTSP_SHAKE_256 = 0x00000003, // SHAKE128 X + SYMCRYPT_XMSS_WOTSP_SHAKE_512 = 0x00000004, // SHAKE256 X + SYMCRYPT_XMSS_WOTSP_SHA2_192 = 0x00000005, // SHA-256 X + SYMCRYPT_XMSS_WOTSP_SHAKE256_256 = 0x00000006, // SHAKE256 X + SYMCRYPT_XMSS_WOTSP_SHAKE256_192 = 0x00000007, // SHAKE256 X + +} SYMCRYPT_XMSS_WOTSP_ALGID, *PSYMCRYPT_XMSS_WOTSP_ALGID; + + +typedef struct _SYMCRYPT_XMSS_WOTSP_PARAMS +{ + SYMCRYPT_XMSS_WOTSP_ALGID wotspId; + UINT8 hashIndex; + UINT8 n; + UINT8 w; + UINT8 cbPrefix; + +} SYMCRYPT_XMSS_WOTSP_PARAMS, *PSYMCRYPT_XMSS_WOTSP_PARAMS; + + +static const SYMCRYPT_XMSS_WOTSP_PARAMS XmssWotspParams[] = +{ + // wotspId hashIndex n w cbPrefix + { SYMCRYPT_XMSS_WOTSP_SHA2_256, 0, 32, 4, 32 }, // SHA-256 + { SYMCRYPT_XMSS_WOTSP_SHA2_512, 1, 64, 4, 64 }, // SHA-512 + { SYMCRYPT_XMSS_WOTSP_SHAKE_256, 2, 32, 4, 32 }, // SHAKE128 + { SYMCRYPT_XMSS_WOTSP_SHAKE_512, 3, 64, 4, 64 }, // SHAKE256 + { SYMCRYPT_XMSS_WOTSP_SHA2_192, 0, 24, 4, 4 }, // SHA-256 + { SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 3, 32, 4, 32 }, // SHAKE256 + { SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 3, 24, 4, 4 }, // SHAKE256 +}; + +typedef struct _SYMCRYPT_XMSS_PARAMETER_PREDEFINED +{ + UINT32 idAlg; + + SYMCRYPT_XMSS_WOTSP_ALGID idWotsp; + + // total tree height (each level has height h/d) + UINT8 h; + + // number of layers (for single tree, d=1) + UINT8 d; + +} SYMCRYPT_XMSS_PARAMETER_PREDEFINED; + +typedef SYMCRYPT_XMSS_PARAMETER_PREDEFINED* PSYMCRYPT_XMSS_PARAMETER_PREDEFINED; +typedef const SYMCRYPT_XMSS_PARAMETER_PREDEFINED* PCSYMCRYPT_XMSS_PARAMETER_PREDEFINED; + + +static const SYMCRYPT_XMSS_PARAMETER_PREDEFINED XmssParametersPredefined[] = { + + // algId wotspId/wotspIndex h d + { SYMCRYPT_XMSS_SHA2_10_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 10, 1 }, + { SYMCRYPT_XMSS_SHA2_16_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 16, 1 }, + { SYMCRYPT_XMSS_SHA2_20_256, 
SYMCRYPT_XMSS_WOTSP_SHA2_256, 20, 1 }, + { SYMCRYPT_XMSS_SHA2_10_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 10, 1 }, + { SYMCRYPT_XMSS_SHA2_16_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 16, 1 }, + { SYMCRYPT_XMSS_SHA2_20_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 20, 1 }, + { SYMCRYPT_XMSS_SHAKE_10_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 10, 1 }, + { SYMCRYPT_XMSS_SHAKE_16_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 16, 1 }, + { SYMCRYPT_XMSS_SHAKE_20_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 20, 1 }, + { SYMCRYPT_XMSS_SHAKE_10_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 10, 1 }, + { SYMCRYPT_XMSS_SHAKE_16_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 16, 1 }, + { SYMCRYPT_XMSS_SHAKE_20_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 20, 1 }, + { SYMCRYPT_XMSS_SHA2_10_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 10, 1 }, + { SYMCRYPT_XMSS_SHA2_16_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 16, 1 }, + { SYMCRYPT_XMSS_SHA2_20_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 20, 1 }, + { SYMCRYPT_XMSS_SHAKE256_10_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 10, 1 }, + { SYMCRYPT_XMSS_SHAKE256_16_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 16, 1 }, + { SYMCRYPT_XMSS_SHAKE256_20_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 20, 1 }, + { SYMCRYPT_XMSS_SHAKE256_10_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 10, 1 }, + { SYMCRYPT_XMSS_SHAKE256_16_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 16, 1 }, + { SYMCRYPT_XMSS_SHAKE256_20_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 20, 1 }, +}; + + +static const SYMCRYPT_XMSS_PARAMETER_PREDEFINED XmssMtParametersPredefined[] = { + + // algId wotspId/wotspIndex h d + { SYMCRYPT_XMSSMT_SHA2_20_2_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 20, 2 }, + { SYMCRYPT_XMSSMT_SHA2_20_4_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 20, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_2_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 40, 2 }, + { SYMCRYPT_XMSSMT_SHA2_40_4_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 40, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_8_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 40, 8 }, + { SYMCRYPT_XMSSMT_SHA2_60_3_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 60, 3 }, + { SYMCRYPT_XMSSMT_SHA2_60_6_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 60, 6 
}, + { SYMCRYPT_XMSSMT_SHA2_60_12_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHA2_20_2_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 20, 2 }, + { SYMCRYPT_XMSSMT_SHA2_20_4_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 20, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_2_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 40, 2 }, + { SYMCRYPT_XMSSMT_SHA2_40_4_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 40, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_8_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 40, 8 }, + { SYMCRYPT_XMSSMT_SHA2_60_3_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 60, 3 }, + { SYMCRYPT_XMSSMT_SHA2_60_6_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 60, 6 }, + { SYMCRYPT_XMSSMT_SHA2_60_12_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHAKE_20_2_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 20, 2 }, + { SYMCRYPT_XMSSMT_SHAKE_20_4_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 20, 4 }, + { SYMCRYPT_XMSSMT_SHAKE_40_2_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 40, 2 }, + { SYMCRYPT_XMSSMT_SHAKE_40_4_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 40, 4 }, + { SYMCRYPT_XMSSMT_SHAKE_40_8_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 40, 8 }, + { SYMCRYPT_XMSSMT_SHAKE_60_3_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 60, 3 }, + { SYMCRYPT_XMSSMT_SHAKE_60_6_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 60, 6 }, + { SYMCRYPT_XMSSMT_SHAKE_60_12_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHAKE_20_2_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 20, 2 }, + { SYMCRYPT_XMSSMT_SHAKE_20_4_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 20, 4 }, + { SYMCRYPT_XMSSMT_SHAKE_40_2_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 40, 2 }, + { SYMCRYPT_XMSSMT_SHAKE_40_4_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 40, 4 }, + { SYMCRYPT_XMSSMT_SHAKE_40_8_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 40, 8 }, + { SYMCRYPT_XMSSMT_SHAKE_60_3_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 60, 3 }, + { SYMCRYPT_XMSSMT_SHAKE_60_6_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 60, 6 }, + { SYMCRYPT_XMSSMT_SHAKE_60_12_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHA2_20_2_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 20, 2 }, + { 
SYMCRYPT_XMSSMT_SHA2_20_4_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 20, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_2_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 40, 2 }, + { SYMCRYPT_XMSSMT_SHA2_40_4_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 40, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_8_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 40, 8 }, + { SYMCRYPT_XMSSMT_SHA2_60_3_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 60, 3 }, + { SYMCRYPT_XMSSMT_SHA2_60_6_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 60, 6 }, + { SYMCRYPT_XMSSMT_SHA2_60_12_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHAKE256_20_2_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 20, 2 }, + { SYMCRYPT_XMSSMT_SHAKE256_20_4_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 20, 4 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_2_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 40, 2 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_4_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 40, 4 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_8_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 40, 8 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_3_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 60, 3 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_6_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 60, 6 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_12_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHAKE256_20_2_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 20, 2 }, + { SYMCRYPT_XMSSMT_SHAKE256_20_4_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 20, 4 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_2_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 40, 2 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_4_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 40, 4 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_8_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 40, 8 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_3_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 60, 3 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_6_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 60, 6 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_12_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 60, 12 }, +}; + +// +// Compute the number of chains for an n-byte input and its checksum +// for Winternitz parameter w (i.e., using w-bit blocks) in an OTS scheme +// 
+VOID +SYMCRYPT_CALL +SymCryptHbsGetWinternitzLengths( + UINT32 n, + UINT32 w, + _Out_ PUINT32 puLen1, + _Out_ PUINT32 puLen2 + ) +{ + UINT32 len1; + UINT32 len2; + UINT32 maxChecksum; + UINT32 msb; + + SYMCRYPT_ASSERT(n > 0); + SYMCRYPT_ASSERT(w >= 1 && w <= 8); + + // number of w-bit digits in an n-byte input + len1 = (8 * n + (w - 1)) / w; + + // maximum value the checksum can take (each w-bit digit can have value at most 2^w-1) + maxChecksum = len1 * ((1 << w) - 1); + + msb = 31 - SymCryptCountLeadingZeros32(maxChecksum); + + // msb + 1 bits are required to store the maxChecksum, + // calculate the number of w-bit blocks to represent that + len2 = (msb + 1 + (w - 1)) / w; + + *puLen1 = len1; + *puLen2 = len2; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssGetWotspParams( + SYMCRYPT_XMSS_WOTSP_ALGID id, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + for (UINT32 i = 0; i < SYMCRYPT_ARRAY_SIZE(XmssWotspParams); i++) + { + if (XmssWotspParams[i].wotspId == id) + { + SYMCRYPT_ASSERT(XmssWotspParams[i].hashIndex < SYMCRYPT_ARRAY_SIZE(XmssHashArray)); + pParams->hash = *XmssHashArray[XmssWotspParams[i].hashIndex]; + pParams->cbHashOutput = XmssWotspParams[i].n; + pParams->nWinternitzWidth = XmssWotspParams[i].w; + pParams->cbPrefix = XmssWotspParams[i].cbPrefix; + goto cleanup; + } + } + + scError = SYMCRYPT_INVALID_ARGUMENT; + +cleanup: + + return scError; +} + +// +// Derive XMSS parameters that can be computed from others +// +// SYMCRYPT_XMSS_PARAMS structure must be initialized with either predefined +// or user defined parameters before this function is called. 
+// +VOID +SYMCRYPT_CALL +SymCryptXmssDeriveParams( + _Inout_ PSYMCRYPT_XMSS_PARAMS pParams ) +{ + SymCryptHbsGetWinternitzLengths( + pParams->cbHashOutput, + pParams->nWinternitzWidth, + &pParams->len1, + &pParams->len2); + + pParams->len = pParams->len1 + pParams->len2; + + UINT32 nChecksumBits = pParams->len2 * pParams->nWinternitzWidth; + SYMCRYPT_ASSERT(nChecksumBits <= 32); + pParams->nLeftShift32 = (UINT8)(32 - nChecksumBits); + + if (pParams->nLayers == 1) + { + // single trees have a 32-bit Idx value + pParams->cbIdx = 4; + } + else + { + // number of bytes to store h-bits for Idx + pParams->cbIdx = (pParams->nTotalTreeHeight + 7) / 8; + } + + pParams->nLayerHeight = pParams->nTotalTreeHeight / pParams->nLayers; +} + + +// +// Fill a SYMCRYPT_XMSS_PARAMS structure from either an XMSS algorithm ID or +// XMSS^MT algorithm ID from predefined parameter sets. +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssParamsFromAlgIdCommon( + UINT32 id, + BOOLEAN isMultiTree, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_INVALID_ARGUMENT; + PCSYMCRYPT_XMSS_PARAMETER_PREDEFINED pParameters = NULL; + SIZE_T uParameterCount; + + SymCryptWipeKnownSize(pParams, sizeof(*pParams)); + + if (isMultiTree) + { + pParameters = XmssMtParametersPredefined; + uParameterCount = SYMCRYPT_ARRAY_SIZE(XmssMtParametersPredefined); + } + else + { + pParameters = XmssParametersPredefined; + uParameterCount = SYMCRYPT_ARRAY_SIZE(XmssParametersPredefined); + } + + for (UINT32 i = 0; i < uParameterCount; i++) + { + if (pParameters[i].idAlg == id) + { + scError = SymCryptXmssGetWotspParams(pParameters[i].idWotsp, pParams); + + if (scError == SYMCRYPT_NO_ERROR) + { + SYMCRYPT_ASSERT(pParams->cbHashOutput <= SYMCRYPT_HASH_MAX_RESULT_SIZE); + + pParams->id = id; + pParams->nTotalTreeHeight = pParameters[i].h; + pParams->nLayers = pParameters[i].d; + SymCryptXmssDeriveParams(pParams); + } + + break; + } + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL 
+SymCryptXmssParamsFromAlgId( + SYMCRYPT_XMSS_ALGID id, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams ) +{ + return SymCryptXmssParamsFromAlgIdCommon(id, FALSE, pParams); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssMtParamsFromAlgId( + SYMCRYPT_XMSSMT_ALGID id, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams) +{ + return SymCryptXmssParamsFromAlgIdCommon(id, TRUE, pParams); +} + + +// +// Set custom XMSS/XMSS^MT parameters +// +// This function can be used to initialize SYMCRYPT_XMSS_PARAMS with +// custom parameters that are not defined by the standards. +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSetParams( + _Out_ PSYMCRYPT_XMSS_PARAMS pParams, + UINT32 id, + _In_ PCSYMCRYPT_HASH pHash, // hash algorithm + UINT32 cbHashOutput, // hash output size + UINT32 nWinternitzWidth, // Winternitz parameter + UINT32 nTotalTreeHeight, // total tree height + UINT32 nLayers, // number of layers + UINT32 cbPrefix // domain separator prefix length + ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if (pParams == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptWipeKnownSize(pParams, sizeof(*pParams)); + + if (pHash == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Output size n can at most be equal to the hash output size + if (cbHashOutput == 0 || + cbHashOutput > pHash->resultSize || + cbHashOutput > SYMCRYPT_HASH_MAX_RESULT_SIZE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Winternitz parameter must be one of 1, 2, 4, or 8 + if (nWinternitzWidth == 0 || + nWinternitzWidth > 8 || + (nWinternitzWidth & (nWinternitzWidth - 1)) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // nTotalTreeHeight and nLayers must both be positive and + // nLayers must divide nTotalTreeHeight + if (nTotalTreeHeight == 0 || + nLayers == 0 || + (nTotalTreeHeight % nLayers) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Layer height (tree height of one layer) can 
be at most 31 + if ((nTotalTreeHeight / nLayers) > 31) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Total tree height can be at most 63 + if (nTotalTreeHeight > 63) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbPrefix == 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pParams->id = id; + pParams->hash = pHash; + pParams->cbHashOutput = cbHashOutput; + pParams->nWinternitzWidth = nWinternitzWidth; + pParams->nTotalTreeHeight = nTotalTreeHeight; + pParams->nLayers = nLayers; + SymCryptXmssDeriveParams(pParams); + + pParams->cbPrefix = cbPrefix; + +cleanup: + + return scError; +} + + +// +// Updates the type field in ADRS structure and clears the +// subsequent fields. +// +// Does not modify the first two fields (Layer and Tree) of +// the ADRS structure. +// +VOID +SYMCRYPT_CALL +SymCryptXmssSetAdrsType( + _Out_ PXMSS_ADRS adrs, + UINT32 type ) +{ + SYMCRYPT_STORE_MSBFIRST32(adrs->en32Type, type); + SymCryptWipeKnownSize(&adrs->u, sizeof(adrs->u)); + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 0); +} + + +SIZE_T +SYMCRYPT_CALL +SymCryptXmssSizeofSignatureFromParams( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams ) +{ + SYMCRYPT_ASSERT(pParams->nLayers != 0); + SYMCRYPT_ASSERT((pParams->nTotalTreeHeight % pParams->nLayers) == 0); + SYMCRYPT_ASSERT(pParams->nLayerHeight > 0); + + SIZE_T size = 0; + size += pParams->cbIdx; // idx + size += pParams->cbHashOutput; // randomness + + // WOTSP signature + authentication path for each layer + size += pParams->nLayers * ( pParams->cbHashOutput * (pParams->len + pParams->nLayerHeight) ); + + return size; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSizeofKeyBlobFromParams( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + SYMCRYPT_XMSSKEY_TYPE keyType, + _Out_ SIZE_T* pcbKey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T cbPublicKey = 0; + SIZE_T cbPrivateKey = 0; + + // Public Key + cbPublicKey += sizeof(UINT32); // Alg ID + cbPublicKey += 2 * 
pParams->cbHashOutput; // Root and Seed + + // Private Key (on top of the public key) + cbPrivateKey = cbPublicKey; + cbPrivateKey += sizeof(UINT64); // Idx + cbPrivateKey += 2 * pParams->cbHashOutput; // SK_XMSS and SK_PRF + + switch (keyType) + { + case SYMCRYPT_XMSSKEY_TYPE_PUBLIC: + *pcbKey = cbPublicKey; + break; + + case SYMCRYPT_XMSSKEY_TYPE_PRIVATE: + *pcbKey = cbPrivateKey; + break; + + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + break; + } + + return scError; +} + +PSYMCRYPT_XMSS_KEY +SYMCRYPT_CALL +SymCryptXmsskeyAllocate( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + UINT32 flags ) +{ + PSYMCRYPT_XMSS_KEY pKey = NULL; + + // No flags allowed + if (flags != 0) + { + goto cleanup; + } + + SIZE_T cbSize = sizeof(SYMCRYPT_XMSS_KEY); + + pKey = SymCryptCallbackAlloc(cbSize); + + if (pKey == NULL) + { + goto cleanup; + } + + SymCryptWipe(pKey, cbSize); + pKey->version = 1; + pKey->keyType = SYMCRYPT_XMSSKEY_TYPE_NONE; + pKey->params = *pParams; + + SYMCRYPT_SET_MAGIC(pKey); + + cleanup: + + return pKey; +} + + +VOID +SYMCRYPT_CALL +SymCryptXmsskeyFree( + _Inout_ PSYMCRYPT_XMSS_KEY pKey ) +{ + SYMCRYPT_CHECK_MAGIC(pKey); + SymCryptWipeKnownSize(pKey, sizeof(*pKey)); + SymCryptCallbackFree(pKey); +} + + +PSYMCRYPT_INCREMENTAL_TREEHASH +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashInit( + UINT32 nLeaves, + PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 cbHashResult, + PSYMCRYPT_INCREMENTAL_TREEHASH_FUNC funcCompressNodes, + PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pContext ) +{ + UNREFERENCED_PARAMETER(cbBuffer); + + SYMCRYPT_ASSERT(cbBuffer >= SymCryptHbsSizeofScratchBytesForIncrementalTreehash(cbHashResult, nLeaves)); + + PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash = (PSYMCRYPT_INCREMENTAL_TREEHASH)pbBuffer; + + pIncHash->cbNode = 2 * sizeof(UINT32) + cbHashResult; + pIncHash->nSize = 0; + pIncHash->nCapacity = SymCryptHbsIncrementalTreehashStackDepth(nLeaves); + pIncHash->nLastLeafIndex = 0; + pIncHash->funcCompressNodes = funcCompressNodes; + pIncHash->pContext 
= pContext; + + return pIncHash; +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashGetNode( + _In_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + SIZE_T index ) +{ + PBYTE pNode = (PBYTE)pIncHash->arrNodes; + + pNode += index * pIncHash->cbNode; + + return (PSYMCRYPT_TREEHASH_NODE)pNode; +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashAllocNode( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + UINT32 nLeafIndex ) +{ + SYMCRYPT_ASSERT(pIncHash->nSize < pIncHash->nCapacity); + + PSYMCRYPT_TREEHASH_NODE pNode = SymCryptHbsIncrementalTreehashGetNode(pIncHash, pIncHash->nSize); + + pNode->height = 0; + pNode->index = nLeafIndex; + + pIncHash->nSize++; + + return pNode; +} + + +VOID +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashGetTopNodes( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + _Out_ PSYMCRYPT_TREEHASH_NODE *ppNodeLeft, + _Out_ PSYMCRYPT_TREEHASH_NODE *ppNodeRight ) +{ + *ppNodeRight = (pIncHash->nSize < 1) ? NULL : SymCryptHbsIncrementalTreehashGetNode(pIncHash, pIncHash->nSize - 1); + + *ppNodeLeft = (pIncHash->nSize < 2) ? 
NULL : SymCryptHbsIncrementalTreehashGetNode(pIncHash, pIncHash->nSize - 2); +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashProcessCommon( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + BOOLEAN fFinal ) +{ + PSYMCRYPT_TREEHASH_NODE pNodeLeft = NULL; + PSYMCRYPT_TREEHASH_NODE pNodeRight = NULL; + + SYMCRYPT_ASSERT(pIncHash->nSize > 0); + + SymCryptHbsIncrementalTreehashGetTopNodes(pIncHash, &pNodeLeft, &pNodeRight); + + while ( pNodeLeft && + (fFinal || (pNodeLeft->height == pNodeRight->height)) ) + { + pIncHash->funcCompressNodes( + pNodeLeft, + pNodeRight, + pNodeLeft, + pIncHash->pContext); + + pIncHash->nSize--; + + SymCryptHbsIncrementalTreehashGetTopNodes(pIncHash, &pNodeLeft, &pNodeRight); + } + + return pNodeRight; +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashProcess( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash) +{ + return SymCryptHbsIncrementalTreehashProcessCommon(pIncHash, FALSE); +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashFinalize( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash ) +{ + return SymCryptHbsIncrementalTreehashProcessCommon(pIncHash, TRUE); +} + + +UINT32 +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashStackDepth( + UINT32 nLeaves) +{ + UINT32 h; + + // Minimum height binary tree that contains nLeaves many leaves is h+1 + h = 31 - SymCryptCountLeadingZeros32(nLeaves); + + // Tree root computation will require a stack of depth equal to tree height plus 1 + return (h + 2); +} + + +SIZE_T +SYMCRYPT_CALL +SymCryptHbsSizeofScratchBytesForIncrementalTreehash( + UINT32 cbNode, + UINT32 nLeaves) +{ + SIZE_T nodeSize = cbNode + 2 * sizeof(UINT32); + SIZE_T result = (sizeof(SYMCRYPT_INCREMENTAL_TREEHASH) - sizeof(SYMCRYPT_TREEHASH_NODE)); + + result += nodeSize * SymCryptHbsIncrementalTreehashStackDepth(nLeaves); + return result; +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssPrfInit( + _In_ PCSYMCRYPT_HASH hash, + BYTE PrfType, + SIZE_T 
prefixLength, + _Out_ PSYMCRYPT_HASH_STATE state ) +{ + BYTE prefix[SYMCRYPT_XMSS_MAX_PREFIX_SIZE]; + + SYMCRYPT_ASSERT(prefixLength <= SYMCRYPT_XMSS_MAX_PREFIX_SIZE); + + SymCryptWipe(prefix, prefixLength); + prefix[prefixLength - 1] = PrfType; + + SymCryptHashInit(hash, state); + SymCryptHashAppend(hash, state, prefix, prefixLength); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssPrfKey( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _Out_ SYMCRYPT_HASH_STATE *pState ) +{ + SymCryptXmssPrfInit(pParams->hash, SYMCRYPT_XMSS_PRF, pParams->cbPrefix, pState); + SymCryptHashAppend(pParams->hash, pState, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptXmssPrf( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + BYTE PrfType, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_bytes_( cbMsg ) PCBYTE pbMsg, + SIZE_T cbMsg, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + SYMCRYPT_HASH_STATE state; + + SymCryptXmssPrfInit(pParams->hash, PrfType, pParams->cbPrefix, &state); + SymCryptHashAppend(pParams->hash, &state, pbKey, cbKey); + SymCryptHashAppend(pParams->hash, &state, pbMsg, cbMsg); + SymCryptHashResult(pParams->hash, &state, pbOutput, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssRandHash( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbLeft, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbRight, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + BYTE key[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE bitmask[2 * SYMCRYPT_HASH_MAX_RESULT_SIZE]; + SYMCRYPT_HASH_STATE stateKeyed; + SYMCRYPT_HASH_STATE stateMask; + + SYMCRYPT_ASSERT(pParams->cbHashOutput <= SYMCRYPT_HASH_MAX_RESULT_SIZE); + + SymCryptXmssPrfKey(pParams, pbSeed, pParams->cbHashOutput, &stateKeyed); + + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 1); + 
SymCryptHashStateCopy(pParams->hash, &stateKeyed, &stateMask); + SymCryptHashAppend(pParams->hash, &stateMask, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateMask, &bitmask[0], pParams->cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 2); + SymCryptHashStateCopy(pParams->hash, &stateKeyed, &stateMask); + SymCryptHashAppend(pParams->hash, &stateMask, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateMask, &bitmask[pParams->cbHashOutput], pParams->cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 0); + SymCryptHashAppend(pParams->hash, &stateKeyed, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateKeyed, key, pParams->cbHashOutput); + + SymCryptXorBytes(&bitmask[0], pbLeft, &bitmask[0], pParams->cbHashOutput); + SymCryptXorBytes(&bitmask[pParams->cbHashOutput], pbRight, &bitmask[pParams->cbHashOutput], pParams->cbHashOutput); + + SymCryptXmssPrf(pParams, SYMCRYPT_XMSS_H, key, pParams->cbHashOutput, bitmask, 2 * pParams->cbHashOutput, pbOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssTreeNodeCompress( + _In_ PSYMCRYPT_TREEHASH_NODE pNodeLeft, + _In_ PSYMCRYPT_TREEHASH_NODE pNodeRight, + _Out_ PSYMCRYPT_TREEHASH_NODE pNodeOut, + _Inout_ PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pCtxIncHash ) +{ + SYMCRYPT_STORE_MSBFIRST32(pCtxIncHash->adrs.u.hashtree.en32Height, pNodeLeft->height); + SYMCRYPT_STORE_MSBFIRST32(pCtxIncHash->adrs.u.hashtree.en32Index, pNodeLeft->index / 2); + + SymCryptXmssRandHash( + pCtxIncHash->pParams, + &pCtxIncHash->adrs, + pCtxIncHash->pbSeed, + pNodeLeft->value, + pNodeRight->value, + pNodeOut->value); + + pNodeOut->index = pNodeLeft->index / 2; + pNodeOut->height = pNodeLeft->height + 1; +} + +VOID +SYMCRYPT_CALL +SymCryptXmssLtreeNodeCompress( + _In_ PSYMCRYPT_TREEHASH_NODE pNodeLeft, + _In_ PSYMCRYPT_TREEHASH_NODE pNodeRight, + _Out_ PSYMCRYPT_TREEHASH_NODE pNodeOut, + _Inout_ PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pCtxIncHash ) +{ + 
SYMCRYPT_STORE_MSBFIRST32(pCtxIncHash->adrs.u.ltree.en32Height, pNodeLeft->height); + SYMCRYPT_STORE_MSBFIRST32(pCtxIncHash->adrs.u.ltree.en32Index, pNodeLeft->index / 2); + + SymCryptXmssRandHash( + pCtxIncHash->pParams, + &pCtxIncHash->adrs, + pCtxIncHash->pbSeed, + pNodeLeft->value, + pNodeRight->value, + pNodeOut->value); + + pNodeOut->index = pNodeLeft->index / 2; + pNodeOut->height = pNodeLeft->height + 1; +} + +VOID +SYMCRYPT_CALL +SymCryptXmssCreateWotspSecret( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSkXmss, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _Inout_ XMSS_ADRS *adrs, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + SYMCRYPT_HASH_STATE state; + + SymCryptXmssPrfInit(pParams->hash, SYMCRYPT_XMSS_PRF_KEYGEN, pParams->cbPrefix, &state); + SymCryptHashAppend(pParams->hash, &state, pbSkXmss, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, pbSeed, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &state, pbOutput, pParams->cbHashOutput); +} + +VOID +SYMCRYPT_CALL +SymCryptXmssChain( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbInput, + UINT32 startIndex, + UINT32 steps, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _Inout_ XMSS_ADRS *adrs, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + BYTE tmp[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE key[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE bm[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + SYMCRYPT_HASH_STATE stateKey; + SYMCRYPT_HASH_STATE stateMask; + + memcpy(tmp, pbInput, pParams->cbHashOutput); + + for (UINT32 i = startIndex; i < startIndex + steps; i++) + { + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Hash, i); + + SymCryptXmssPrfKey(pParams, pbSeed, pParams->cbHashOutput, &stateKey); + SymCryptHashStateCopy(pParams->hash, &stateKey, &stateMask); + + 
SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 0); + SymCryptHashAppend(pParams->hash, &stateKey, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateKey, key, pParams->cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 1); + SymCryptHashAppend(pParams->hash, &stateMask, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateMask, bm, pParams->cbHashOutput); + + SymCryptXorBytes(tmp, bm, tmp, pParams->cbHashOutput); + + SymCryptXmssPrf(pParams, SYMCRYPT_XMSS_F, key, pParams->cbHashOutput, tmp, pParams->cbHashOutput, tmp); + } + + // reset used ADRS fields + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Hash, 0); + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 0); + + memcpy(pbOutput, tmp, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssCreateWotspPublickey( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + UINT32 uLeaf, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSkXmss, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _Out_writes_bytes_opt_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash = NULL; + PSYMCRYPT_TREEHASH_NODE pNode = NULL; + SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT ctxIncHash; + + SYMCRYPT_ASSERT(cbScratch >= SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len)); + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_LTREE); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ltree.en32Leaf, uLeaf); + + ctxIncHash.adrs = *adrs; + ctxIncHash.pParams = pParams; + ctxIncHash.pbSeed = pbSeed; + + pIncHash = SymCryptHbsIncrementalTreehashInit( + pParams->len, + pbScratch, + cbScratch, + pParams->cbHashOutput, + SymCryptXmssLtreeNodeCompress, + &ctxIncHash); + + for (UINT32 i = 0; i < pParams->len; i++) + { + pNode = SymCryptHbsIncrementalTreehashAllocNode(pIncHash, i); + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_OTS); + 
SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Leaf, uLeaf); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Chain, i); + + SymCryptXmssCreateWotspSecret( + pParams, + pbSkXmss, + pbSeed, + adrs, + pNode->value); + + SymCryptXmssChain( + pParams, + pNode->value, + 0, + (1 << pParams->nWinternitzWidth) - 1, + pbSeed, + adrs, + pNode->value); + + SymCryptHbsIncrementalTreehashProcess(pIncHash); + + } + + pNode = SymCryptHbsIncrementalTreehashFinalize(pIncHash); + + memcpy(pbOutput, pNode->value, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssComputeSubtreeRoot( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ XMSS_ADRS *adrs, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSkXmss, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + UINT32 uLeaf, + UINT32 uHeight, + _Out_writes_bytes_opt_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbRoot ) +{ + UNREFERENCED_PARAMETER(cbScratch); + + PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash = NULL; + PSYMCRYPT_TREEHASH_NODE pNode = NULL; + SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT ctxIncHash; + + SYMCRYPT_ASSERT((uLeaf & ((1UL << uHeight) - 1)) == 0); // uLeaf must be a multiple of 2^uHeight + SYMCRYPT_ASSERT(pParams->nLayerHeight < 32); // Ensure nLeaves fits in 32 bits + + SIZE_T cbScratchTree = SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, 1ULL << pParams->nLayerHeight); + SIZE_T cbScratchLtree = SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len); + + SYMCRYPT_ASSERT(cbScratch >= (cbScratchTree + cbScratchLtree)); + + PBYTE pbScratchTree = pbScratch; + PBYTE pbScratchLtree = pbScratch + cbScratchTree; + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_HASH_TREE); + + ctxIncHash.adrs = *adrs; + ctxIncHash.pParams = pParams; + ctxIncHash.pbSeed = pbSeed; + + pIncHash = SymCryptHbsIncrementalTreehashInit( + 1ULL << uHeight, + pbScratchTree, + cbScratchTree, + pParams->cbHashOutput, + 
SymCryptXmssTreeNodeCompress, + &ctxIncHash); + + for (UINT32 nLeafIndex = uLeaf; nLeafIndex < uLeaf + (1UL << uHeight); nLeafIndex++) + { + pNode = SymCryptHbsIncrementalTreehashAllocNode(pIncHash, nLeafIndex); + + SymCryptXmssCreateWotspPublickey(pParams, + adrs, + nLeafIndex, + pbSkXmss, + pbSeed, + pbScratchLtree, + cbScratchLtree, + pNode->value ); + + SymCryptHbsIncrementalTreehashProcess(pIncHash); + } + + pNode = SymCryptHbsIncrementalTreehashFinalize(pIncHash); + + memcpy(pbRoot, pNode->value, pParams->cbHashOutput); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssComputePublicRoot( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_reads_bytes_( cbSeed ) PCBYTE pbSeed, + SIZE_T cbSeed, + _In_reads_bytes_( cbSkXmss ) PCBYTE pbSkXmss, + SIZE_T cbSkXmss, + _Out_writes_bytes_( cbRoot ) PBYTE pbRoot, + SIZE_T cbRoot ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + XMSS_ADRS adrs; + + SYMCRYPT_ASSERT(pParams->nLayerHeight < 32); // Ensure nLeaves fits in 32 bits + + if (pbRoot == NULL || cbRoot != pParams->cbHashOutput || + pbSeed == NULL || cbSeed != pParams->cbHashOutput || + pbSkXmss == NULL || cbSkXmss != pParams->cbHashOutput) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, 1ULL << pParams->nLayerHeight); + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len); + + SYMCRYPT_ASSERT(cbScratch > 0); + pbScratch = SymCryptCallbackAlloc(cbScratch); + + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + SymCryptWipeKnownSize(&adrs, sizeof(XMSS_ADRS)); + SYMCRYPT_STORE_MSBFIRST32(adrs.en32Layer, pParams->nLayers - 1); + + SymCryptXmssComputeSubtreeRoot( + pParams, + &adrs, + pbSkXmss, + pbSeed, + 0, + pParams->nLayerHeight, + pbScratch, + cbScratch, + pbRoot ); + +cleanup: + + if (pbScratch != NULL) + { + 
SymCryptWipe(pbScratch, cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyVerifyRoot( + _In_ PCSYMCRYPT_XMSS_KEY pKey) +{ + BYTE Root[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC(pKey); + + // key to be verified has to be a private key + if (pKey->keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptWipeKnownSize(Root, sizeof(Root)); + + scError = SymCryptXmssComputePublicRoot( + &pKey->params, + pKey->Seed, + pKey->params.cbHashOutput, + pKey->SkXmss, + pKey->params.cbHashOutput, + Root, + pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (!SymCryptEqual(Root, pKey->Root, pKey->params.cbHashOutput)) + { + scError = SYMCRYPT_HBS_PUBLIC_ROOT_MISMATCH; + } + +cleanup: + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyGenerate( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + UINT32 flags) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Wipe key material + SymCryptWipeKnownSize(pKey->Root, sizeof(pKey->Root)); + SymCryptWipeKnownSize(pKey->Seed, sizeof(pKey->Seed)); + SymCryptWipeKnownSize(pKey->SkPrf, sizeof(pKey->SkPrf)); + SymCryptWipeKnownSize(pKey->SkXmss, sizeof(pKey->SkXmss)); + pKey->Idx = 0; + + scError = SymCryptCallbackRandom(pKey->SkPrf, pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptCallbackRandom(pKey->SkXmss, pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptCallbackRandom(pKey->Seed, pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Compute public root from the private key + scError = 
SymCryptXmssComputePublicRoot( + &pKey->params, + pKey->Seed, + pKey->params.cbHashOutput, + pKey->SkXmss, + pKey->params.cbHashOutput, + pKey->Root, + pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + pKey->keyType = SYMCRYPT_XMSSKEY_TYPE_PRIVATE; + +cleanup: + + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipeKnownSize(pKey->SkPrf, sizeof(pKey->SkPrf)); + SymCryptWipeKnownSize(pKey->SkXmss, sizeof(pKey->SkXmss)); + pKey->keyType = SYMCRYPT_XMSSKEY_TYPE_NONE; + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeySetValue( + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + SYMCRYPT_XMSSKEY_TYPE keyType, + UINT32 flags, + _Inout_ PSYMCRYPT_XMSS_KEY pKey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 uAlgId; + SIZE_T cbKey; + + SYMCRYPT_ASSERT(keyType == SYMCRYPT_XMSSKEY_TYPE_PUBLIC || keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE); + + SYMCRYPT_CHECK_MAGIC(pKey); + + if ((flags & (~SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT)) != 0 || + (keyType != SYMCRYPT_XMSSKEY_TYPE_PUBLIC && keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Public root validation can only be performed for private keys + if ((flags & SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT) != 0 && + keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptXmssSizeofKeyBlobFromParams(&pKey->params, keyType, &cbKey); + + if (cbInput != cbKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + uAlgId = SYMCRYPT_LOAD_MSBFIRST32(pbInput); + pbInput += sizeof(UINT32); + + if (uAlgId != pKey->params.id) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Wipe private key material + pKey->Idx = 0; + SymCryptWipeKnownSize(pKey->SkPrf, sizeof(pKey->SkPrf)); + SymCryptWipeKnownSize(pKey->SkXmss, sizeof(pKey->SkXmss)); + + pKey->keyType = keyType; + + memcpy(pKey->Root, pbInput, 
pKey->params.cbHashOutput); + pbInput += pKey->params.cbHashOutput; + + memcpy(pKey->Seed, pbInput, pKey->params.cbHashOutput); + pbInput += pKey->params.cbHashOutput; + + if (keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + pKey->Idx = SYMCRYPT_LOAD_MSBFIRST64(pbInput); + pbInput += sizeof(UINT64); + + memcpy(pKey->SkXmss, pbInput, pKey->params.cbHashOutput); + pbInput += pKey->params.cbHashOutput; + + memcpy(pKey->SkPrf, pbInput, pKey->params.cbHashOutput); + pbInput += pKey->params.cbHashOutput; + + if ((flags & SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT) != 0) + { + // pKey has been initialized by now + scError = SymCryptXmsskeyVerifyRoot(pKey); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + } + +cleanup: + + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipeKnownSize(pKey->SkPrf, sizeof(pKey->SkPrf)); + SymCryptWipeKnownSize(pKey->SkXmss, sizeof(pKey->SkXmss)); + pKey->keyType = SYMCRYPT_XMSSKEY_TYPE_NONE; + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyGetValue( + _In_ PCSYMCRYPT_XMSS_KEY pKey, + SYMCRYPT_XMSSKEY_TYPE keyType, + UINT32 flags, + _Out_writes_bytes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T cbKey; + + SYMCRYPT_ASSERT(keyType == SYMCRYPT_XMSSKEY_TYPE_PUBLIC || keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE); + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0 || + (keyType != SYMCRYPT_XMSSKEY_TYPE_PUBLIC && keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE) || + pKey->keyType == SYMCRYPT_XMSSKEY_TYPE_NONE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Cannot export private key from a public key object + if (keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE && pKey->keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptXmssSizeofKeyBlobFromParams(&pKey->params, keyType, &cbKey); + + if (cbOutput != cbKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // + // 
Public Key + // + + // Alg Id + SYMCRYPT_STORE_MSBFIRST32(pbOutput, pKey->params.id); + pbOutput += sizeof(UINT32); + + // Root + memcpy(pbOutput, pKey->Root, pKey->params.cbHashOutput); + pbOutput += pKey->params.cbHashOutput; + + // Seed + memcpy(pbOutput, pKey->Seed, pKey->params.cbHashOutput); + pbOutput += pKey->params.cbHashOutput; + + if (keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + // + // Private Key + // + + // Idx + SYMCRYPT_STORE_MSBFIRST64(pbOutput, pKey->Idx); + pbOutput += sizeof(pKey->Idx); + + // SK_XMSS + memcpy(pbOutput, pKey->SkXmss, pKey->params.cbHashOutput); + pbOutput += pKey->params.cbHashOutput; + + // SK_PRF + memcpy(pbOutput, pKey->SkPrf, pKey->params.cbHashOutput); + pbOutput += pKey->params.cbHashOutput; + } + +cleanup: + + return scError; +} + + +UINT32 +SYMCRYPT_CALL +SymCryptHbsGetDigit( + UINT32 width, + _In_ PCBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 index ) +{ + UNREFERENCED_PARAMETER(cbBuffer); + + SYMCRYPT_ASSERT(width == 1 || width == 2 || width == 4 || width == 8); + SYMCRYPT_ASSERT(index < ((cbBuffer * 8) / width)); + + UINT32 digitsPerByte = 8 / width; + + BYTE value = pbBuffer[index / digitsPerByte]; + + value >>= width * (digitsPerByte - 1 - (index % digitsPerByte)); + + value &= (1 << width) - 1; + + return value; +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssTreeRootFromAuthenticationPath( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + UINT32 uLeaf, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbStartingNode, + _In_reads_bytes_( pParams->cbHashOutput * pParams->nLayerHeight ) + PCBYTE pbAuthNodes, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSeed, + _Out_writes_bytes_( pParams->cbHashOutput ) + PBYTE pbOutput ) +{ + BYTE node[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE tmp[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + UINT32 uNodeIndex = uLeaf; + + memcpy(node, pbStartingNode, pParams->cbHashOutput); + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_HASH_TREE); + 
SYMCRYPT_STORE_MSBFIRST32(adrs->u.hashtree.en32Index, uNodeIndex); + + for (UINT32 i = 0; i < pParams->nLayerHeight; i++) + { + SYMCRYPT_STORE_MSBFIRST32(adrs->u.hashtree.en32Height, i); + + if ( ((uLeaf >> i) & 1) == 0 ) + { + uNodeIndex = uNodeIndex / 2; + SYMCRYPT_STORE_MSBFIRST32(adrs->u.hashtree.en32Index, uNodeIndex); + SymCryptXmssRandHash(pParams, adrs, pbSeed, node, &pbAuthNodes[pParams->cbHashOutput * i], tmp); + } + else + { + uNodeIndex = (uNodeIndex - 1) / 2; + SYMCRYPT_STORE_MSBFIRST32(adrs->u.hashtree.en32Index, uNodeIndex); + SymCryptXmssRandHash(pParams, adrs, pbSeed, &pbAuthNodes[pParams->cbHashOutput * i], node, tmp); + } + + memcpy(node, tmp, pParams->cbHashOutput); + } + + memcpy(pbOutput, node, pParams->cbHashOutput); +} + +VOID +SYMCRYPT_CALL +SymCryptXmssRandomizedHash( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + UINT64 Idx, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbRandomizer, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbRoot, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbMsg, + SIZE_T cbMsg, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + SYMCRYPT_HASH_STATE state; + BYTE idxBuf[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + + SymCryptWipe(idxBuf, pParams->cbHashOutput); + SYMCRYPT_STORE_MSBFIRST64(&idxBuf[pParams->cbHashOutput - sizeof(Idx)], Idx); + + SymCryptXmssPrfInit(pParams->hash, SYMCRYPT_XMSS_H_MSG, pParams->cbPrefix, &state); + SymCryptHashAppend(pParams->hash, &state, pbRandomizer, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, pbRoot, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, idxBuf, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, pbMsg, cbMsg); + SymCryptHashResult(pParams->hash, &state, pbOutput, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssWotspPublickeyFromSignature( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + UINT32 idx, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbMsg, + 
_In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput * pParams->len ) + PCBYTE pbSignature, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch, + _Out_writes_bytes_( pParams->cbHashOutput ) + PBYTE pbOutput ) +{ + + UINT32 digit; + UINT32 checksum = 0; + BYTE en32Checksum[4]; + const UINT32 maxChainIndex = (1 << pParams->nWinternitzWidth) - 1; + PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash = NULL; + PSYMCRYPT_TREEHASH_NODE pNode = NULL; + SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT ctxIncHash; + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_LTREE); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ltree.en32Leaf, idx); + + ctxIncHash.adrs = *adrs; + ctxIncHash.pParams = pParams; + ctxIncHash.pbSeed = pbSeed; + + pIncHash = SymCryptHbsIncrementalTreehashInit( + pParams->len, + pbScratch, + cbScratch, + pParams->cbHashOutput, + SymCryptXmssLtreeNodeCompress, + &ctxIncHash); + + for (UINT32 i = 0; i < pParams->len; i++) + { + if (i < pParams->len1) + { + digit = SymCryptHbsGetDigit(pParams->nWinternitzWidth, pbMsg, pParams->cbHashOutput, i); + + checksum += maxChainIndex - digit; + } + else + { + if (i == pParams->len1) + { + checksum <<= pParams->nLeftShift32; + SYMCRYPT_STORE_MSBFIRST32(en32Checksum, checksum); + } + + digit = SymCryptHbsGetDigit(pParams->nWinternitzWidth, en32Checksum, sizeof(en32Checksum), i - pParams->len1); + } + + pNode = SymCryptHbsIncrementalTreehashAllocNode(pIncHash, i); + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_OTS); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Leaf, idx); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Chain, i); + + SymCryptXmssChain( + pParams, + &pbSignature[pParams->cbHashOutput * i], + digit, + maxChainIndex - digit, + pbSeed, + adrs, + pNode->value); + + SymCryptHbsIncrementalTreehashProcess(pIncHash); + } + + pNode = SymCryptHbsIncrementalTreehashFinalize(pIncHash); + + memcpy(pbOutput, pNode->value, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL 
+SymCryptXmssTreeRootFromSignature( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbHash, + UINT32 uLeaf, + _In_reads_bytes_( pParams->cbHashOutput * pParams->len ) + PCBYTE pbWotspSig, + _In_reads_bytes_( pParams->cbHashOutput* pParams->nLayerHeight ) + PCBYTE pbAuthNodes, + _Out_writes_bytes_( pParams->cbHashOutput ) + PBYTE pbOutput, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + BYTE WotspPublickey[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + + SymCryptXmssWotspPublickeyFromSignature( + pParams, + adrs, + uLeaf, + pbHash, + pbSeed, + pbWotspSig, + pbScratch, + cbScratch, + WotspPublickey); + + SymCryptXmssTreeRootFromAuthenticationPath( + pParams, + adrs, + uLeaf, + WotspPublickey, + pbAuthNodes, + pbSeed, + pbOutput); +} + +SIZE_T +SYMCRYPT_CALL +SymCryptXmssSizeofWotspSignature(_In_ PCSYMCRYPT_XMSS_PARAMS pParams) +{ + // WOTSP signature size is len = len1 + len2 many hash outputs + return pParams->cbHashOutput * pParams->len; +} + +SIZE_T +SYMCRYPT_CALL +SymCryptXmssSizeofAuthNodes( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams) +{ + // size of authentication nodes for single tree + return pParams->cbHashOutput * pParams->nLayerHeight; +} + +UINT64 +SYMCRYPT_CALL +SymCryptXmssSignatureGetIdx( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ PCBYTE pbSig ) +{ + UINT64 Idx = 0; + + for (UINT8 i = 0; i < pParams->cbIdx; i++) + { + Idx <<= 8; + Idx |= (UINT64)pbSig[i]; + } + + return Idx; +} + +PBYTE +SYMCRYPT_CALL +SymCryptXmssSignatureGetRandomness( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ PCBYTE pbSig ) +{ + PBYTE pb = (PBYTE)pbSig; + + // randomness comes after idx + pb += pParams->cbIdx; + + return pb; +} + +PBYTE +SYMCRYPT_CALL +SymCryptXmssSignatureGetWotspSig( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ PCBYTE pbSig, + UINT32 uLayer ) +{ + PBYTE pb = SymCryptXmssSignatureGetRandomness(pParams, pbSig); 
+ + // skip randomness + pb += pParams->cbHashOutput; + + // each layer contains WOTSP signature and AuthNodes + pb += uLayer * (SymCryptXmssSizeofWotspSignature(pParams) + SymCryptXmssSizeofAuthNodes(pParams)); + + return pb; +} + +PBYTE +SYMCRYPT_CALL +SymCryptXmssSignatureGetAuthNodes( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ PCBYTE pbSig, + UINT32 uLayer ) +{ + PBYTE pb = SymCryptXmssSignatureGetWotspSig(pParams, pbSig, uLayer); + + // AuthNodes follow WOTSP signature + pb += SymCryptXmssSizeofWotspSignature(pParams); + + return pb; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssVerifyInternal( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PCSYMCRYPT_XMSS_PARAMS pParams = &pKey->params; + BYTE RandomizedHash[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE ComputedRoot[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + XMSS_ADRS adrs; + UINT32 uLayer; + UINT64 uTree; + UINT32 uLeaf; + const UINT64 LeafMask = (1ULL << pParams->nLayerHeight) - 1; + + SYMCRYPT_CHECK_MAGIC(pKey); + + SYMCRYPT_ASSERT(pParams->nLayerHeight < 32); // Ensure nLeaves fits in 32 bits + + if (flags != 0 || + pbSignature == NULL || + cbSignature != SymCryptXmssSizeofSignatureFromParams(pParams) || + pKey->keyType == SYMCRYPT_XMSSKEY_TYPE_NONE ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len); + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, 1ULL << pParams->nLayerHeight); + + pbScratch = (PBYTE)SymCryptCallbackAlloc(cbScratch); + + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + PBYTE pbRandomness = SymCryptXmssSignatureGetRandomness(pParams, 
pbSignature); + UINT64 Idx = SymCryptXmssSignatureGetIdx(pParams, pbSignature); + + SymCryptXmssRandomizedHash( + pParams, + Idx, + pbRandomness, + pKey->Root, + pbMessage, + cbMessage, + RandomizedHash); + + SymCryptWipeKnownSize(&adrs, sizeof(XMSS_ADRS)); + + for (uLayer = 0; uLayer < pParams->nLayers; uLayer++) + { + uTree = Idx >> pParams->nLayerHeight; + uLeaf = (UINT32)(Idx & LeafMask); + + SYMCRYPT_STORE_MSBFIRST32(adrs.en32Layer, uLayer); + SYMCRYPT_STORE_MSBFIRST64(adrs.en64Tree, uTree); + SymCryptXmssTreeRootFromSignature( + pParams, + &adrs, + pKey->Seed, + uLayer == 0 ? RandomizedHash : ComputedRoot, + uLeaf, + SymCryptXmssSignatureGetWotspSig(pParams, pbSignature, uLayer), + SymCryptXmssSignatureGetAuthNodes(pParams, pbSignature, uLayer), + ComputedRoot, + pbScratch, + cbScratch); + + Idx >>= pParams->nLayerHeight; + } + + if (!SymCryptEqual(ComputedRoot, pKey->Root, pParams->cbHashOutput)) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + +cleanup: + + if (pbScratch) + { + SymCryptWipe(pbScratch, cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssVerify( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptXmssSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_XMSS); + + return SymCryptXmssVerifyInternal( + pKey, + pbMessage, + cbMessage, + flags, + pbSignature, + cbSignature); +} + +VOID +SYMCRYPT_CALL +SymCryptXmssWotspSign( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS* adrs, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbInput, + UINT32 uLeaf, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSkXmss, + _Out_writes_bytes_( pParams->cbHashOutput * pParams->len ) PBYTE pbOutput ) +{ + BYTE 
node[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + UINT32 nChecksum = 0; + BYTE en32Checksum[sizeof(UINT32)]; + UINT32 digit; + const UINT32 maxChainIndex = (1UL << pParams->nWinternitzWidth) - 1; + + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_OTS); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Leaf, uLeaf); + + for (UINT32 i = 0; i < pParams->len; i++) + { + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Chain, i); + SymCryptXmssCreateWotspSecret( + pParams, + pbSkXmss, + pbSeed, + adrs, + node); + + if (i < pParams->len1) + { + digit = SymCryptHbsGetDigit(pParams->nWinternitzWidth, pbInput, pParams->cbHashOutput, i); + + nChecksum += maxChainIndex - digit; + } + else + { + if (i == pParams->len1) + { + nChecksum <<= pParams->nLeftShift32; + SYMCRYPT_STORE_MSBFIRST32(en32Checksum, nChecksum); + } + + digit = SymCryptHbsGetDigit(pParams->nWinternitzWidth, en32Checksum, sizeof(en32Checksum), i - pParams->len1); + } + + SymCryptXmssChain( + pParams, + node, + 0, + digit, + pbSeed, + adrs, + &pbOutput[i * pParams->cbHashOutput]); + } + + SymCryptWipeKnownSize(node, sizeof(node)); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssTreeSignHash( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSkXmss, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbHash, + UINT32 Idx, + _Out_writes_bytes_( pParams->cbHashOutput * pParams->len ) + PBYTE pbWotspSig, + _Out_writes_bytes_( pParams->cbHashOutput * pParams->nLayerHeight ) + PBYTE pbAuthNodes, + _Out_writes_bytes_opt_( pParams->cbHashOutput ) + PBYTE pbRoot, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + BYTE WotspPublicKey[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + + SymCryptXmssWotspSign( + pParams, + adrs, + pbHash, + Idx, + pbSeed, + pbSkXmss, + pbWotspSig ); + + // Generate authentication path + for (UINT32 h = 0; h < pParams->nLayerHeight; h++) + { + UINT32 uLeaf = ((Idx >> h) ^ 
1UL) << h; + SymCryptXmssComputeSubtreeRoot( + pParams, + adrs, + pbSkXmss, + pbSeed, + uLeaf, + h, + pbScratch, + cbScratch, + &pbAuthNodes[h * pParams->cbHashOutput]); + } + + // + // Calculate tree root if requested by the caller + // + // This is used to return the tree root to be signed with the upper + // layer in XMSS^MT. + if (pbRoot) + { + SymCryptXmssCreateWotspPublickey( + pParams, + adrs, + Idx, + pbSkXmss, + pbSeed, + pbScratch, + cbScratch, + WotspPublicKey); + + SymCryptXmssTreeRootFromAuthenticationPath( + pParams, + adrs, + Idx, + WotspPublicKey, + pbAuthNodes, + pbSeed, + pbRoot); + } +} + +// +// Compute randomness for randomized hashing +// +VOID +SYMCRYPT_CALL +SymCryptXmssComputeRandomness( + _In_ PCSYMCRYPT_XMSS_KEY pKey, + UINT64 Idx, + _Out_writes_bytes_( pKey->params.cbHashOutput ) PBYTE pbRandomness ) +{ + BYTE IdxBuffer[32]; + + SymCryptWipeKnownSize(IdxBuffer, sizeof(IdxBuffer)); + SYMCRYPT_STORE_MSBFIRST64(IdxBuffer + sizeof(IdxBuffer) - sizeof(Idx), Idx); + + SymCryptXmssPrf( + &pKey->params, + SYMCRYPT_XMSS_PRF, + pKey->SkPrf, + pKey->params.cbHashOutput, + IdxBuffer, + sizeof(IdxBuffer), + pbRandomness); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSign( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PSYMCRYPT_XMSS_PARAMS pParams = &pKey->params; + UINT64 Idx; + BYTE en64Idx[sizeof(UINT64)]; + BYTE Randomness[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE RandomizedHash[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE TreeRoot[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + XMSS_ADRS adrs; + UINT32 uLayer; + UINT64 uTree; + UINT32 uLeaf; + const UINT64 LeafMask = (1ULL << pParams->nLayerHeight) - 1; + + SYMCRYPT_CHECK_MAGIC(pKey); + + SYMCRYPT_ASSERT(pParams->nLayerHeight < 32); // Ensure nLeaves fits in 32 
bits + + if (flags != 0 || + pbSignature == NULL || + cbSignature != SymCryptXmssSizeofSignatureFromParams(pParams) || + pKey->keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len); // Ltree hashing + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, 1ULL << pParams->nLayerHeight); // Merkle-tree hashing + + pbScratch = (PBYTE)SymCryptCallbackAlloc(cbScratch); + + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + Idx = SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(&pKey->Idx, 1) - 1; + if (Idx >= (1ULL << pParams->nTotalTreeHeight)) + { + // Set Idx to first unusable value + pKey->Idx = (1ULL << pParams->nTotalTreeHeight); + + scError = SYMCRYPT_HBS_NO_OTS_KEYS_LEFT; + goto cleanup; + } + + SYMCRYPT_STORE_MSBFIRST64(en64Idx, Idx); + memcpy(pbSignature, &en64Idx[sizeof(en64Idx) - pParams->cbIdx], pParams->cbIdx); + + SymCryptXmssComputeRandomness(pKey, Idx, Randomness); + memcpy(SymCryptXmssSignatureGetRandomness(pParams, pbSignature), Randomness, pKey->params.cbHashOutput); + + SymCryptXmssRandomizedHash(&pKey->params, Idx, Randomness, pKey->Root, pbMessage, cbMessage, RandomizedHash); + + SymCryptWipeKnownSize(&adrs, sizeof(XMSS_ADRS)); + + for (uLayer = 0; uLayer < pParams->nLayers; uLayer++) + { + uTree = Idx >> pParams->nLayerHeight; + uLeaf = (UINT32)(Idx & LeafMask); + + SYMCRYPT_STORE_MSBFIRST32(adrs.en32Layer, uLayer); + SYMCRYPT_STORE_MSBFIRST64(adrs.en64Tree, uTree); + + SymCryptXmssTreeSignHash( + &pKey->params, + &adrs, + pKey->SkXmss, + pKey->Seed, + uLayer == 0 ? RandomizedHash : TreeRoot, + uLeaf, + SymCryptXmssSignatureGetWotspSig(pParams, pbSignature, uLayer), + SymCryptXmssSignatureGetAuthNodes(pParams, pbSignature, uLayer), + uLayer == (UINT32)(pParams->nLayers - 1) ? 
NULL : TreeRoot, // No need to compute the root for the top layer tree + pbScratch, + cbScratch); + + Idx >>= pParams->nLayerHeight; + } + + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipe(pbSignature, cbSignature); + goto cleanup; + } + +cleanup: + + if (pbScratch) + { + SymCryptWipe(pbScratch, cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} diff --git a/libs/symcrypt/lib/xtsaes.c b/libs/symcrypt/lib/xtsaes.c new file mode 100644 index 00000000000..45e842702e7 --- /dev/null +++ b/libs/symcrypt/lib/xtsaes.c @@ -0,0 +1,727 @@ +// +// xtsaes.c code for XTS-AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXtsAesExpandKey( + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_ERROR scError; + SIZE_T halfKeySize = cbKey / 2; + + scError = SymCryptAesExpandKey( &pExpandedKey->key1, pbKey, halfKeySize ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // + // Pass the 'rest' of the key to the second one. This catches errors such as + // an attempt to pass a 33 byte key. + // halfKeySize = 16, which is valid, but this expansion gets a 17-byte key which will fail. + // Key2 is only used for tweak encryption, so we can use the EncryptOnly key expansion. 
+ // + scError = SymCryptAesExpandKeyEncryptOnly( &pExpandedKey->key2, pbKey + halfKeySize, cbKey - halfKeySize ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXtsAesExpandKeyEx( + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + UINT32 flags ) +{ + if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // FIPS IG C.I enforces that the two AES keys internally used in XTS-AES are non-equal + if( cbKey > 64 ) + { + return SYMCRYPT_WRONG_KEY_SIZE; + } + if( SymCryptEqual( pbKey, pbKey+(cbKey/2), (cbKey/2) ) ) + { + return SYMCRYPT_FIPS_FAILURE; + } + } + + return SymCryptXtsAesExpandKey( pExpandedKey, pbKey, cbKey ); +} + + +VOID +SYMCRYPT_CALL +SymCryptXtsAesKeyCopy( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pDst ) +{ + SymCryptAesKeyCopy( &pSrc->key1, &pDst->key1 ); + SymCryptAesKeyCopy( &pSrc->key2, &pDst->key2 ); +} + +#define N_PARALLEL_TWEAKS 16 + +#define SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN \ + SYMCRYPT_ALIGN BYTE localScratch[N_PARALLEL_TWEAKS * SYMCRYPT_AES_BLOCK_SIZE]; + +#define SYMCRYPT_AesEcbEncryptXxx SymCryptAesEcbEncryptC + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalC +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitC( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalC +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitC( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#undef SYMCRYPT_AesEcbEncryptXxx + + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM +#define SYMCRYPT_AesEcbEncryptXxx SymCryptAesEcbEncryptAsm + 
+#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalAsm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitAsm( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalAsm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitAsm( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#undef SYMCRYPT_AesEcbEncryptXxx +#endif + +#if SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_AesEcbEncryptXxx SymCryptAesEcbEncryptNeon + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalNeon +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitNeon( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalNeon +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitNeon( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#undef SYMCRYPT_AesEcbEncryptXxx +#endif + +#undef SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN + + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#define SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN \ + /* Defining localScratch as a buffer of __m128is ensures there is required 16B alignment on x86 */ \ + __m128i localScratch[ N_PARALLEL_TWEAKS + 16 ]; +#define SYMCRYPT_AesEcbEncryptXxx SymCryptAesEcbEncryptXmm + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalXmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitXmm( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include 
"xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalXmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitXmm( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalYmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitYmm_2048( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalYmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitYmm_2048( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#if 0 //do not compile Zmm code for now + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalZmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitZmm_2048( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalZmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitZmm_2048( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#endif + +#undef SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN +#undef SYMCRYPT_AesEcbEncryptXxx + +#endif + +VOID +SYMCRYPT_CALL 
+SymCryptXtsAesEncryptInternal( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + BOOLEAN bOverflow ) +{ +#if SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + /* if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_512_CODE ) ) { + SymCryptXtsAesEncryptInternalZmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else */ + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE ) && + SymCryptSaveYmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptXtsAesEncryptInternalYmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + SymCryptRestoreYmm( &SaveData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) { + SymCryptXtsAesEncryptInternalXmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else { + SymCryptXtsAesEncryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptXtsAesEncryptInternalXmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptXtsAesEncryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptXtsAesEncryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptXtsAesEncryptInternalNeon( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else { + SymCryptXtsAesEncryptInternalC( 
pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#else + SymCryptXtsAesEncryptInternalC( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncrypt( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + UINT64 tweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE fullTweak[SYMCRYPT_AES_BLOCK_SIZE]; + + SYMCRYPT_ASSERT( cbData % cbDataUnit == 0 ); + + if( cbDataUnit < SYMCRYPT_AES_BLOCK_SIZE ) + { + // Invalid data unit size + // Return early to avoid repeated checks deeper in the code + return; + } + + SYMCRYPT_STORE_LSBFIRST64(&fullTweak[0], tweak); + SYMCRYPT_STORE_LSBFIRST64(&fullTweak[8], 0); + + SymCryptXtsAesEncryptInternal( pExpandedKey, cbDataUnit, &fullTweak[0], pbSrc, pbDst, cbData, FALSE ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptWith128bTweak( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + if( cbDataUnit < SYMCRYPT_AES_BLOCK_SIZE ) + { + // Invalid data unit size + // Return early to avoid repeated checks deeper in the code + return; + } + + SymCryptXtsAesEncryptInternal( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, TRUE ); +} + + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptInternal( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + BOOLEAN bOverflow ) +{ +#if SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + /* if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_512_CODE ) ) { + SymCryptXtsAesDecryptInternalZmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow 
); + } else */ + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE ) && + SymCryptSaveYmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptXtsAesDecryptInternalYmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + SymCryptRestoreYmm( &SaveData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) { + SymCryptXtsAesDecryptInternalXmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else { + SymCryptXtsAesDecryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptXtsAesDecryptInternalXmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptXtsAesDecryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptXtsAesDecryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptXtsAesDecryptInternalNeon( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else { + SymCryptXtsAesDecryptInternalC( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#else + SymCryptXtsAesDecryptInternalC( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecrypt( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + UINT64 tweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE fullTweak[SYMCRYPT_AES_BLOCK_SIZE]; + + SYMCRYPT_ASSERT( cbData % cbDataUnit == 0 ); + + 
if( cbDataUnit < SYMCRYPT_AES_BLOCK_SIZE ) + { + // Invalid data unit size + // Return early to avoid repeated checks deeper in the code + return; + } + + SYMCRYPT_STORE_LSBFIRST64(&fullTweak[0], tweak); + SYMCRYPT_STORE_LSBFIRST64(&fullTweak[8], 0); + + SymCryptXtsAesDecryptInternal( pExpandedKey, cbDataUnit, &fullTweak[0], pbSrc, pbDst, cbData, FALSE ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptWith128bTweak( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + if( cbDataUnit < SYMCRYPT_AES_BLOCK_SIZE ) + { + // Invalid data unit size + // Return early to avoid repeated checks deeper in the code + return; + } + + SymCryptXtsAesDecryptInternal( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, TRUE ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsUpdateTweak( + _Inout_updates_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE buf ) +{ +/* + UINT32 b0 = LOAD_LSBFIRST32( buf ); + UINT32 b1 = LOAD_LSBFIRST32( buf + 4 ); + UINT32 b2 = LOAD_LSBFIRST32( buf + 8 ); + UINT32 b3 = LOAD_LSBFIRST32( buf + 12 ); + UINT32 msbit = b3 >> 31; + + // + // The STORE_* macros re-evaluate their arguments sometimes, so we + // keep all computations in local variables. 
+ // + UINT32 r0 = (b0 << 1) ^ (135 * msbit); + UINT32 r1 = (b1 << 1) | (b0 >> 31); + UINT32 r2 = (b2 << 1) | (b1 >> 31); + UINT32 r3 = (b3 << 1) | (b2 >> 31); + + STORE_LSBFIRST32( buf , r0 ); + STORE_LSBFIRST32( buf + 4, r1 ); + STORE_LSBFIRST32( buf + 8, r2 ); + STORE_LSBFIRST32( buf + 12, r3 ); +*/ + UINT64 b0 = SYMCRYPT_LOAD_LSBFIRST64( buf ); + UINT64 b1 = SYMCRYPT_LOAD_LSBFIRST64( buf + 8 ); + + /* + UINT32 msbit = (UINT32)(b1 >> 63); + //UINT32 feedback = 135 * msbit; + UINT32 feedback = (msbit << 7) + (msbit << 3) - msbit; + */ + UINT32 feedback = (((INT64)b1) >> 63) & 135; + + UINT64 r0 = (b0 << 1) ^ feedback; + UINT64 r1 = (b1 << 1) | (b0 >> 63); + + SYMCRYPT_STORE_LSBFIRST64( buf , r0 ); + SYMCRYPT_STORE_LSBFIRST64( buf + 8, r1 ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsEncryptDataUnit( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + BYTE buf[2*SYMCRYPT_AES_BLOCK_SIZE]; + + while( cbData >= 2*SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE ); + (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf ); + SymCryptXorBytes( pbTweakBlock, buf, pbDst, SYMCRYPT_AES_BLOCK_SIZE ); + + SYMCRYPT_ASSERT( pBlockCipher->blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptXtsUpdateTweak( pbTweakBlock ); + + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbData > SYMCRYPT_AES_BLOCK_SIZE ) + { + // Ciphertext stealing encryption + // + // +--------------+ + // | | + // | V + // +-----------------+ | +-----+-----------+ + // | P_m-1 | | | P_m |++++CP+++++| + // +-----------------+ | +-----+-----------+ + // | | | + // enc_m-1 | enc_m + // | | | + // V | V + // +-----+-----------+ | +-----------------+ + // | C_m |++++CP+++++|--+ | C_m-1 | + // +-----+-----------+ 
+-----------------+ + // | / + // +---------------- / --+ + // / | + // | V + // +-----------------+ | +-----+ + // | C_m-1 |<-+ | C_m | + // +-----------------+ +-----+ + + // Encrypt penultimate plaintext block into buf + SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE ); + (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf ); + SymCryptXorBytes( pbTweakBlock, buf, buf, SYMCRYPT_AES_BLOCK_SIZE ); + + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + + // Copy buf to buf[SYMCRYPT_AES_BLOCK_SIZE] + memcpy( &buf[SYMCRYPT_AES_BLOCK_SIZE], buf, SYMCRYPT_AES_BLOCK_SIZE ); + // Copy final plaintext bytes to prefix of buf - we must read before writing to support in-place encryption + memcpy( buf, pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbData ); + // Copy prefix of buf[SYMCRYPT_AES_BLOCK_SIZE] to the right place in the destination buffer + memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &buf[SYMCRYPT_AES_BLOCK_SIZE], cbData ); + + // Do final tweak update + SymCryptXtsUpdateTweak( pbTweakBlock ); + + // Set pbSrc correctly to share code with non-ciphertext stealing case + pbSrc = &buf[0]; + } + + // Final full block encryption + SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE ); + (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf ); + SymCryptXorBytes( pbTweakBlock, buf, pbDst, SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptWipeKnownSize( buf, sizeof(buf) ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsDecryptDataUnit( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + BYTE buf[2*SYMCRYPT_AES_BLOCK_SIZE]; + BYTE tweakBuf[SYMCRYPT_AES_BLOCK_SIZE]; + + while( cbData >= 2*SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE ); + (*pBlockCipher->decryptFunc)( pExpandedKey, buf, buf ); + SymCryptXorBytes( pbTweakBlock, buf, pbDst, 
SYMCRYPT_AES_BLOCK_SIZE ); + + SYMCRYPT_ASSERT( pBlockCipher->blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptXtsUpdateTweak( pbTweakBlock ); + + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbData > SYMCRYPT_AES_BLOCK_SIZE ) + { + // Ciphertext stealing decryption + // + // +--------------+ + // | | + // | V + // +-----------------+ | +-----+-----------+ + // | C_m-1 | | | C_m |++++CP+++++| + // +-----------------+ | +-----+-----------+ + // | | | + // dec_m | dec_m-1 + // | | | + // V | V + // +-----+-----------+ | +-----------------+ + // | P_m |++++CP+++++|--+ | P_m-1 | + // +-----+-----------+ +-----------------+ + // | / + // +---------------- / --+ + // / | + // | V + // +-----------------+ | +-----+ + // | P_m-1 |<-+ | P_m | + // +-----------------+ +-----+ + + // Save penultimate value of tweak to tweakBuf + memcpy( tweakBuf, pbTweakBlock, SYMCRYPT_AES_BLOCK_SIZE ); + + // Do final tweak update + SymCryptXtsUpdateTweak( pbTweakBlock ); + + // Decrypt penultimate ciphertext block into buf + SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE ); + (*pBlockCipher->decryptFunc)( pExpandedKey, buf, buf ); + SymCryptXorBytes( pbTweakBlock, buf, buf, SYMCRYPT_AES_BLOCK_SIZE ); + + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + + // Copy buf to buf[SYMCRYPT_AES_BLOCK_SIZE] + memcpy( &buf[SYMCRYPT_AES_BLOCK_SIZE], buf, SYMCRYPT_AES_BLOCK_SIZE ); + // Copy final ciphertext bytes to prefix of buf - we must read before writing to support in-place decryption + memcpy( buf, pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbData ); + // Copy prefix of buf[SYMCRYPT_AES_BLOCK_SIZE] to the right place in the destination buffer + memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &buf[SYMCRYPT_AES_BLOCK_SIZE], cbData ); + + // Set pbSrc and pbTweakBlock correctly to share code with non-ciphertext stealing case + pbSrc = &buf[0]; + pbTweakBlock = &tweakBuf[0]; + } + + // Final full block decryption + SymCryptXorBytes( 
pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE ); + (*pBlockCipher->decryptFunc)( pExpandedKey, buf, buf ); + SymCryptXorBytes( pbTweakBlock, buf, pbDst, SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptWipeKnownSize( buf, sizeof(buf) ); + SymCryptWipeKnownSize( tweakBuf, sizeof(tweakBuf) ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptXtsEncryptDataUnit( + &SymCryptAesBlockCipherNoOpt, + pExpandedKey, + pbTweakBlock, + pbSrc, + pbDst, + cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptXtsDecryptDataUnit( + &SymCryptAesBlockCipherNoOpt, + pExpandedKey, + pbTweakBlock, + pbSrc, + pbDst, + cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + // No special optimizations... 
+ SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptXtsEncryptDataUnit( + &SymCryptAesBlockCipherNoOpt, + pExpandedKey, + pbTweakBlock, + pbSrc, + pbDst, + cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptXtsDecryptDataUnit( + &SymCryptAesBlockCipherNoOpt, + pExpandedKey, + pbTweakBlock, + pbSrc, + pbDst, + cbData ); + +} + +static const BYTE SymCryptXtsAesCiphertext[32] = { + 0xef, 0xe5, 0x8b, 0x1a, 0x0b, 0xaf, 0xc1, 0x08, + 0xe9, 0xb7, 0x74, 0x1c, 0xcb, 0xdc, 0xf8, 0x53, + 0x4f, 0x90, 0x55, 0x32, 0x53, 0xf6, 0x18, 0xd2, + 0x34, 0xd5, 0xf2, 0x29, 0xf6, 0x4f, 0xd3, 0x8c, +}; + +VOID +SYMCRYPT_CALL +SymCryptXtsAesSelftest(void) +{ + SYMCRYPT_XTS_AES_EXPANDED_KEY key; + BYTE buf[32]; + BYTE plaintext[sizeof( buf )]; + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + buf[0] = 1; + + if( SymCryptXtsAesExpandKeyEx( &key, buf, sizeof( buf ), 0 ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'xtsa' ); + } + + SymCryptXtsAesEncrypt( &key, sizeof( buf ), 0, buf, buf, sizeof( buf ) ); + + SymCryptInjectError( buf, sizeof( buf ) ); + if( memcmp( buf, SymCryptXtsAesCiphertext, sizeof( buf ) ) != 0 ) + { + SymCryptFatal( 'xtsa' ); + } + + SymCryptXtsAesDecrypt( &key, sizeof( buf ), 0, buf, buf, sizeof( buf ) ); + + SymCryptInjectError( buf, sizeof( buf ) ); + + SymCryptWipeKnownSize( plaintext, sizeof( plaintext ) ); + plaintext[0] = 1; + if( memcmp( buf, plaintext, sizeof( buf ) ) != 0 ) + { + SymCryptFatal( 'xtsa' ); + } +} diff --git a/libs/symcrypt/lib/xtsaes_definitions.h b/libs/symcrypt/lib/xtsaes_definitions.h new file mode 100644 index 00000000000..0a316b65a1b --- 
/dev/null +++ b/libs/symcrypt/lib/xtsaes_definitions.h @@ -0,0 +1,176 @@ +// +// xtsaes_definitions.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// Multiply by alpha +// +// <</>> indicate shifts on 128-bit values +// <<<</>>>> indicate shifts on 32-bit values (a word) +// + +// Multiply by ALPHA +// Since there's no instruction to shift the 128 bit register left by one, the following shifts do the trick. +// All shifts are zero extended +// t1 = _in <<<< 1 words shifted left by 1, this is almost a _in << 1 but there are +// gaps at first bit of each word, the following two shifts fixes that. +// t2 = _in >>>> 31 words shifted right by 31 +// t1 = t1 ^ (t2 << 32) t1 = _in << 1, note ^ could be | +// Do the special case for first byte of _in where last carry means xor with 135 for first byte. +// t2 = t2 >> 96 t2 = _in >> 127, i.e., last bit of _in is placed in first bit +// t2 = (t2 <<<< 7) + (t2 <<<<3) - (t2) t2 = 135 if last bit of t2 is set +// res = t1 ^ t2 +#define XTS_MUL_ALPHA_old( _in, _res ) \ +{\ + __m128i _t1, _t2;\ +\ + _t1 = _mm_slli_epi32( _in, 1 ); \ + _t2 = _mm_srli_epi32( _in, 31); \ + _t1 = _mm_xor_si128( _t1, _mm_slli_si128( _t2, 4 )); \ + _t2 = _mm_srli_si128( _t2, 12 ); \ + _t2 = _mm_sub_epi32( _mm_add_epi32( _mm_slli_epi32( _t2, 7 ), _mm_slli_epi32( _t2, 3 ) ), _t2 ); \ + _res = _mm_xor_si128( _t1, _t2 ); \ +} + +// An improved approach; use arithmetic shift-right to duplicate the carry-out, PSHUFD to re-arrange, and an AND to +// implement both the polynomial and mask the other words down to 1 bit again. 
+ +// __m128i XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 ); +#define XTS_MUL_ALPHA( _in, _res ) \ +{\ + __m128i _t1, _t2;\ +\ + _t1 = _mm_slli_epi32( _in, 1 ); \ + _t2 = _mm_srai_epi32( _in, 31); \ + _t2 = _mm_shuffle_epi32( _t2, _MM_SHUFFLE( 2, 1, 0, 3 ) ); \ + _t2 = _mm_and_si128( _t2, XTS_ALPHA_MASK ); \ + _res = _mm_xor_si128( _t1, _t2 ); \ +} + +// Like XTS_MUL_ALPHA_old but operate on __m512i for _in and _res. +// TODO: do this with VSHUFPS. +#define XTS_MUL_ALPHA_ZMM_old( _in, _res ) \ +{\ + __m512i _t1, _t2;\ +\ + _t1 = _mm512_slli_epi32( _in, 1 ); \ + _t2 = _mm512_srli_epi32( _in, 31); \ + _t1 = _mm512_xor_si512( _t1, _mm512_bslli_epi128( _t2, 4 )); \ + _t2 = _mm512_bsrli_epi128( _t2, 12 ); \ + _t2 = _mm512_sub_epi32( _mm512_add_epi32( _mm512_slli_epi32( _t2, 7 ), _mm512_slli_epi32( _t2, 3 ) ), _t2 ); \ + _res = _mm512_xor_si512( _t1, _t2 ); \ +} + +// Multiply by ALPHA^2 +// t1 = Input <<<< 2 +// t2 = Input >>>> 30 +// t1 = t1 ^ (t2 << 32) +// t2 = t2 >> 96 +// t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2 +// res = t1 ^ t2 +#define XTS_MUL_ALPHA2( _in, _res ) \ +{\ + __m128i _t1, _t2;\ +\ + _t1 = _mm_slli_epi32( _in, 2 ); \ + _t2 = _mm_srli_epi32( _in, 30); \ + _t1 = _mm_xor_si128( _t1, _mm_slli_si128( _t2, 4 )); \ + _t2 = _mm_srli_si128( _t2, 12 ); \ + _t2 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_slli_epi32( _t2, 7 ), _mm_slli_epi32( _t2, 2 ) ), _mm_slli_epi32( _t2, 1 )), _t2 ); \ + _res = _mm_xor_si128( _t1, _t2 ); \ +} + +// Multiply by ALPHA^4 +// t1 = Input <<<< 4 +// t2 = Input >>>> 28 +// t1 = t1 ^ (t2 << 32) +// t2 = t2 >> 96 +// t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2 +// res = t1 ^ t2 +#define XTS_MUL_ALPHA4( _in, _res ) \ +{\ + __m128i _t1, _t2;\ +\ + _t1 = _mm_slli_epi32( _in, 4 ); \ + _t2 = _mm_srli_epi32( _in, 28); \ + _t1 = _mm_xor_si128( _t1, _mm_slli_si128( _t2, 4 )); \ + _t2 = _mm_srli_si128( _t2, 12 ); \ + _t2 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_slli_epi32( _t2, 7 ), _mm_slli_epi32( _t2, 2 ) ), 
_mm_slli_epi32( _t2, 1 )), _t2 ); \ + _res = _mm_xor_si128( _t1, _t2 ); \ +} + +#define XTS_MUL_ALPHA5( _in, _res ) \ +{\ + __m128i _t1, _t2;\ +\ + _t1 = _mm_slli_epi32( _in, 5 ); \ + _t2 = _mm_srli_epi32( _in, 27); \ + _t1 = _mm_xor_si128( _t1, _mm_slli_si128( _t2, 4 )); \ + _t2 = _mm_srli_si128( _t2, 12 ); \ + _t2 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_slli_epi32( _t2, 7 ), _mm_slli_epi32( _t2, 2 ) ), _mm_slli_epi32( _t2, 1 )), _t2 ); \ + _res = _mm_xor_si128( _t1, _t2 ); \ +} + + +// Multiply by ALPHA^8 +// t2 = Input >> 120 +// t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2 +// res = (Input << 8) ^ t2 +// +// Only currently used with VPCLMULQDQ (in Ymm / Zmm versions) as support for non-vectorized PCLMULQDQ is not always supported with AESNI, +// and is sometimes slower than shift+xor + +// __m256i XTS_ALPHA_MULTIPLIER_Ymm = _mm256_set_epi64x( 0, 0x87, 0, 0x87); +#define XTS_MUL_ALPHA8_YMM( _in, _res ) \ +{\ + __m256i _t2;\ +\ + _t2 = _mm256_srli_si256( _in, 15 ); /* AVX2 */ \ + _res = _mm256_slli_si256( _in, 1 ); \ + _t2 = _mm256_clmulepi64_epi128( _t2, XTS_ALPHA_MULTIPLIER_Ymm, 0x00 ); \ + _res = _mm256_xor_si256( _res, _t2 ); \ +} + +#define XTS_MUL_ALPHA16_YMM( _in, _res ) \ +{\ + __m256i _t2;\ +\ + _t2 = _mm256_srli_si256( _in, 14 ); /* AVX2 */ \ + _res = _mm256_slli_si256( _in, 2 ); \ + _t2 = _mm256_clmulepi64_epi128( _t2, XTS_ALPHA_MULTIPLIER_Ymm, 0x00 ); \ + _res = _mm256_xor_si256( _res, _t2 ); \ +} + +// __m512i XTS_ALPHA_MULTIPLIER_Zmm = _mm512_set_epi64( 0, 0x87, 0, 0x87, 0, 0x87, 0, 0x87 ); +#define XTS_MUL_ALPHA8_ZMM( _in, _res ) \ +{\ + __m512i _t2; \ +\ + _t2 = _mm512_bsrli_epi128( _in, 15 ); \ + _res = _mm512_bslli_epi128( _in, 1 ); \ + _t2 = _mm512_clmulepi64_epi128( _t2, XTS_ALPHA_MULTIPLIER_Zmm, 0x00 ); \ + _res = _mm512_xor_si512( _res, _t2 ); \ +} + +#define XTS_MUL_ALPHA16_ZMM( _in, _res ) \ +{\ + __m512i _t2; \ +\ + _t2 = _mm512_bsrli_epi128( _in, 14 ); \ + _res = _mm512_bslli_epi128( _in, 2 ); \ + _t2 = 
_mm512_clmulepi64_epi128( _t2, XTS_ALPHA_MULTIPLIER_Zmm, 0x00 ); \ + _res = _mm512_xor_si512( _res, _t2 ); \ +} + +// Currently only use UINT64 for x86 and amd64 - this does regress perf on x86 +// but we don't expect a lot of XTS in x86. If the regression causes any real problems +// we can consider introducing another variant. Not doing this now to avoid code bloat +#define XTS_MUL_ALPHA_Scalar( _inout_low_u64, _inout_high_u64 ) \ +{ \ + UINT64 tmp = (UINT64) ((INT64)_inout_high_u64 >> 63); \ + \ + _inout_high_u64 = (_inout_high_u64 << 1) ^ (_inout_low_u64 >> 63); \ + _inout_low_u64 = (_inout_low_u64 << 1) ^ (tmp & 0x87); \ +} diff --git a/libs/symcrypt/lib/xtsaes_pattern.c b/libs/symcrypt/lib/xtsaes_pattern.c new file mode 100644 index 00000000000..f6199cd5d55 --- /dev/null +++ b/libs/symcrypt/lib/xtsaes_pattern.c @@ -0,0 +1,90 @@ +// +// xtsaes_pattern.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#if 0 +#pragma makedep header +#endif + +VOID +SYMCRYPT_CALL +SYMCRYPT_XtsAesXxx( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + BOOLEAN bOverflow ) +{ + SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN; + // SYMCRYPT_ALIGN BYTE localScratch[N_PARALLEL_TWEAKS * SYMCRYPT_AES_BLOCK_SIZE]; + // or + // /* Defining localScratch as a buffer of __m128is ensures there is required 16B alignment on x86 */ + // __m128i localScratch[ N_PARALLEL_TWEAKS + 16 ]; + // Note that the extra 16 __m128i space is used for internal scratch space for SymCryptXtsAesEncryptDataUnitXmm + // This allows modified tweak generation to be performed in scalar registers in parallel with AES in Xmm register + // which reduces register pressure and increases throughput + PBYTE tweakBuf = (PBYTE) &localScratch[0]; + SIZE_T i, tweakBytes; + UINT64 tweakLow64 = SYMCRYPT_LOAD_LSBFIRST64(pbTweak); + UINT64 
tweakHigh64 = SYMCRYPT_LOAD_LSBFIRST64(pbTweak+8); + UINT64 previousTweakLow64; + + SYMCRYPT_ASSERT( cbData % cbDataUnit == 0 ); + + while( cbData >= cbDataUnit ) + { + // + // We encrypt the tweaks of many data units in parallel for best performance. + // In the first loop we build the tweaks and decrement cbData. + // In the second loop we use up all the tweaks, and update the pointers. + // Both loops are executed the same number of times. + // + tweakBytes = 0; + previousTweakLow64 = tweakLow64; + + do // do-while because we know we are going to go through at least once. + { + SYMCRYPT_STORE_LSBFIRST64(&tweakBuf[tweakBytes ], tweakLow64); + SYMCRYPT_STORE_LSBFIRST64(&tweakBuf[tweakBytes + 8], tweakHigh64); + tweakLow64++; + cbData -= cbDataUnit; + tweakBytes += SYMCRYPT_AES_BLOCK_SIZE; + } while( cbData >= cbDataUnit && tweakBytes < SYMCRYPT_AES_BLOCK_SIZE * N_PARALLEL_TWEAKS ); + + if( bOverflow && previousTweakLow64 > tweakLow64 ) + { + // Very rare fix-up of tweaks if tweakLow64 overflowed, and should have incremented tweakHigh64 + // bOverflow=FALSE allows backwards compatibility with old API which wrapped around at 64-bits + SYMCRYPT_ASSERT( tweakLow64 < N_PARALLEL_TWEAKS ); + + // Increment tweakHigh64 and store new value in high half of the previous tweakLow64 tweaks + tweakHigh64++; + for( i=0; i<tweakLow64; i++) + { + SYMCRYPT_STORE_LSBFIRST64(&tweakBuf[tweakBytes - (16*i) - 8], tweakHigh64); + } + } + + SYMCRYPT_AesEcbEncryptXxx( &pExpandedKey->key2, &tweakBuf[0], &tweakBuf[0], tweakBytes ); + + i = 0; + while( i < tweakBytes ) + { + SYMCRYPT_XTSAESDATAUNIT_INVOKE; + // SymCryptXtsAesXxcryptDataUnitXxx( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); + // or + // SymCryptXtsAesXxcryptDataUnitXxx( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); + // Note that the scratch space being provided to the DataUnit function is an offset into the localScratch buffer + + pbSrc += cbDataUnit; 
+ pbDst += cbDataUnit; + i += SYMCRYPT_AES_BLOCK_SIZE; + } + } + + SymCryptWipeKnownSize( localScratch, sizeof( localScratch ) ); +} diff --git a/tools/make_makefiles b/tools/make_makefiles index a6d0d6b210c..194940d8aac 100755 --- a/tools/make_makefiles +++ b/tools/make_makefiles @@ -303,6 +303,11 @@ sub assign_sources_to_makefiles(@) { next; } + else + { + my %flags = get_makedep_flags($file); + next if defined $flags{header}; + } push @{${$make}{"=SOURCES"}}, $name; } diff --git a/tools/makedep.c b/tools/makedep.c index 90a522640fe..ca61fb6a9dc 100644 --- a/tools/makedep.c +++ b/tools/makedep.c @@ -3748,6 +3748,7 @@ static const struct { "l", output_source_l }, { "h", output_source_h }, { "rh", output_source_h }, + { "inc", output_source_h }, { "inl", output_source_h }, { "ver", output_source_ver }, { "rc", output_source_rc }, -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/11025
From: Alexandre Julliard <julliard@winehq.org> --- dlls/rsaenh/Makefile.in | 4 +- dlls/rsaenh/implglue.c | 108 ++++++++++++---------------------------- dlls/rsaenh/implglue.h | 22 ++++++-- dlls/rsaenh/rsaenh.c | 1 + 4 files changed, 52 insertions(+), 83 deletions(-) diff --git a/dlls/rsaenh/Makefile.in b/dlls/rsaenh/Makefile.in index ab53b2fe0ef..7a7be6679a5 100644 --- a/dlls/rsaenh/Makefile.in +++ b/dlls/rsaenh/Makefile.in @@ -1,7 +1,7 @@ MODULE = rsaenh.dll IMPORTLIB = rsaenh -IMPORTS = $(TOMCRYPT_PE_LIBS) crypt32 advapi32 -EXTRAINCL = $(TOMCRYPT_PE_CFLAGS) +IMPORTS = $(TOMCRYPT_PE_LIBS) $(SYMCRYPT_PE_LIBS) crypt32 advapi32 +EXTRAINCL = $(TOMCRYPT_PE_CFLAGS) $(SYMCRYPT_PE_CFLAGS) VER_PRODUCTVERSION = 5,1,2600,2180 diff --git a/dlls/rsaenh/implglue.c b/dlls/rsaenh/implglue.c index 900c620c9ef..f81c038cdc0 100644 --- a/dlls/rsaenh/implglue.c +++ b/dlls/rsaenh/implglue.c @@ -31,60 +31,29 @@ #include "implglue.h" +SYMCRYPT_ENVIRONMENT_DEFS( WindowsUsermodeWin8_1nLater ); + prng_state prng = { 0 }; int wprng = 0; BOOL init_hash_impl( ALG_ID algid, struct hash *hash ) { - memset( hash, 0, sizeof(*hash) ); - - switch (algid) + const SYMCRYPT_HASH *algorithms[] = { - case CALG_MD2: - hash->desc = &md2_desc; - break; - - case CALG_MD4: - hash->desc = &md4_desc; - break; - - case CALG_MD5: - hash->desc = &md5_desc; - break; - - case CALG_SHA: - hash->desc = &sha1_desc; - break; - - case CALG_SHA_256: - hash->desc = &sha256_desc; - break; - - case CALG_SHA_384: - hash->desc = &sha384_desc; - break; - - case CALG_SHA_512: - hash->desc = &sha512_desc; - break; - - default: - return TRUE; - } - - hash->desc->init( &hash->state ); - return TRUE; -} - -BOOL update_hash_impl( struct hash *hash, const BYTE *data, DWORD len ) -{ - hash->desc->process( &hash->state, data, len ); - return TRUE; -} + [ALG_SID_MD2] = SymCryptMd2Algorithm, + [ALG_SID_MD4] = SymCryptMd4Algorithm, + [ALG_SID_MD5] = SymCryptMd5Algorithm, + [ALG_SID_SHA] = SymCryptSha1Algorithm, + [ALG_SID_SHA_256] = 
SymCryptSha256Algorithm, + [ALG_SID_SHA_384] = SymCryptSha384Algorithm, + [ALG_SID_SHA_512] = SymCryptSha512Algorithm, + }; -BOOL finalize_hash_impl( struct hash *hash, BYTE *hash_value, DWORD hash_size ) -{ - hash->desc->done( &hash->state, hash_value ); + memset( hash, 0, sizeof(*hash) ); + if (GET_ALG_CLASS(algid) != ALG_CLASS_HASH) return TRUE; + if (GET_ALG_SID(algid) >= ARRAY_SIZE(algorithms)) return TRUE; + if (!(hash->desc = algorithms[GET_ALG_SID(algid)])) return TRUE; + SymCryptHashInit( hash->desc, &hash->state ); return TRUE; } @@ -138,34 +107,27 @@ BOOL setup_key_impl(ALG_ID aiAlgid, KEY_CONTEXT *pKeyContext, DWORD dwKeyLen, break; case CALG_RC2: - rc2_setup_ex(abKeyValue, dwKeyLen + dwSaltLen, dwEffectiveKeyLen ? - dwEffectiveKeyLen : dwKeyLen << 3, 0, &pKeyContext->key); + SymCryptRc2ExpandKeyEx( &pKeyContext->rc2, abKeyValue, dwKeyLen + dwSaltLen, + dwEffectiveKeyLen ? dwEffectiveKeyLen : dwKeyLen << 3 ); break; - case CALG_3DES: - des3_setup(abKeyValue, 24, 0, &pKeyContext->key); + SymCrypt3DesExpandKey( &pKeyContext->des3, abKeyValue, 24 ); break; - case CALG_3DES_112: - memcpy(abKeyValue+16, abKeyValue, 8); - des3_setup(abKeyValue, 24, 0, &pKeyContext->key); + SymCrypt3DesExpandKey( &pKeyContext->des3, abKeyValue, 16 ); break; - case CALG_DES: - des_setup(abKeyValue, 8, 0, &pKeyContext->key); + SymCryptDesExpandKey( &pKeyContext->des, abKeyValue, 8 ); break; - case CALG_AES: case CALG_AES_128: - aes_setup(abKeyValue, 16, 0, &pKeyContext->key); + SymCryptAesExpandKey( &pKeyContext->aes, abKeyValue, 16 ); break; - case CALG_AES_192: - aes_setup(abKeyValue, 24, 0, &pKeyContext->key); + SymCryptAesExpandKey( &pKeyContext->aes, abKeyValue, 24 ); break; - case CALG_AES_256: - aes_setup(abKeyValue, 32, 0, &pKeyContext->key); + SymCryptAesExpandKey( &pKeyContext->aes, abKeyValue, 32 ); break; } @@ -227,23 +189,20 @@ BOOL encrypt_block_impl(ALG_ID aiAlgid, DWORD dwKeySpec, KEY_CONTEXT *pKeyContex switch (aiAlgid) { case CALG_RC2: - rc2_ecb_encrypt(in, 
out, &pKeyContext->key); + SymCryptRc2Encrypt( &pKeyContext->rc2, in, out ); break; - case CALG_3DES: case CALG_3DES_112: - des3_ecb_encrypt(in, out, &pKeyContext->key); + SymCrypt3DesEncrypt( &pKeyContext->des3, in, out ); break; - case CALG_DES: - des_ecb_encrypt(in, out, &pKeyContext->key); + SymCryptDesEncrypt( &pKeyContext->des, in, out ); break; - case CALG_AES: case CALG_AES_128: case CALG_AES_192: case CALG_AES_256: - aes_ecb_encrypt(in, out, &pKeyContext->key); + SymCryptAesEncrypt( &pKeyContext->aes, in, out ); break; case CALG_RSA_KEYX: @@ -273,23 +232,20 @@ BOOL decrypt_block_impl(ALG_ID aiAlgid, DWORD dwKeySpec, KEY_CONTEXT *pKeyContex switch (aiAlgid) { case CALG_RC2: - rc2_ecb_decrypt(in, out, &pKeyContext->key); + SymCryptRc2Decrypt( &pKeyContext->rc2, in, out ); break; - case CALG_3DES: case CALG_3DES_112: - des3_ecb_decrypt(in, out, &pKeyContext->key); + SymCrypt3DesDecrypt( &pKeyContext->des3, in, out ); break; - case CALG_DES: - des_ecb_decrypt(in, out, &pKeyContext->key); + SymCryptDesDecrypt( &pKeyContext->des, in, out ); break; - case CALG_AES: case CALG_AES_128: case CALG_AES_192: case CALG_AES_256: - aes_ecb_decrypt(in, out, &pKeyContext->key); + SymCryptAesDecrypt( &pKeyContext->aes, in, out ); break; case CALG_RSA_KEYX: diff --git a/dlls/rsaenh/implglue.h b/dlls/rsaenh/implglue.h index b0a518830ac..4ea65e3a973 100644 --- a/dlls/rsaenh/implglue.h +++ b/dlls/rsaenh/implglue.h @@ -25,11 +25,15 @@ #define __WINE_IMPLGLUE_H #include "tomcrypt.h" +#include "symcrypt.h" #define RSAENH_MAX_HASH_SIZE 104 typedef union tagKEY_CONTEXT { - symmetric_key key; + SYMCRYPT_DES_EXPANDED_KEY des; + SYMCRYPT_3DES_EXPANDED_KEY des3; + SYMCRYPT_RC2_EXPANDED_KEY rc2; + SYMCRYPT_AES_EXPANDED_KEY aes; prng_state prng; rsa_key rsa; } KEY_CONTEXT; @@ -39,13 +43,21 @@ extern int wprng; struct hash { - const struct ltc_hash_descriptor *desc; - hash_state state; + const SYMCRYPT_HASH *desc; + SYMCRYPT_HASH_STATE state; }; BOOL init_hash_impl(ALG_ID algid, struct hash 
*hash); -BOOL update_hash_impl(struct hash *hash, const BYTE *data, DWORD len); -BOOL finalize_hash_impl(struct hash *hash, BYTE *hash_value, DWORD hash_size); + +static inline void update_hash_impl(struct hash *hash, const BYTE *data, DWORD len) +{ + SymCryptHashAppend( hash->desc, &hash->state, data, len ); +} + +static inline void finalize_hash_impl(struct hash *hash, BYTE *hash_value, DWORD hash_size) +{ + SymCryptHashResult( hash->desc, &hash->state, hash_value, hash_size ); +} BOOL new_key_impl(ALG_ID aiAlgid, KEY_CONTEXT *pKeyContext, DWORD dwKeyLen); BOOL free_key_impl(ALG_ID aiAlgid, KEY_CONTEXT *pKeyContext); diff --git a/dlls/rsaenh/rsaenh.c b/dlls/rsaenh/rsaenh.c index fbd500b7c43..4eca3a6d9eb 100644 --- a/dlls/rsaenh/rsaenh.c +++ b/dlls/rsaenh/rsaenh.c @@ -354,6 +354,7 @@ BOOL WINAPI DllMain(HINSTANCE hInstance, DWORD fdwReason, PVOID reserved) case DLL_PROCESS_ATTACH: DisableThreadLibraryCalls(hInstance); init_handle_table(&handle_table); + SymCryptInit(); /* tomcrypt initialization */ init_LTM(); wprng = register_prng( &rc4_desc ); -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/11025
Thanks, hopefully it's better now. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/11025#note_141693
Yes, everything seems fine. Good to see that you've managed to build with optimizations, AES is as fast as Windows now :-) -- https://gitlab.winehq.org/wine/wine/-/merge_requests/11025#note_141704
This merge request was approved by Hans Leidekker. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/11025
Nice! It looks like we don't even need the asm files for AES. We may need asm for the other algorithms, though. Have you been able to benchmark them? -- https://gitlab.winehq.org/wine/wine/-/merge_requests/11025#note_141715
The SHA hashes are slightly faster than Windows and twice as fast as TomCrypt. RSA sign/verify is 4 times slower than Windows and ECC sign/verify is 10 times slower than Windows. Both are also slower than TomCrypt. So there seems to be room for optimizing the asymmetric algorithms. Performance isn't as important here as it is for ciphers and hashes of course. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/11025#note_141734
OK thanks, I'll work on enabling the asm code for these algorithms. -- https://gitlab.winehq.org/wine/wine/-/merge_requests/11025#note_141735
participants (3)
- Alexandre Julliard
- Alexandre Julliard (@julliard)
- Hans Leidekker (@hans)