replaced the stub with a real implementation for FMA3
Proton-Bug: https://github.com/ValveSoftware/Proton/issues/3227 --- .../api-ms-win-crt-math-l1-1-0.spec | 2 +- dlls/msvcr120/msvcr120.spec | 1 + dlls/msvcrt/math.c | 14 ++++++++++++++ dlls/ucrtbase/ucrtbase.spec | 2 +- 4 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec b/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec index cb8fdc9227..579b97fee3 100644 --- a/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec +++ b/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec @@ -81,7 +81,7 @@ @ cdecl _fpclass(double) ucrtbase._fpclass @ stub _fpclassf @ cdecl -arch=i386 -ret64 _ftol() ucrtbase._ftol -@ stub _get_FMA3_enable +@ cdecl -arch=win64 _get_FMA3_enable() ucrtbase._get_FMA3_enable @ cdecl _hypot(double double) ucrtbase._hypot @ cdecl _hypotf(float float) ucrtbase._hypotf @ cdecl _isnan(double) ucrtbase._isnan diff --git a/dlls/msvcr120/msvcr120.spec b/dlls/msvcr120/msvcr120.spec index 903f4b7ce3..1016f6a649 100644 --- a/dlls/msvcr120/msvcr120.spec +++ b/dlls/msvcr120/msvcr120.spec @@ -1639,6 +1639,7 @@ @ stdcall -arch=i386 _seh_longjmp_unwind4(ptr) @ stdcall -arch=i386 _seh_longjmp_unwind(ptr) @ cdecl -arch=i386 _set_SSE2_enable(long) MSVCRT__set_SSE2_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable @ cdecl -arch=win64 _set_FMA3_enable(long) MSVCRT__set_FMA3_enable @ cdecl _set_abort_behavior(long long) MSVCRT__set_abort_behavior @ cdecl _set_controlfp(long long) diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index f6824128cd..80e73ccefe 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -118,6 +118,20 @@ int CDECL MSVCRT__set_SSE2_enable(int flag) }
#if defined(_WIN64) && _MSVCR_VER>=120 +/********************************************************************* + * _get_FMA3_enable (MSVCR120.@) + */ +int CDECL MSVCRT__get_FMA3_enable(void) +{ + #if !defined(__FMA__) && defined(__AVX2__) + #define __FMA__ 1 + #endif + #if defined(__FMA__) + return 1; + #endif + return 0; +} + /********************************************************************* * _set_FMA3_enable (MSVCR120.@) */ diff --git a/dlls/ucrtbase/ucrtbase.spec b/dlls/ucrtbase/ucrtbase.spec index 568019da51..6cac7a5ef5 100644 --- a/dlls/ucrtbase/ucrtbase.spec +++ b/dlls/ucrtbase/ucrtbase.spec @@ -357,7 +357,7 @@ @ cdecl _fwrite_nolock(ptr long long ptr) MSVCRT__fwrite_nolock @ cdecl _gcvt(double long str) MSVCRT__gcvt @ cdecl _gcvt_s(ptr long double long) MSVCRT__gcvt_s -@ stub _get_FMA3_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable @ cdecl _get_current_locale() MSVCRT__get_current_locale @ cdecl _get_daylight(ptr) @ cdecl _get_doserrno(ptr)
You patch is missing the signoff and requires your real name.
See https://wiki.winehq.org/Submitting_Patches
Regards Alistair
Should this really be static? There is a function to switch this mode too.
From: daxcore daxcore@online.de
replaced the stub with a real implementation for FMA3
Proton-Bug: https://github.com/ValveSoftware/Proton/issues/3227 Signed-off-by: Rosa Hase daxcore@online.de
---
v2: also implemented the set_FMA3_enable function. --- .../api-ms-win-crt-math-l1-1-0.spec | 2 +- dlls/msvcr120/msvcr120.spec | 1 + dlls/msvcrt/math.c | 22 +++++++++++++++++-- dlls/ucrtbase/ucrtbase.spec | 2 +- 4 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec b/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec index cb8fdc9227..579b97fee3 100644 --- a/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec +++ b/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec @@ -81,7 +81,7 @@ @ cdecl _fpclass(double) ucrtbase._fpclass @ stub _fpclassf @ cdecl -arch=i386 -ret64 _ftol() ucrtbase._ftol -@ stub _get_FMA3_enable +@ cdecl -arch=win64 _get_FMA3_enable() ucrtbase._get_FMA3_enable @ cdecl _hypot(double double) ucrtbase._hypot @ cdecl _hypotf(float float) ucrtbase._hypotf @ cdecl _isnan(double) ucrtbase._isnan diff --git a/dlls/msvcr120/msvcr120.spec b/dlls/msvcr120/msvcr120.spec index 903f4b7ce3..1016f6a649 100644 --- a/dlls/msvcr120/msvcr120.spec +++ b/dlls/msvcr120/msvcr120.spec @@ -1639,6 +1639,7 @@ @ stdcall -arch=i386 _seh_longjmp_unwind4(ptr) @ stdcall -arch=i386 _seh_longjmp_unwind(ptr) @ cdecl -arch=i386 _set_SSE2_enable(long) MSVCRT__set_SSE2_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable @ cdecl -arch=win64 _set_FMA3_enable(long) MSVCRT__set_FMA3_enable @ cdecl _set_abort_behavior(long long) MSVCRT__set_abort_behavior @ cdecl _set_controlfp(long long) diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index f6824128cd..673aaba1a6 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -56,10 +56,20 @@ static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;
static BOOL sse2_supported; static BOOL sse2_enabled; +static BOOL fma3_supported; +static BOOL fma3_enabled;
void msvcrt_init_math(void) { sse2_supported = sse2_enabled = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ); + #if !defined(__FMA__) && defined(__AVX2__) + #define __FMA__ 1 + #endif + #if defined(__FMA__) + fma3_supported = fma3_enabled = 1; + #else + fma3_supported = fma3_enabled = 0; + #endif }
/********************************************************************* @@ -118,13 +128,21 @@ int CDECL MSVCRT__set_SSE2_enable(int flag) }
#if defined(_WIN64) && _MSVCR_VER>=120 +/********************************************************************* + * _get_FMA3_enable (MSVCR120.@) + */ +int CDECL MSVCRT__get_FMA3_enable(void) +{ + return fma3_enabled; +} + /********************************************************************* * _set_FMA3_enable (MSVCR120.@) */ int CDECL MSVCRT__set_FMA3_enable(int flag) { - FIXME("(%x) stub\n", flag); - return 0; + fma3_enabled = flag && fma3_supported; + return fma3_enabled; } #endif
diff --git a/dlls/ucrtbase/ucrtbase.spec b/dlls/ucrtbase/ucrtbase.spec index 568019da51..6cac7a5ef5 100644 --- a/dlls/ucrtbase/ucrtbase.spec +++ b/dlls/ucrtbase/ucrtbase.spec @@ -357,7 +357,7 @@ @ cdecl _fwrite_nolock(ptr long long ptr) MSVCRT__fwrite_nolock @ cdecl _gcvt(double long str) MSVCRT__gcvt @ cdecl _gcvt_s(ptr long double long) MSVCRT__gcvt_s -@ stub _get_FMA3_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable @ cdecl _get_current_locale() MSVCRT__get_current_locale @ cdecl _get_daylight(ptr) @ cdecl _get_doserrno(ptr)
From: daxcore daxcore@online.de
replaced the stub with a real implementation for FMA3_enable
Proton-Bug: https://github.com/ValveSoftware/Proton/issues/3227 Signed-off-by: Rosa Hase daxcore@online.de
---
v2: also implemented the set_FMA3_enable function. v3: added the preprocessor checks --- .../api-ms-win-crt-math-l1-1-0.spec | 2 +- dlls/msvcr120/msvcr120.spec | 1 + dlls/msvcrt/math.c | 26 +++++++++++++++++-- dlls/ucrtbase/ucrtbase.spec | 2 +- 4 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec b/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec index cb8fdc9227..579b97fee3 100644 --- a/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec +++ b/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec @@ -81,7 +81,7 @@ @ cdecl _fpclass(double) ucrtbase._fpclass @ stub _fpclassf @ cdecl -arch=i386 -ret64 _ftol() ucrtbase._ftol -@ stub _get_FMA3_enable +@ cdecl -arch=win64 _get_FMA3_enable() ucrtbase._get_FMA3_enable @ cdecl _hypot(double double) ucrtbase._hypot @ cdecl _hypotf(float float) ucrtbase._hypotf @ cdecl _isnan(double) ucrtbase._isnan diff --git a/dlls/msvcr120/msvcr120.spec b/dlls/msvcr120/msvcr120.spec index 903f4b7ce3..1016f6a649 100644 --- a/dlls/msvcr120/msvcr120.spec +++ b/dlls/msvcr120/msvcr120.spec @@ -1639,6 +1639,7 @@ @ stdcall -arch=i386 _seh_longjmp_unwind4(ptr) @ stdcall -arch=i386 _seh_longjmp_unwind(ptr) @ cdecl -arch=i386 _set_SSE2_enable(long) MSVCRT__set_SSE2_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable @ cdecl -arch=win64 _set_FMA3_enable(long) MSVCRT__set_FMA3_enable @ cdecl _set_abort_behavior(long long) MSVCRT__set_abort_behavior @ cdecl _set_controlfp(long long) diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index f6824128cd..ea5781a0d4 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -56,10 +56,24 @@ static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;
static BOOL sse2_supported; static BOOL sse2_enabled; +#if defined(_WIN64) && _MSVCR_VER>=120 +static BOOL fma3_supported; +static BOOL fma3_enabled; +#endif
void msvcrt_init_math(void) { sse2_supported = sse2_enabled = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ); + #if defined(_WIN64) && _MSVCR_VER>=120 + #if !defined(__FMA__) && defined(__AVX2__) + #define __FMA__ 1 + #endif + #if defined(__FMA__) + fma3_supported = fma3_enabled = 1; + #else + fma3_supported = fma3_enabled = 0; + #endif + #endif }
/********************************************************************* @@ -118,13 +132,21 @@ int CDECL MSVCRT__set_SSE2_enable(int flag) }
#if defined(_WIN64) && _MSVCR_VER>=120 +/********************************************************************* + * _get_FMA3_enable (MSVCR120.@) + */ +int CDECL MSVCRT__get_FMA3_enable(void) +{ + return fma3_enabled; +} + /********************************************************************* * _set_FMA3_enable (MSVCR120.@) */ int CDECL MSVCRT__set_FMA3_enable(int flag) { - FIXME("(%x) stub\n", flag); - return 0; + fma3_enabled = flag && fma3_supported; + return fma3_enabled; } #endif
diff --git a/dlls/ucrtbase/ucrtbase.spec b/dlls/ucrtbase/ucrtbase.spec index 568019da51..6cac7a5ef5 100644 --- a/dlls/ucrtbase/ucrtbase.spec +++ b/dlls/ucrtbase/ucrtbase.spec @@ -357,7 +357,7 @@ @ cdecl _fwrite_nolock(ptr long long ptr) MSVCRT__fwrite_nolock @ cdecl _gcvt(double long str) MSVCRT__gcvt @ cdecl _gcvt_s(ptr long double long) MSVCRT__gcvt_s -@ stub _get_FMA3_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable @ cdecl _get_current_locale() MSVCRT__get_current_locale @ cdecl _get_daylight(ptr) @ cdecl _get_doserrno(ptr)
Am 02.02.20 um 16:51 schrieb Rosa Hase:
From: daxcore daxcore@online.de
replaced the stub with a real implementation for FMA3_enable
Proton-Bug: https://github.com/ValveSoftware/Proton/issues/3227 Signed-off-by: Rosa Hase daxcore@online.de
Welcome to wine development! Please use your real name when submitting patches.
Hi Rosa,
On 2/2/20 4:51 PM, Rosa Hase wrote:
- #if defined(_WIN64) && _MSVCR_VER>=120
#if !defined(__FMA__) && defined(__AVX2__)
#define __FMA__ 1
The __FMA__ and __AVX2__ is only defined when compilation with this instructions is enabled (e.g. -mfma option). It's also said that you should never define them yourself.
/*********************************************************************
_set_FMA3_enable (MSVCR120.@)
*/ int CDECL MSVCRT__set_FMA3_enable(int flag) {
- FIXME("(%x) stub\n", flag);
- return 0;
- fma3_enabled = flag && fma3_supported;
- return fma3_enabled;
This is only checking if wine was compiled using -mfma or -mavx option. This function should check processor capabilities instead and switch to different math functions implementation. Taking in account that we don't provide math function implementation using fma3 instructions it's probably best to leave _set_FMA3_enable function as is.
A fake implementation of _get_FMA3_enable may look like this: +/********************************************************************* + * _get_FMA3_enable (MSVCR120.@) + */ +int CDECL MSVCRT__get_FMA3_enable(void) +{ + FIXME("() stub\n"); + return 0; +} I guess it will be good enough to fix the proton bug.
Thanks, Piotr
Hi,
I guess it will be good enough to fix the proton bug.
I think so too.
The only think is, I have no idea how big the performance lag will be, if fma3 is disabled but the cpu supports this.
BR.
03.02.2020 16:30:04 Piotr Caban piotr.caban@gmail.com:
Hi Rosa,
On 2/2/20 4:51 PM, Rosa Hase wrote:
- #if defined(_WIN64) && _MSVCR_VER>=120
- #if !defined(__FMA__) && defined(__AVX2__)
- #define __FMA__ 1
The __FMA__ and __AVX2__ is only defined when compilation with this instructions is enabled (e.g. -mfma option). It's also said that you should never define them yourself.
/*********************************************************************
- _set_FMA3_enable (MSVCR120.@)
*/ int CDECL MSVCRT__set_FMA3_enable(int flag) {
- FIXME("(%x) stub\n", flag);
- return 0;
- fma3_enabled = flag && fma3_supported;
- return fma3_enabled;
This is only checking if wine was compiled using -mfma or -mavx option. This function should check processor capabilities instead and switch to different math functions implementation. Taking in account that we don't provide math function implementation using fma3 instructions it's probably best to leave _set_FMA3_enable function as is.
A fake implementation of _get_FMA3_enable may look like this: +/*********************************************************************
- _get_FMA3_enable (MSVCR120.@)
- */
+int CDECL MSVCRT__get_FMA3_enable(void) +{
- FIXME("() stub\n");
- return 0;
+} I guess it will be good enough to fix the proton bug.
Thanks, Piotr
From: daxcore daxcore@online.de
added a stub for get_FMA3_enable, like exists for set_FMA3_enable
Proton-Bug: https://github.com/ValveSoftware/Proton/issues/3227 Signed-off-by: Rosa Hase daxcore@online.de
---
v2: also implemented the set_FMA3_enable function. v3: added the preprocessor checks v4: just use stubs for get and set FMA3_enable --- .../api-ms-win-crt-math-l1-1-0.spec | 2 +- dlls/msvcr120/msvcr120.spec | 1 + dlls/msvcrt/math.c | 9 +++++++++ dlls/ucrtbase/ucrtbase.spec | 2 +- 4 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec b/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec index cb8fdc9227..579b97fee3 100644 --- a/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec +++ b/dlls/api-ms-win-crt-math-l1-1-0/api-ms-win-crt-math-l1-1-0.spec @@ -81,7 +81,7 @@ @ cdecl _fpclass(double) ucrtbase._fpclass @ stub _fpclassf @ cdecl -arch=i386 -ret64 _ftol() ucrtbase._ftol -@ stub _get_FMA3_enable +@ cdecl -arch=win64 _get_FMA3_enable() ucrtbase._get_FMA3_enable @ cdecl _hypot(double double) ucrtbase._hypot @ cdecl _hypotf(float float) ucrtbase._hypotf @ cdecl _isnan(double) ucrtbase._isnan diff --git a/dlls/msvcr120/msvcr120.spec b/dlls/msvcr120/msvcr120.spec index 903f4b7ce3..1016f6a649 100644 --- a/dlls/msvcr120/msvcr120.spec +++ b/dlls/msvcr120/msvcr120.spec @@ -1639,6 +1639,7 @@ @ stdcall -arch=i386 _seh_longjmp_unwind4(ptr) @ stdcall -arch=i386 _seh_longjmp_unwind(ptr) @ cdecl -arch=i386 _set_SSE2_enable(long) MSVCRT__set_SSE2_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable @ cdecl -arch=win64 _set_FMA3_enable(long) MSVCRT__set_FMA3_enable @ cdecl _set_abort_behavior(long long) MSVCRT__set_abort_behavior @ cdecl _set_controlfp(long long) diff --git a/dlls/msvcrt/math.c b/dlls/msvcrt/math.c index f6824128cd..115dfbe125 100644 --- a/dlls/msvcrt/math.c +++ b/dlls/msvcrt/math.c @@ -118,6 +118,15 @@ int CDECL MSVCRT__set_SSE2_enable(int flag) }
#if defined(_WIN64) && _MSVCR_VER>=120 +/********************************************************************* + * _get_FMA3_enable (MSVCR120.@) + */ +int CDECL MSVCRT__get_FMA3_enable(void) +{ + FIXME("() stub\n"); + return 0; +} + /********************************************************************* * _set_FMA3_enable (MSVCR120.@) */ diff --git a/dlls/ucrtbase/ucrtbase.spec b/dlls/ucrtbase/ucrtbase.spec index 568019da51..6cac7a5ef5 100644 --- a/dlls/ucrtbase/ucrtbase.spec +++ b/dlls/ucrtbase/ucrtbase.spec @@ -357,7 +357,7 @@ @ cdecl _fwrite_nolock(ptr long long ptr) MSVCRT__fwrite_nolock @ cdecl _gcvt(double long str) MSVCRT__gcvt @ cdecl _gcvt_s(ptr long double long) MSVCRT__gcvt_s -@ stub _get_FMA3_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable @ cdecl _get_current_locale() MSVCRT__get_current_locale @ cdecl _get_daylight(ptr) @ cdecl _get_doserrno(ptr)
Hi Rosa,
On 2/3/20 8:40 PM, Rosa Hase wrote:
--- a/dlls/msvcr120/msvcr120.spec +++ b/dlls/msvcr120/msvcr120.spec @@ -1639,6 +1639,7 @@ @ stdcall -arch=i386 _seh_longjmp_unwind4(ptr) @ stdcall -arch=i386 _seh_longjmp_unwind(ptr) @ cdecl -arch=i386 _set_SSE2_enable(long) MSVCRT__set_SSE2_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable
Are you sure the function is exported from _get_FMA3_enable is exported from msvcr120? It looks like the function was introduced in ucrtbase.
#if defined(_WIN64) && _MSVCR_VER>=120 +/*********************************************************************
_get_FMA3_enable (MSVCR120.@)
If it was not exported from msvcr120 please also change this comment.
Thanks, Piotr
Hi,
I don't understand this hint. I just handled the get_FMA3_enable completely like the already existing set_FMA3_enable function.
BR.
Am Montag, den 03.02.2020, 20:58 +0100 schrieb Piotr Caban:
Hi Rosa,
On 2/3/20 8:40 PM, Rosa Hase wrote:
--- a/dlls/msvcr120/msvcr120.spec +++ b/dlls/msvcr120/msvcr120.spec @@ -1639,6 +1639,7 @@ @ stdcall -arch=i386 _seh_longjmp_unwind4(ptr) @ stdcall -arch=i386 _seh_longjmp_unwind(ptr) @ cdecl -arch=i386 _set_SSE2_enable(long) MSVCRT__set_SSE2_enable +@ cdecl -arch=win64 _get_FMA3_enable() MSVCRT__get_FMA3_enable
Are you sure the function is exported from _get_FMA3_enable is exported from msvcr120? It looks like the function was introduced in ucrtbase.
#if defined(_WIN64) && _MSVCR_VER>=120 +/*****************************************************************
_get_FMA3_enable (MSVCR120.@)
If it was not exported from msvcr120 please also change this comment.
Thanks, Piotr
Hi,
please use a real name for submitting patches