When application uses FLT_MAX in shader local constants, GLSL shader code gets 3.40282347e+38 string literal. Sometimes (seemingly depending on some side factors) NVIDIA GLSL compiler rejects such shaders with 'floating point constant overflow' error.
Signed-off-by: Paul Gofman gofmanp@gmail.com --- v2: - removed leftover debug change in utils.c.
dlls/d3d9/tests/visual.c | 48 ++++++++++++++++++++++++++++++++++++-- dlls/wined3d/glsl_shader.c | 33 +++++++++++++++++--------- 2 files changed, 68 insertions(+), 13 deletions(-)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index 85c1fa1bf8..36465f2156 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -28,6 +28,7 @@ * causes visible results in games can be tested in a way that does not depend on pixel exactness */
+#include <float.h> #include <limits.h> #include <math.h>
@@ -16644,7 +16645,23 @@ static void fp_special_test(void) 0x0000ffff, /* end */ };
- struct + static const DWORD ps_fltmax_code[] = + { + 0xffff0200, /* ps_2_0 */ + 0x05000051, 0xa00f0000, 0x7f7fffff, 0xff7fffff, 0x7f7fffff, 0xff7fffff, /* def c0, FLT_MAX, -FLT_MAX, + * FLT_MAX, -FLT_MAX */ + 0x05000051, 0xa00f0002, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c2, 1.0, 1.0, 1.0, 1.0 */ + 0x05000051, 0xa00f0003, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c3, 0.5, 0.5, 0.5, 0.5 */ + 0x02000001, 0x800f0001, 0xa0e40001, /* mov r1, c1 */ + 0x03000002, 0x800f0000, 0xa0e40000, 0x81e40001, /* add r0, c0, -r1 */ + 0x02000023, 0x800f0000, 0x80e40000, /* abs r0, r0 */ + 0x02000001, 0x800f0001, 0xa0e40003, /* mov r1, c3 */ + 0x04000058, 0x800f0000, 0x81e40000, 0xa0e40002, 0x80e40001, /* cmp r0, -r0, c2, r1 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + + static const struct { float x, y, z; float s; @@ -16657,10 +16674,12 @@ static void fp_special_test(void) { 1.0f, -1.0f, 1.0f, 0.0f}, };
+ static const struct vec4 const_max_flt = {FLT_MAX, -FLT_MAX, FLT_MAX, -FLT_MAX}; IDirect3DPixelShader9 *ps; IDirect3DDevice9 *device; UINT body_size = 0; IDirect3D9 *d3d; + D3DCOLOR color; DWORD *vs_code; ULONG refcount; D3DCAPS9 caps; @@ -16712,7 +16731,6 @@ static void fp_special_test(void) { DWORD offset = ARRAY_SIZE(vs_header); IDirect3DVertexShader9 *vs; - D3DCOLOR color;
memcpy(vs_code + offset, vs_body[i].ops, vs_body[i].size); offset += vs_body[i].size / sizeof(*vs_body[i].ops); @@ -16752,6 +16770,32 @@ static void fp_special_test(void) hr = IDirect3DDevice9_SetPixelShader(device, NULL); ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); IDirect3DPixelShader9_Release(ps); + + hr = IDirect3DDevice9_CreatePixelShader(device, ps_fltmax_code, &ps); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, ps); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetPixelShaderConstantF(device, 1, &const_max_flt.x, 1); + ok(hr == D3D_OK, "IDirect3DDevice9_SetPixelShaderConstantF returned %08x\n", hr); + + hr = IDirect3DDevice9_BeginScene(device); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + + color = getPixelColor(device, 320, 240); + ok(color == 0x00ffffff, "Got unexpected color 0x%08x.\n", color); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetPixelShader(device, NULL); + ok(hr == D3D_OK, "Got unexpected hr %#x.\n", hr); + IDirect3DPixelShader9_Release(ps); + refcount = IDirect3DDevice9_Release(device); ok(!refcount, "Device has %u references left.\n", refcount); done: diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 746d7cfa36..4a60f6e0e4 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -432,14 +432,25 @@ static void shader_glsl_add_version_declaration(struct wined3d_string_buffer *bu shader_addline(buffer, "#version %u\n", shader_glsl_get_version(gl_info)); }
-static void shader_glsl_append_imm_vec4(struct wined3d_string_buffer *buffer, const float *values) +static void shader_glsl_imm_float_literal(float value, char *str, + const struct wined3d_gl_info *gl_info) +{ + if (fabsf(value) == FLT_MAX && isfinite(value) && gl_info->supported[ARB_SHADER_BIT_ENCODING]) + strcpy(str, value == FLT_MAX ? "uintBitsToFloat(0x7f7fffffu)" + : "uintBitsToFloat(0xff7fffffu)"); + else + wined3d_ftoa(value, str); +} + +static void shader_glsl_append_imm_vec4(struct wined3d_string_buffer *buffer, const float *values, + const struct wined3d_gl_info *gl_info) { - char str[4][17]; + char str[4][29];
- wined3d_ftoa(values[0], str[0]); - wined3d_ftoa(values[1], str[1]); - wined3d_ftoa(values[2], str[2]); - wined3d_ftoa(values[3], str[3]); + shader_glsl_imm_float_literal(values[0], str[0], gl_info); + shader_glsl_imm_float_literal(values[1], str[1], gl_info); + shader_glsl_imm_float_literal(values[2], str[2], gl_info); + shader_glsl_imm_float_literal(values[3], str[3], gl_info); shader_addline(buffer, "vec4(%s, %s, %s, %s)", str[0], str[1], str[2], str[3]); }
@@ -2932,7 +2943,7 @@ static void shader_generate_glsl_declarations(const struct wined3d_context *cont LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) { shader_addline(buffer, "const vec4 %s_lc%u = ", prefix, lconst->idx); - shader_glsl_append_imm_vec4(buffer, (const float *)lconst->value); + shader_glsl_append_imm_vec4(buffer, (const float *)lconst->value, gl_info); shader_addline(buffer, ";\n"); } } @@ -8104,10 +8115,10 @@ static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context if (args->srgb_correction) { shader_addline(buffer, "const vec4 srgb_const0 = "); - shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const0); + shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const0, gl_info); shader_addline(buffer, ";\n"); shader_addline(buffer, "const vec4 srgb_const1 = "); - shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const1); + shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const1, gl_info); shader_addline(buffer, ";\n"); } if (reg_maps->vpos || reg_maps->usesdsy) @@ -10001,10 +10012,10 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * if (settings->sRGB_write) { shader_addline(buffer, "const vec4 srgb_const0 = "); - shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const0); + shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const0, gl_info); shader_addline(buffer, ";\n"); shader_addline(buffer, "const vec4 srgb_const1 = "); - shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const1); + shader_glsl_append_imm_vec4(buffer, wined3d_srgb_const1, gl_info); shader_addline(buffer, ";\n"); }
After testing a bit more I figured out the exact conditions which lead to shader compilation failure, and given those conditions are reliably avoided by CSMT I don't think anymore that this patch is much useful. Please disregard it, sorry for the noise.
Just in case, I wrote all the details here: https://bugs.winehq.org/show_bug.cgi?id=46821#c5
On 4/5/19 21:08, Paul Gofman wrote:
When application uses FLT_MAX in shader local constants, GLSL shader code gets 3.40282347e+38 string literal. Sometimes (seemingly depending on some side factors) NVIDIA GLSL compiler rejects such shaders with 'floating point constant overflow' error.
Am 06.04.19 um 00:28 schrieb Paul Gofman:
After testing a bit more I figured out the exact conditions which lead to shader compilation failure, and given those conditions are reliably avoided by CSMT I don't think anymore that this patch is much useful. Please disregard it, sorry for the noise.
I suspect the application changed the FPU precision to single precision or something like that. I remember that there is (was?) some bug where the nvidia driver didn't parse a float literal that was not MAX_FLOAT correctly and the resulting loss of precision broke the shader.
Other drivers parsed it correctly even with the FPU in single precision mode.
On 4/6/19 12:06, Stefan Dösinger wrote:
I suspect the application changed the FPU precision to single precision or something like that. I remember that there is (was?) some bug where the nvidia driver didn't parse a float literal that was not MAX_FLOAT correctly and the resulting loss of precision broke the shader.
Other drivers parsed it correctly even with the FPU in single precision mode.
Yes, this is the case. Application does not even have to do that explicitly, x87 single precision is set in d3d9 device initialization without D3DCREATE_FPU_PRESERVE flag. It is strange that most (if not all) of the 32 bit builds are still using x87 these days. I suppose many of then won't run on Pentium III computer anyway.
I don't think special casing specific values adds much, but by itself, using ARB_shader_bit_encoding when available wouldn't be a bad thing. Note that we do something very similar for immediate values in shader_glsl_get_register_name(). The specific issue there was related to specifying floating-point specials like +/-INF or NaN.
I will make such a patch then. I would suggest to still keep constant values output as float in a comment to keep GLSL code readable in log.
On 4/8/19 18:19, Henri Verbeet wrote:
I don't think special casing specific values adds much, but by itself, using ARB_shader_bit_encoding when available wouldn't be a bad thing. Note that we do something very similar for immediate values in shader_glsl_get_register_name(). The specific issue there was related to specifying floating-point specials like +/-INF or NaN.
On Mon, 8 Apr 2019 at 20:41, Paul Gofman gofmanp@gmail.com wrote:
I will make such a patch then. I would suggest to still keep constant values output as float in a comment to keep GLSL code readable in log.
That's probably fine.