* This simplifies the code before adding the FETCH4 modifications. No change in the generated shaders is intended.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/glsl_shader.c | 63 ++++++++++---------------------------- 1 file changed, 16 insertions(+), 47 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 545e1bff9f..4b2dfaf34a 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9863,65 +9863,34 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * switch (settings->op[stage].tex_type) { case WINED3D_GL_RES_TYPE_TEX_1D: - if (proj) - { - texture_function = "texture1DProj"; - coord_mask = "xw"; - } - else - { - texture_function = "texture1D"; - coord_mask = "x"; - } + texture_function = "texture1D"; + coord_mask = "x"; break; case WINED3D_GL_RES_TYPE_TEX_2D: - if (proj) - { - texture_function = "texture2DProj"; - coord_mask = "xyw"; - } - else - { - texture_function = "texture2D"; - coord_mask = "xy"; - } + texture_function = "texture2D"; + coord_mask = "xy"; break; case WINED3D_GL_RES_TYPE_TEX_3D: - if (proj) - { - texture_function = "texture3DProj"; - coord_mask = "xyzw"; - } - else - { - texture_function = "texture3D"; - coord_mask = "xyz"; - } + texture_function = "texture3D"; + coord_mask = "xyz"; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; coord_mask = "xyz"; break; case WINED3D_GL_RES_TYPE_TEX_RECT: - if (proj) - { - texture_function = "texture2DRectProj"; - coord_mask = "xyw"; - } - else - { - texture_function = "texture2DRect"; - coord_mask = "xy"; - } + texture_function = "texture2DRect"; + coord_mask = "xy"; break; default: FIXME("Unhandled texture type %#x.\n", settings->op[stage].tex_type); texture_function = ""; coord_mask = "xyzw"; + proj = FALSE; break; } if (!legacy_syntax) - texture_function = proj ? "textureProj" : "texture"; + texture_function = "texture";
if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP @@ -9953,8 +9922,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); }
- shader_addline(buffer, "tex%u = %s(ps_sampler%u, ret.%s);\n", - stage, texture_function, stage, coord_mask); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%c);\n", + stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? 'w' : ' ');
if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n", @@ -9962,13 +9931,13 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * } else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) { - shader_addline(buffer, "tex%u = %s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", - stage, texture_function, stage, stage); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", + stage, texture_function, proj ? "Proj" : "", stage, stage); } else { - shader_addline(buffer, "tex%u = %s(ps_sampler%u, ffp_texcoord[%u].%s);\n", - stage, texture_function, stage, stage, coord_mask); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%c);\n", + stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? 'w' : ' '); }
string_buffer_sprintf(tex_reg_name, "tex%u", stage);
- Tested on Windows 10: when Fetch4 is enabled, projection is ignored. - What happens when Fetch4 is used on unsupported texture types is untested, so Fetch4 is disabled for them for the time being. - The swizzle fix has been checked against Windows, since the Fetch4 component order does not match that of gather4.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/glsl_shader.c | 19 ++++++++++++++++++- dlls/wined3d/utils.c | 2 ++ dlls/wined3d/wined3d_private.h | 3 ++- 3 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 4b2dfaf34a..e3aa5125fd 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9837,6 +9837,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + BOOL fetch4 = settings->op[stage].fetch4; BOOL proj;
if (!(tex_map & (1u << stage))) @@ -9856,7 +9857,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE;
@@ -9865,6 +9865,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D"; @@ -9873,6 +9874,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; @@ -9881,17 +9883,28 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_RECT: texture_function = "texture2DRect"; coord_mask = "xy"; + if (fetch4) + FIXME("Unsupported Fetch4 and texture2DRect sampling"); + fetch4 = FALSE; break; default: FIXME("Unhandled texture type %#x.\n", settings->op[stage].tex_type); texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture";
+ if (fetch4) + { + texture_function = "textureGather"; + /* Tested on W10+Intel, fetch4 enabled disables projection */ + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9940,6 +9953,10 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? 'w' : ' '); }
+ /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.xwyz;\n", stage, stage); + string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, settings->op[stage].color_fixup); diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index aa68799535..9b409b5c3b 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -5783,6 +5783,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5926,6 +5927,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4'); }
/* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index f908ff7173..f75660f0ab 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2743,7 +2743,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; };
struct ffp_frag_settings
- Implement shader generation for texld/texldp/texldd/texldb/texldl. - Vertex shader texldl is not implemented yet, since it is not possible to access ps_compile_args there. Maybe move it to another place. - Trigger pixel shader regeneration when the FETCH4 state changes.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/device.c | 4 ++ dlls/wined3d/glsl_shader.c | 126 ++++++++++++++++++++++++--------- dlls/wined3d/shader.c | 11 +++ dlls/wined3d/state.c | 14 ++++ dlls/wined3d/wined3d_private.h | 4 +- 5 files changed, 124 insertions(+), 35 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index a89a33d676..0f7c433d1e 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -2088,7 +2088,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device, device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3) + { sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - MAX_FRAGMENT_SAMPLERS); + if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4')) + FIXME("Unsupported FETCH4 and Vertex Texture Sampler"); + }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states)) { diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index e3aa5125fd..900190d728 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -49,6 +49,7 @@ WINE_DECLARE_DEBUG_CHANNEL(winediag); #define WINED3D_GLSL_SAMPLE_GRAD 0x04 #define WINED3D_GLSL_SAMPLE_LOAD 0x08 #define WINED3D_GLSL_SAMPLE_OFFSET 0x10 +#define WINED3D_GLSL_SAMPLE_GATHER 0x20
static const struct { @@ -3607,6 +3608,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET; + BOOL gather = flags & WINED3D_GLSL_SAMPLE_GATHER; const char *base = "texture", *type_part = "", *suffix = ""; unsigned int coord_size, deriv_size;
@@ -3652,6 +3654,14 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context type_part = ""; }
+ if (gather) + { + base = "texture"; + type_part = "Gather"; + suffix = ""; + projected = lod = grad = offset = FALSE; + } + sample_function->name = string_buffer_get(priv->string_buffers); string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "", lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix); @@ -5391,11 +5401,18 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) } }
+ /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + sample_flags = WINED3D_GLSL_SAMPLE_GATHER; + mask = 0; + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function); mask |= sample_function.coord_mask; sample_function.coord_mask = mask;
if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE; + else if (priv->cur_ps_args->fetch4 & (1u << resource_idx)) swizzle = (3<<2)|(1<<4)|(2<<6); /* xwyz */ else swizzle = ins->src[1].swizzle;
/* 1.0-1.3: Use destination register as coordinate source. @@ -5411,7 +5428,7 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) { struct glsl_src_param coord_param; shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param); - if (ins->flags & WINED3DSI_TEXLD_BIAS) + if (ins->flags & WINED3DSI_TEXLD_BIAS && sample_flags != WINED3D_GLSL_SAMPLE_GATHER) { struct glsl_src_param bias; shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias); @@ -5427,10 +5444,11 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) { + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, dx_param, dy_param; struct glsl_sample_function sample_function; - DWORD sampler_idx; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle;
if (!shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) @@ -5440,7 +5458,16 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) return; }
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)){ + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GATHER, &sample_function); + swizzle = (3<<2)|(1<<4)|(2<<6); + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + }
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GRAD, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); @@ -5455,19 +5482,21 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) { const struct wined3d_shader_version *shader_version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, lod_param; struct glsl_sample_function sample_function; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle; - DWORD sampler_idx; + DWORD flags = WINED3D_GLSL_SAMPLE_LOD;
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* This call can be used in vertex shader, without cur_ps_args */ + if(priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + flags = WINED3D_GLSL_SAMPLE_GATHER;
- shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); - if (shader_version->type == WINED3D_SHADER_TYPE_PIXEL && !shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) { @@ -5476,6 +5505,18 @@ static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) * even without the appropriate extension. */ WARN("Using %s in fragment shader.\n", sample_function.name->buffer); } + + if (flags == WINED3D_GLSL_SAMPLE_GATHER){ + swizzle = (3<<2)|(1<<4)|(2<<6); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + } + + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str, NULL, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); @@ -6169,6 +6210,7 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) struct glsl_sample_function sample_function; DWORD flags = WINED3D_GLSL_SAMPLE_LOAD; BOOL has_lod_param; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; @@ -6183,6 +6225,10 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) } has_lod_param = is_mipmapped(reg_maps->resource_info[resource_idx].type);
+ if (priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + FIXME("Unsupported FETCH4 and LD Sampling SM 5.0"); + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); @@ -6208,46 +6254,57 @@ static void shader_glsl_sample(const struct wined3d_shader_instruction *ins) struct glsl_src_param coord_param, lod_param, dx_param, dy_param; unsigned int resource_idx, sampler_idx, sampler_bind_idx; struct glsl_sample_function sample_function; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + DWORD swizzle = ins->src[1].swizzle; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_GRAD) flags |= WINED3D_GLSL_SAMPLE_GRAD; if (ins->handler_idx == WINED3DSIH_SAMPLE_LOD) flags |= WINED3D_GLSL_SAMPLE_LOD; if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; - - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset; + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + flags = WINED3D_GLSL_SAMPLE_GATHER; + swizzle = (3<<2)|(1<<4)|(2<<6); + }
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- switch (ins->handler_idx) + /* Fetch4 overwrites the other texture flags */ + if (flags != WINED3D_GLSL_SAMPLE_GATHER) { - case WINED3DSIH_SAMPLE: - break; - case WINED3DSIH_SAMPLE_B: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - case WINED3DSIH_SAMPLE_GRAD: - shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); - shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); - dx_param_str = dx_param.param_str; - dy_param_str = dy_param.param_str; - break; - case WINED3DSIH_SAMPLE_LOD: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - default: - ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); - break; + switch (ins->handler_idx) + { + case WINED3DSIH_SAMPLE: + break; + case WINED3DSIH_SAMPLE_B: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + case WINED3DSIH_SAMPLE_GRAD: + shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); + shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); + dx_param_str = dx_param.param_str; + dy_param_str = dy_param.param_str; + break; + case WINED3DSIH_SAMPLE_LOD: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + default: + ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); + break; + } }
sampler_bind_idx = shader_glsl_find_sampler(&ins->ctx->reg_maps->sampler_map, resource_idx, sampler_idx); - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, swizzle, dx_param_str, dy_param_str, lod_param_str, &ins->texel_offset, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); } @@ -6293,6 +6350,9 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins) unsigned int coord_size; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_C_LZ) { lod_param = "0"; @@ -6304,8 +6364,6 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins)
if (!(resource_info = shader_glsl_get_resource_info(ins, &ins->src[1].reg))) return; - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset;
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); coord_size = shader_glsl_get_write_mask_size(sample_function.coord_mask); diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..909a59e2e5 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,17 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
+ if (gl_info->supported[ARB_TEXTURE_GATHER]) + { + for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) + { + if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4')) + args->fetch4 |= 1 << i; + else + args->fetch4 &= ~(1 << i); + } + } + if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info)) { const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 42c109da57..ea56d704b5 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -3599,6 +3599,7 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state DWORD sampler_idx = state_id - STATE_SAMPLER(0); DWORD mapped_stage = context->tex_unit_map[sampler_idx]; const struct wined3d_gl_info *gl_info = context->gl_info; + int i;
TRACE("Sampler %u.\n", sampler_idx);
@@ -3616,6 +3617,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state { struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]); BOOL srgb = state->sampler_states[sampler_idx][WINED3D_SAMP_SRGB_TEXTURE]; + BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'); const DWORD *sampler_states = state->sampler_states[sampler_idx]; struct wined3d_device *device = context->device; struct wined3d_sampler_desc desc; @@ -3650,6 +3653,17 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state /* Trigger shader constant reloading (for NP2 texcoord fixup) */ if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; + + /* Trigger pixel shader recompilation for FETCH4 changes */ + if(gl_info->supported[ARB_TEXTURE_GATHER] && + ((context->last_was_fetch4 >> i) & 0x1) ^ fetch4) + { + if (fetch4) + context->last_was_fetch4 |= 1 << i; + else + context->last_was_fetch4 &= ~(1 << i); + context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL); + } } else { diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index f75660f0ab..9dfdee898d 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -1357,7 +1357,8 @@ struct ps_compile_args DWORD alpha_test_func : 3; DWORD render_offscreen : 1; DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */ - DWORD padding : 18; + WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */ + DWORD padding : 2; };
enum fog_src_type @@ -1892,6 +1893,7 @@ struct wined3d_context DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; + DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */ DWORD fog_coord : 1; DWORD fog_enabled : 1; DWORD current : 1;
Am 22.11.2018 um 02:28 schrieb Daniel Ansorregui mailszeros@gmail.com:
- Vertex texldl unimplemented yet, since it is not possible to access
ps_compile_args. Maybe move it to another place.
For that you'd need a similar change in vs_compile_args. I am fine with the FIXME for now. Vertex sampling can only be done via texldl, so I have doubts that it will work well.
- if (gl_info->supported[ARB_TEXTURE_GATHER])
- {
for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i)
{
if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4'))
args->fetch4 |= 1 << i;
else
args->fetch4 &= ~(1 << i);
}
- }
You either have to set args->fetch4 to 0 if ARB_TEXTURE_GATHER is not supported, or you don't need to remove the bit if the lod bias is not set to GET4. We have the memset at the start, so from the code point of view the second case applies. Once upon a time the idea was to avoid the memset to avoid writing ps_compile_args twice in this rather time critical function, but I don't know if this is still a worthwhile goal. Henri?
* This assumes FETCH4 is already supported by Wine, and checks for FETCH4 support before exposing DF24.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/directx.c | 6 ++++++ dlls/wined3d/utils.c | 31 +++++++++++++++++++++++++++++++ include/wine/wined3d.h | 2 ++ 3 files changed, 39 insertions(+)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index b9aa9fa440..a7c98d8095 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -1711,6 +1711,12 @@ HRESULT CDECL wined3d_check_device_format(const struct wined3d *wined3d, UINT ad return WINED3DOK_NOMIPGEN; }
+ if ((check_format_id == WINED3DFMT_DF24) && !adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + { + TRACE("No Support for Fetch4 disabling DF24 support.\n"); + return WINED3DERR_NOTAVAILABLE; + } + return WINED3D_OK; }
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 9b409b5c3b..234dde34cb 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -65,6 +65,8 @@ format_index_remap[] = {WINED3DFMT_R16, WINED3D_FORMAT_FOURCC_BASE + 20}, {WINED3DFMT_AL16, WINED3D_FORMAT_FOURCC_BASE + 21}, {WINED3DFMT_NV12, WINED3D_FORMAT_FOURCC_BASE + 22}, + {WINED3DFMT_DF16, WINED3D_FORMAT_FOURCC_BASE + 23}, + {WINED3DFMT_DF24, WINED3D_FORMAT_FOURCC_BASE + 24}, };
#define WINED3D_FORMAT_COUNT (WINED3D_FORMAT_FOURCC_BASE + ARRAY_SIZE(format_index_remap)) @@ -141,6 +143,8 @@ static const struct wined3d_format_channels formats[] = {WINED3DFMT_NVHU, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NVHS, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NULL, 8, 8, 8, 8, 0, 8, 16, 24, 4, 0, 0}, + {WINED3DFMT_DF16, 0, 0, 0, 0, 0, 0, 0, 0, 2, 16, 0}, + {WINED3DFMT_DF24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0}, /* Unsure about them, could not find a Windows driver that supports them */ {WINED3DFMT_R16, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_AL16, 0, 0, 0, 16, 0, 0, 0, 16, 4, 0, 0}, @@ -1888,6 +1892,25 @@ static const struct wined3d_format_texture_info format_texture_info[] = WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, ARB_FRAMEBUFFER_OBJECT, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_DEPTH, + WINED3D_GL_EXT_NONE, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT16, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_SHADOW, + ARB_DEPTH_TEXTURE, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8_EXT, 0, + GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, + EXT_PACKED_DEPTH_STENCIL, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8, GL_DEPTH24_STENCIL8, 0, + GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, + ARB_FRAMEBUFFER_OBJECT, NULL}, {WINED3DFMT_NULL, 0, 0, 0, GL_RGBA, 
GL_UNSIGNED_INT_8_8_8_8_REV, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_RENDERTARGET | WINED3DFMT_FLAG_FBO_ATTACHABLE, @@ -3529,6 +3552,12 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_ {WINED3DFMT_INTZ, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, {WINED3DFMT_INTZ, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT},
+ {WINED3DFMT_DF16, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF16, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT}, + + {WINED3DFMT_DF24, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF24, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT}, + {WINED3DFMT_L8_UNORM, "XXX1", FALSE, ARB_TEXTURE_RG}, };
@@ -4383,6 +4412,8 @@ const char *debug_d3dformat(enum wined3d_format_id format_id) FMT_TO_STR(WINED3DFMT_R16); FMT_TO_STR(WINED3DFMT_AL16); FMT_TO_STR(WINED3DFMT_NV12); + FMT_TO_STR(WINED3DFMT_DF16); + FMT_TO_STR(WINED3DFMT_DF24); #undef FMT_TO_STR default: { diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index 40553f7e51..97c640acc5 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -271,6 +271,8 @@ enum wined3d_format_id WINED3DFMT_R16 = WINEMAKEFOURCC(' ','R','1','6'), WINED3DFMT_AL16 = WINEMAKEFOURCC('A','L','1','6'), WINED3DFMT_NV12 = WINEMAKEFOURCC('N','V','1','2'), + WINED3DFMT_DF16 = WINEMAKEFOURCC('D','F','1','6'), + WINED3DFMT_DF24 = WINEMAKEFOURCC('D','F','2','4'),
WINED3DFMT_FORCE_DWORD = 0xffffffff };
- Implemented for texld/texldp/texldd/texldb/texldl. - In all cases tested on Windows 10 + Intel, with Fetch4 enabled every instruction produced the same result (the same as texld).
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/d3d9/tests/visual.c | 252 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index 4f26b0d23f..14f2d49132 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15104,6 +15104,257 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{ + static const DWORD vs_code[] = + { + 0xfffe0300, /* vs_3_0 */ + 0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */ + 0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */ + 0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */ + 0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */ + 0x02000001, 0xe00f0000, 0x90e40000, /* mov o0, v0 */ + 0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */ + 0x0000ffff + }; + static const DWORD ps_code_texld[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + static const DWORD ps_code_texldp[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldd[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */ + 0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */ + 0x02000001, 0x800f0002, 0xa0e40000, /* mov r2, c0 */ + 0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD 
ps_code_texldb[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldl[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + + struct + { + float x, y, z; + float tu, tv; + } + quad[] = + { + {-1.0f, 1.0f, 0.0f, 0.0f,0.0f }, + { 1.0f, 1.0f, 0.0f, 1.0f,0.0f }, + {-1.0f,-1.0f, 0.0f, 0.0f,1.0f }, + { 1.0f,-1.0f, 0.0f, 1.0f,1.0f } + }; + + struct + { + UINT x, y; + D3DCOLOR color; + } + expected_colors[] = + { + { 40, 30, 0x23102013},{160, 30, 0x22132312},{320, 30, 0x21122211}, + {480, 30, 0x20112110},{600, 30, 0x23102013}, + { 40,120, 0x13011002},{160,120, 0x120213f2},{320,120, 0x11f212f1}, + {480,120, 0x10f11101},{600,120, 0x13011002}, + { 40,240, 0x02030104},{160,240, 0xf20402f4},{320,240, 0xf1f4f2f3}, + {480,240, 0x01f3f103},{600,240, 0x02030104}, + { 40,360, 0x04200323},{160,360, 0xf4230422},{320,360, 0xf322f421}, + {480,360, 0x0321f320},{600,360, 0x04200323}, + { 40,450, 0x23102013},{160,450, 0x22132312},{320,450, 0x21122211}, + {480,450, 0x20112110},{600,450, 0x23102013}, + }; + + static const DWORD texture_data[4] = {0x10111213, + 0x01f1f202, + 0x03f3f404, + 0x20212223}; + + 
IDirect3DPixelShader9 *ps_texld, *ps_texldp, *ps_texldd, *ps_texldb, *ps_texldl; + IDirect3DSurface9 *original_rt; + struct surface_readback rb; + IDirect3DTexture9 *texture; + IDirect3DVertexShader9 *vs; + IDirect3DDevice9 *device; + D3DLOCKED_RECT lr; + IDirect3D9 *d3d; + ULONG refcount; + D3DCAPS9 caps; + HWND window; + HRESULT hr; + UINT i, j; + + + window = create_window(); + d3d = Direct3DCreate9(D3D_SDK_VERSION); + ok(!!d3d, "Failed to create a D3D object.\n"); + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4')))) + { + skip("No DF24 support, skipping FETCH4 test.\n"); + goto done; + } + if (!(device = create_device(d3d, window, window, TRUE))) + { + skip("Failed to create a D3D device, skipping tests.\n"); + goto done; + } + + hr = IDirect3DDevice9_GetDeviceCaps(device, &caps); + ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr); + if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0)) + { + skip("No pixel shader 3.0 support, skipping FETCH4 test.\n"); + IDirect3DDevice9_Release(device); + goto done; + } + if (caps.TextureCaps & D3DPTEXTURECAPS_POW2) + { + skip("No unconditional NP2 texture support, skipping FETCH4 test.\n"); + IDirect3DDevice9_Release(device); + goto done; + } + hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt); + ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr); + + /* Create our texture for FETCH4 */ + hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + memset(&lr, 0, sizeof(lr)); + hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + memcpy(lr.pBits, texture_data, sizeof(texture_data)); + hr = IDirect3DTexture9_UnlockRect(texture, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + hr = 
IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* Create vertex shader */ + hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs); + ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr); + hr = IDirect3DDevice9_SetVertexShader(device, vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + + /* Prepare the pixel shaders */ + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texld, &ps_texld); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldp, &ps_texldp); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldd, &ps_texldd); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldb, &ps_texldb); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldl, &ps_texldl); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_LINEAR); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* render */ + for (i=0; i<5; i++) + { + if (i==0) + hr = 
IDirect3DDevice9_SetPixelShader(device, ps_texld); + else if(i==1) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldp); + else if(i==2) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldd); + else if(i==3) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldb); + else + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldl); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1); + ok(SUCCEEDED(hr), "Failed to set FVF, hr %#x.\n", hr); + + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors); ++j) + { + D3DCOLOR color = get_readback_color(&rb, expected_colors[j].x, expected_colors[j].y); + ok(color_match(color, expected_colors[j].color, 1), + "Expected color 0x%08x at (%u, %u), got 0x%08x.\n", + expected_colors[j].color, expected_colors[j].x, expected_colors[j].y, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + IDirect3DTexture9_Release(texture); + IDirect3DPixelShader9_Release(ps_texld); + IDirect3DPixelShader9_Release(ps_texldp); + IDirect3DPixelShader9_Release(ps_texldb); + IDirect3DPixelShader9_Release(ps_texldd); + IDirect3DPixelShader9_Release(ps_texldl); + IDirect3DVertexShader9_Release(vs); + IDirect3DSurface9_Release(original_rt); + refcount = IDirect3DDevice9_Release(device); + ok(!refcount, "Device has %u references 
left.\n", refcount); +done: + IDirect3D9_Release(d3d); + DestroyWindow(window); +} + static void shadow_test(void) { static const DWORD ps_code[] = @@ -24291,6 +24542,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test(); + fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
Hi,
While running your changed tests on Windows, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=44768
Your paranoid android.
=== w8 (32 bit report) ===
d3d9: visual.c:8620: Test failed: Got unexpected color 0x00007580 for quad 2 (different colors). visual.c:8556: Test failed: Input test: Quad 3(2crd-wrongidx) returned color 0x00ff00ff, expected 0x00ff0080
=== debian9 (32 bit Japanese:Japan report) ===
d3d9: visual.c:9079: Test failed: Test 0 shading has color1 000000ff, expected 0000ff00. visual.c:9079: Test failed: Test 6 shading has color1 000000ff, expected 0000ff00. visual.c:9079: Test failed: Test 10 shading has color1 000000ff, expected 0000ff00. visual.c:9079: Test failed: Test 11 shading has color1 000000ff, expected 0000ff00. visual.c:9079: Test failed: Test 12 shading has color1 000000ff, expected 0000ff00. visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 0, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 1, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 2, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 3, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 4, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x00ff0000 (case 5, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 6, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x00ff0000 (case 7, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 8, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x00ff0000 (case 9, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 10, 7, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 11, 7, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 12, 7, size 0). visual.c:19939: Test failed: Expected color 0x00000000, got 0x000000ff, format D3DFMT_Q8W8V8U8, test 0, location 3x2. visual.c:19939: Test failed: Expected color 0x00000000, got 0x000000ff, format D3DFMT_Q8W8V8U8, test 2, location 3x2. visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 80. visual.c:21182: Test failed: Got unexpected color 000000ff at x=194, format 80. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 80. 
visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 77. visual.c:21182: Test failed: Got unexpected color 000000ff at x=194, format 77. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 77. visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 71. visual.c:21178: Test failed: Got unexpected color 00ffffff at x=190, format 71. visual.c:21182: Test failed: Got unexpected color 00ffffff at x=194, format 71. visual.c:21185: Test failed: Got unexpected color 00ffffff at x=318, format 71. visual.c:21189: Test failed: Got unexpected color 00000000 at x=322, format 71. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 71. visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 75. visual.c:21182: Test failed: Got unexpected color 000000ff at x=194, format 75. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 75. visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 1515474505. visual.c:21182: Test failed: Got unexpected color 000000ff at x=194, format 1515474505. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 1515474505. visual.c:24322: Test failed: Expected unsynchronised map for flags 0x1000. visual.c:24322: Test failed: Expected unsynchronised map for flags 0x3000.
* This is to simplify the code before adding the FETCH4 modifications. No change is intended in the shader generation.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/glsl_shader.c | 63 ++++++++++---------------------------- 1 file changed, 16 insertions(+), 47 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 545e1bff9f..4b2dfaf34a 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9863,65 +9863,34 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * switch (settings->op[stage].tex_type) { case WINED3D_GL_RES_TYPE_TEX_1D: - if (proj) - { - texture_function = "texture1DProj"; - coord_mask = "xw"; - } - else - { - texture_function = "texture1D"; - coord_mask = "x"; - } + texture_function = "texture1D"; + coord_mask = "x"; break; case WINED3D_GL_RES_TYPE_TEX_2D: - if (proj) - { - texture_function = "texture2DProj"; - coord_mask = "xyw"; - } - else - { - texture_function = "texture2D"; - coord_mask = "xy"; - } + texture_function = "texture2D"; + coord_mask = "xy"; break; case WINED3D_GL_RES_TYPE_TEX_3D: - if (proj) - { - texture_function = "texture3DProj"; - coord_mask = "xyzw"; - } - else - { - texture_function = "texture3D"; - coord_mask = "xyz"; - } + texture_function = "texture3D"; + coord_mask = "xyz"; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; coord_mask = "xyz"; break; case WINED3D_GL_RES_TYPE_TEX_RECT: - if (proj) - { - texture_function = "texture2DRectProj"; - coord_mask = "xyw"; - } - else - { - texture_function = "texture2DRect"; - coord_mask = "xy"; - } + texture_function = "texture2DRect"; + coord_mask = "xy"; break; default: FIXME("Unhandled texture type %#x.\n", settings->op[stage].tex_type); texture_function = ""; coord_mask = "xyzw"; + proj = FALSE; break; } if (!legacy_syntax) - texture_function = proj ? "textureProj" : "texture"; + texture_function = "texture";
if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP @@ -9953,8 +9922,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); }
- shader_addline(buffer, "tex%u = %s(ps_sampler%u, ret.%s);\n", - stage, texture_function, stage, coord_mask); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%c);\n", + stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? 'w' : ' ');
if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n", @@ -9962,13 +9931,13 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * } else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) { - shader_addline(buffer, "tex%u = %s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", - stage, texture_function, stage, stage); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", + stage, texture_function, proj ? "Proj" : "", stage, stage); } else { - shader_addline(buffer, "tex%u = %s(ps_sampler%u, ffp_texcoord[%u].%s);\n", - stage, texture_function, stage, stage, coord_mask); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%c);\n", + stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? 'w' : ' '); }
string_buffer_sprintf(tex_reg_name, "tex%u", stage);
- Tested under Windows 10: when Fetch4 is enabled, projection is ignored. - It is untested what happens when Fetch4 is used on unsupported textures, so Fetch4 is disabled for them for the time being. - The swizzle fix has been checked against Windows, since the Fetch4 swizzle does not match the gather4 one.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/glsl_shader.c | 19 ++++++++++++++++++- dlls/wined3d/utils.c | 2 ++ dlls/wined3d/wined3d_private.h | 3 ++- 3 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 4b2dfaf34a..e3aa5125fd 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9837,6 +9837,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + BOOL fetch4 = settings->op[stage].fetch4; BOOL proj;
if (!(tex_map & (1u << stage))) @@ -9856,7 +9857,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE;
@@ -9865,6 +9865,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D"; @@ -9873,6 +9874,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; @@ -9881,17 +9883,28 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_RECT: texture_function = "texture2DRect"; coord_mask = "xy"; + if (fetch4) + FIXME("Unsupported Fetch4 and texture2DRect sampling"); + fetch4 = FALSE; break; default: FIXME("Unhandled texture type %#x.\n", settings->op[stage].tex_type); texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture";
+ if (fetch4) + { + texture_function = "textureGather"; + /* Tested on W10+Intel, fetch4 enabled disables projection */ + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9940,6 +9953,10 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? 'w' : ' '); }
+ /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.xwyz;\n", stage, stage); + string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, settings->op[stage].color_fixup); diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index aa68799535..9b409b5c3b 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -5783,6 +5783,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5926,6 +5927,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4'); }
/* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index f908ff7173..f75660f0ab 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2743,7 +2743,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; };
struct ffp_frag_settings
- Implement shader generation for texld/texldp/texldd/texldb/texldl. - Vertex shader texldl is not implemented yet, since it is not possible to access ps_compile_args there. Maybe it should be moved to another place. - Trigger pixel shader re-generation on FETCH4 state change.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/device.c | 4 ++ dlls/wined3d/glsl_shader.c | 126 ++++++++++++++++++++++++--------- dlls/wined3d/shader.c | 11 +++ dlls/wined3d/state.c | 13 ++++ dlls/wined3d/wined3d_private.h | 7 +- 5 files changed, 125 insertions(+), 36 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index a89a33d676..0f7c433d1e 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -2088,7 +2088,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device, device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3) + { sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - MAX_FRAGMENT_SAMPLERS); + if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4')) + FIXME("Unsupported FETCH4 and Vertex Texture Sampler"); + }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states)) { diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index e3aa5125fd..04e475acff 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -49,6 +49,7 @@ WINE_DECLARE_DEBUG_CHANNEL(winediag); #define WINED3D_GLSL_SAMPLE_GRAD 0x04 #define WINED3D_GLSL_SAMPLE_LOAD 0x08 #define WINED3D_GLSL_SAMPLE_OFFSET 0x10 +#define WINED3D_GLSL_SAMPLE_GATHER 0x20
static const struct { @@ -3607,6 +3608,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET; + BOOL gather = flags & WINED3D_GLSL_SAMPLE_GATHER; const char *base = "texture", *type_part = "", *suffix = ""; unsigned int coord_size, deriv_size;
@@ -3652,6 +3654,14 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context type_part = ""; }
+ if (gather) + { + base = "texture"; + type_part = "Gather"; + suffix = ""; + projected = lod = grad = offset = FALSE; + } + sample_function->name = string_buffer_get(priv->string_buffers); string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "", lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix); @@ -5391,11 +5401,18 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) } }
+ /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + sample_flags = WINED3D_GLSL_SAMPLE_GATHER; + mask = 0; + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function); mask |= sample_function.coord_mask; sample_function.coord_mask = mask;
if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE; + else if (priv->cur_ps_args->fetch4 & (1u << resource_idx)) swizzle = WINED3DSP_FETCH4_SWIZZLE; else swizzle = ins->src[1].swizzle;
/* 1.0-1.3: Use destination register as coordinate source. @@ -5411,7 +5428,7 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) { struct glsl_src_param coord_param; shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param); - if (ins->flags & WINED3DSI_TEXLD_BIAS) + if (ins->flags & WINED3DSI_TEXLD_BIAS && sample_flags != WINED3D_GLSL_SAMPLE_GATHER) { struct glsl_src_param bias; shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias); @@ -5427,10 +5444,11 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) { + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, dx_param, dy_param; struct glsl_sample_function sample_function; - DWORD sampler_idx; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle;
if (!shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) @@ -5440,7 +5458,16 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) return; }
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)){ + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GATHER, &sample_function); + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + }
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GRAD, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); @@ -5455,19 +5482,21 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) { const struct wined3d_shader_version *shader_version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, lod_param; struct glsl_sample_function sample_function; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle; - DWORD sampler_idx; + DWORD flags = WINED3D_GLSL_SAMPLE_LOD;
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* This call can be used in vertex shader, without cur_ps_args */ + if(priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + flags = WINED3D_GLSL_SAMPLE_GATHER;
- shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); - if (shader_version->type == WINED3D_SHADER_TYPE_PIXEL && !shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) { @@ -5476,6 +5505,18 @@ static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) * even without the appropriate extension. */ WARN("Using %s in fragment shader.\n", sample_function.name->buffer); } + + if (flags == WINED3D_GLSL_SAMPLE_GATHER){ + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + } + + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str, NULL, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); @@ -6169,6 +6210,7 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) struct glsl_sample_function sample_function; DWORD flags = WINED3D_GLSL_SAMPLE_LOAD; BOOL has_lod_param; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; @@ -6183,6 +6225,10 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) } has_lod_param = is_mipmapped(reg_maps->resource_info[resource_idx].type);
+ if (priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + FIXME("Unsupported FETCH4 and LD Sampling SM 5.0"); + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); @@ -6208,46 +6254,57 @@ static void shader_glsl_sample(const struct wined3d_shader_instruction *ins) struct glsl_src_param coord_param, lod_param, dx_param, dy_param; unsigned int resource_idx, sampler_idx, sampler_bind_idx; struct glsl_sample_function sample_function; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + DWORD swizzle = ins->src[1].swizzle; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_GRAD) flags |= WINED3D_GLSL_SAMPLE_GRAD; if (ins->handler_idx == WINED3DSIH_SAMPLE_LOD) flags |= WINED3D_GLSL_SAMPLE_LOD; if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; - - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset; + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + flags = WINED3D_GLSL_SAMPLE_GATHER; + swizzle = WINED3DSP_FETCH4_SWIZZLE; + }
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- switch (ins->handler_idx) + /* Fetch4 overwrites the other texture flags */ + if (flags != WINED3D_GLSL_SAMPLE_GATHER) { - case WINED3DSIH_SAMPLE: - break; - case WINED3DSIH_SAMPLE_B: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - case WINED3DSIH_SAMPLE_GRAD: - shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); - shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); - dx_param_str = dx_param.param_str; - dy_param_str = dy_param.param_str; - break; - case WINED3DSIH_SAMPLE_LOD: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - default: - ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); - break; + switch (ins->handler_idx) + { + case WINED3DSIH_SAMPLE: + break; + case WINED3DSIH_SAMPLE_B: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + case WINED3DSIH_SAMPLE_GRAD: + shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); + shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); + dx_param_str = dx_param.param_str; + dy_param_str = dy_param.param_str; + break; + case WINED3DSIH_SAMPLE_LOD: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + default: + ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); + break; + } }
sampler_bind_idx = shader_glsl_find_sampler(&ins->ctx->reg_maps->sampler_map, resource_idx, sampler_idx); - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, swizzle, dx_param_str, dy_param_str, lod_param_str, &ins->texel_offset, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); } @@ -6293,6 +6350,9 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins) unsigned int coord_size; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_C_LZ) { lod_param = "0"; @@ -6304,8 +6364,6 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins)
if (!(resource_info = shader_glsl_get_resource_info(ins, &ins->src[1].reg))) return; - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset;
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); coord_size = shader_glsl_get_write_mask_size(sample_function.coord_mask); diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..909a59e2e5 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,17 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
+ if (gl_info->supported[ARB_TEXTURE_GATHER]) + { + for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) + { + if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4')) + args->fetch4 |= 1 << i; + else + args->fetch4 &= ~(1 << i); + } + } + if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info)) { const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 42c109da57..6a4d23c451 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -3616,6 +3616,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state { struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]); BOOL srgb = state->sampler_states[sampler_idx][WINED3D_SAMP_SRGB_TEXTURE]; + BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'); const DWORD *sampler_states = state->sampler_states[sampler_idx]; struct wined3d_device *device = context->device; struct wined3d_sampler_desc desc; @@ -3650,6 +3652,17 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state /* Trigger shader constant reloading (for NP2 texcoord fixup) */ if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; + + /* Trigger pixel shader recompilation for FETCH4 changes */ + if(gl_info->supported[ARB_TEXTURE_GATHER] && + ((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4) + { + if (fetch4) + context->last_was_fetch4 |= 1 << sampler_idx; + else + context->last_was_fetch4 &= ~(1 << sampler_idx); + context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL); + } } else { diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index f75660f0ab..ca2774f6c3 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -520,7 +520,8 @@ 
enum wined3d_immconst_type WINED3D_IMMCONST_VEC4, };
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_FETCH4_SWIZZLE (0u | (3u << 2) | (1u << 4) | (2u << 6)) /* xwyz */
enum wined3d_shader_src_modifier { @@ -1357,7 +1358,8 @@ struct ps_compile_args DWORD alpha_test_func : 3; DWORD render_offscreen : 1; DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */ - DWORD padding : 18; + WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */ + DWORD padding : 2; };
enum fog_src_type @@ -1892,6 +1894,7 @@ struct wined3d_context DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; + DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */ DWORD fog_coord : 1; DWORD fog_enabled : 1; DWORD current : 1;
* This assumes FETCH4 is already supported by Wine, and checks for FETCH4 support before exposing DF24.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/directx.c | 6 ++++++ dlls/wined3d/utils.c | 31 +++++++++++++++++++++++++++++++ include/wine/wined3d.h | 2 ++ 3 files changed, 39 insertions(+)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index b9aa9fa440..a7c98d8095 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -1711,6 +1711,12 @@ HRESULT CDECL wined3d_check_device_format(const struct wined3d *wined3d, UINT ad return WINED3DOK_NOMIPGEN; }
+ if ((check_format_id == WINED3DFMT_DF24) && !adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + { + TRACE("No Support for Fetch4 disabling DF24 support.\n"); + return WINED3DERR_NOTAVAILABLE; + } + return WINED3D_OK; }
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 9b409b5c3b..234dde34cb 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -65,6 +65,8 @@ format_index_remap[] = {WINED3DFMT_R16, WINED3D_FORMAT_FOURCC_BASE + 20}, {WINED3DFMT_AL16, WINED3D_FORMAT_FOURCC_BASE + 21}, {WINED3DFMT_NV12, WINED3D_FORMAT_FOURCC_BASE + 22}, + {WINED3DFMT_DF16, WINED3D_FORMAT_FOURCC_BASE + 23}, + {WINED3DFMT_DF24, WINED3D_FORMAT_FOURCC_BASE + 24}, };
#define WINED3D_FORMAT_COUNT (WINED3D_FORMAT_FOURCC_BASE + ARRAY_SIZE(format_index_remap)) @@ -141,6 +143,8 @@ static const struct wined3d_format_channels formats[] = {WINED3DFMT_NVHU, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NVHS, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NULL, 8, 8, 8, 8, 0, 8, 16, 24, 4, 0, 0}, + {WINED3DFMT_DF16, 0, 0, 0, 0, 0, 0, 0, 0, 2, 16, 0}, + {WINED3DFMT_DF24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0}, /* Unsure about them, could not find a Windows driver that supports them */ {WINED3DFMT_R16, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_AL16, 0, 0, 0, 16, 0, 0, 0, 16, 4, 0, 0}, @@ -1888,6 +1892,25 @@ static const struct wined3d_format_texture_info format_texture_info[] = WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, ARB_FRAMEBUFFER_OBJECT, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_DEPTH, + WINED3D_GL_EXT_NONE, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT16, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_SHADOW, + ARB_DEPTH_TEXTURE, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8_EXT, 0, + GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, + EXT_PACKED_DEPTH_STENCIL, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8, GL_DEPTH24_STENCIL8, 0, + GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, + ARB_FRAMEBUFFER_OBJECT, NULL}, {WINED3DFMT_NULL, 0, 0, 0, GL_RGBA, 
GL_UNSIGNED_INT_8_8_8_8_REV, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_RENDERTARGET | WINED3DFMT_FLAG_FBO_ATTACHABLE, @@ -3529,6 +3552,12 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_ {WINED3DFMT_INTZ, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, {WINED3DFMT_INTZ, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT},
+ {WINED3DFMT_DF16, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF16, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT}, + + {WINED3DFMT_DF24, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF24, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT}, + {WINED3DFMT_L8_UNORM, "XXX1", FALSE, ARB_TEXTURE_RG}, };
@@ -4383,6 +4412,8 @@ const char *debug_d3dformat(enum wined3d_format_id format_id) FMT_TO_STR(WINED3DFMT_R16); FMT_TO_STR(WINED3DFMT_AL16); FMT_TO_STR(WINED3DFMT_NV12); + FMT_TO_STR(WINED3DFMT_DF16); + FMT_TO_STR(WINED3DFMT_DF24); #undef FMT_TO_STR default: { diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index 40553f7e51..97c640acc5 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -271,6 +271,8 @@ enum wined3d_format_id WINED3DFMT_R16 = WINEMAKEFOURCC(' ','R','1','6'), WINED3DFMT_AL16 = WINEMAKEFOURCC('A','L','1','6'), WINED3DFMT_NV12 = WINEMAKEFOURCC('N','V','1','2'), + WINED3DFMT_DF16 = WINEMAKEFOURCC('D','F','1','6'), + WINED3DFMT_DF24 = WINEMAKEFOURCC('D','F','2','4'),
WINED3DFMT_FORCE_DWORD = 0xffffffff };
- Implemented for texld/texldp/texldd/texldb/texldl. - In all cases tested on Windows 10 + Intel, with Fetch4 enabled these instructions always produced the same result (like texld).
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/d3d9/tests/visual.c | 252 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index 4f26b0d23f..14f2d49132 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15104,6 +15104,257 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{ + static const DWORD vs_code[] = + { + 0xfffe0300, /* vs_3_0 */ + 0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */ + 0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */ + 0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */ + 0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */ + 0x02000001, 0xe00f0000, 0x90e40000, /* mov o0, v0 */ + 0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */ + 0x0000ffff + }; + static const DWORD ps_code_texld[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + static const DWORD ps_code_texldp[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldd[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */ + 0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */ + 0x02000001, 0x800f0002, 0xa0e40000, /* mov r2, c0 */ + 0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD 
ps_code_texldb[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldl[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + + struct + { + float x, y, z; + float tu, tv; + } + quad[] = + { + {-1.0f, 1.0f, 0.0f, 0.0f,0.0f }, + { 1.0f, 1.0f, 0.0f, 1.0f,0.0f }, + {-1.0f,-1.0f, 0.0f, 0.0f,1.0f }, + { 1.0f,-1.0f, 0.0f, 1.0f,1.0f } + }; + + struct + { + UINT x, y; + D3DCOLOR color; + } + expected_colors[] = + { + { 40, 30, 0x23102013},{160, 30, 0x22132312},{320, 30, 0x21122211}, + {480, 30, 0x20112110},{600, 30, 0x23102013}, + { 40,120, 0x13011002},{160,120, 0x120213f2},{320,120, 0x11f212f1}, + {480,120, 0x10f11101},{600,120, 0x13011002}, + { 40,240, 0x02030104},{160,240, 0xf20402f4},{320,240, 0xf1f4f2f3}, + {480,240, 0x01f3f103},{600,240, 0x02030104}, + { 40,360, 0x04200323},{160,360, 0xf4230422},{320,360, 0xf322f421}, + {480,360, 0x0321f320},{600,360, 0x04200323}, + { 40,450, 0x23102013},{160,450, 0x22132312},{320,450, 0x21122211}, + {480,450, 0x20112110},{600,450, 0x23102013}, + }; + + static const DWORD texture_data[4] = {0x10111213, + 0x01f1f202, + 0x03f3f404, + 0x20212223}; + + 
IDirect3DPixelShader9 *ps_texld, *ps_texldp, *ps_texldd, *ps_texldb, *ps_texldl; + IDirect3DSurface9 *original_rt; + struct surface_readback rb; + IDirect3DTexture9 *texture; + IDirect3DVertexShader9 *vs; + IDirect3DDevice9 *device; + D3DLOCKED_RECT lr; + IDirect3D9 *d3d; + ULONG refcount; + D3DCAPS9 caps; + HWND window; + HRESULT hr; + UINT i, j; + + + window = create_window(); + d3d = Direct3DCreate9(D3D_SDK_VERSION); + ok(!!d3d, "Failed to create a D3D object.\n"); + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4')))) + { + skip("No DF24 support, skipping FETCH4 test.\n"); + goto done; + } + if (!(device = create_device(d3d, window, window, TRUE))) + { + skip("Failed to create a D3D device, skipping tests.\n"); + goto done; + } + + hr = IDirect3DDevice9_GetDeviceCaps(device, &caps); + ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr); + if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0)) + { + skip("No pixel shader 3.0 support, skipping FETCH4 test.\n"); + IDirect3DDevice9_Release(device); + goto done; + } + if (caps.TextureCaps & D3DPTEXTURECAPS_POW2) + { + skip("No unconditional NP2 texture support, skipping FETCH4 test.\n"); + IDirect3DDevice9_Release(device); + goto done; + } + hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt); + ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr); + + /* Create our texture for FETCH4 */ + hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + memset(&lr, 0, sizeof(lr)); + hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + memcpy(lr.pBits, texture_data, sizeof(texture_data)); + hr = IDirect3DTexture9_UnlockRect(texture, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + hr = 
IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* Create vertex shader */ + hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs); + ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr); + hr = IDirect3DDevice9_SetVertexShader(device, vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + + /* Prepare the pixel shaders */ + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texld, &ps_texld); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldp, &ps_texldp); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldd, &ps_texldd); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldb, &ps_texldb); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldl, &ps_texldl); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_LINEAR); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* render */ + for (i=0; i<5; i++) + { + if (i==0) + hr = 
IDirect3DDevice9_SetPixelShader(device, ps_texld); + else if(i==1) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldp); + else if(i==2) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldd); + else if(i==3) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldb); + else + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldl); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1); + ok(SUCCEEDED(hr), "Failed to set FVF, hr %#x.\n", hr); + + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors); ++j) + { + D3DCOLOR color = get_readback_color(&rb, expected_colors[j].x, expected_colors[j].y); + ok(color_match(color, expected_colors[j].color, 1), + "Expected color 0x%08x at (%u, %u), got 0x%08x.\n", + expected_colors[j].color, expected_colors[j].x, expected_colors[j].y, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + IDirect3DTexture9_Release(texture); + IDirect3DPixelShader9_Release(ps_texld); + IDirect3DPixelShader9_Release(ps_texldp); + IDirect3DPixelShader9_Release(ps_texldb); + IDirect3DPixelShader9_Release(ps_texldd); + IDirect3DPixelShader9_Release(ps_texldl); + IDirect3DVertexShader9_Release(vs); + IDirect3DSurface9_Release(original_rt); + refcount = IDirect3DDevice9_Release(device); + ok(!refcount, "Device has %u references 
left.\n", refcount); +done: + IDirect3D9_Release(d3d); + DestroyWindow(window); +} + static void shadow_test(void) { static const DWORD ps_code[] = @@ -24291,6 +24542,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test(); + fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
Hi,
While running your changed tests on Windows, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=44773
Your paranoid android.
=== w8adm (32 bit report) ===
d3d9: visual.c:8572: Test failed: Input test: Quad 3(2crd-wrongidx) returned color 0x00ff00ff, expected 0x00ff0080
=== debian9 (32 bit Japanese:Japan report) ===
d3d9: visual.c:9079: Test failed: Test 0 shading has color1 000000ff, expected 0000ff00. visual.c:9079: Test failed: Test 6 shading has color1 000000ff, expected 0000ff00. visual.c:9079: Test failed: Test 10 shading has color1 000000ff, expected 0000ff00. visual.c:9079: Test failed: Test 11 shading has color1 000000ff, expected 0000ff00. visual.c:9079: Test failed: Test 12 shading has color1 000000ff, expected 0000ff00. visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 0, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 1, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 2, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 3, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 4, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x00ff0000 (case 5, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 6, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x00ff0000 (case 7, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 8, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x00ff0000 (case 9, 3, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 10, 7, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 11, 7, size 0). visual.c:11580: Test failed: Got unexpected color 0x0000ff00 (case 12, 7, size 0). visual.c:19939: Test failed: Expected color 0x00000000, got 0x000000ff, format D3DFMT_Q8W8V8U8, test 0, location 3x2. visual.c:19939: Test failed: Expected color 0x00000000, got 0x000000ff, format D3DFMT_Q8W8V8U8, test 2, location 3x2. visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 80. visual.c:21182: Test failed: Got unexpected color 000000ff at x=194, format 80. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 80. 
visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 77. visual.c:21182: Test failed: Got unexpected color 000000ff at x=194, format 77. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 77. visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 71. visual.c:21178: Test failed: Got unexpected color 00ffffff at x=190, format 71. visual.c:21182: Test failed: Got unexpected color 00ffffff at x=194, format 71. visual.c:21185: Test failed: Got unexpected color 00ffffff at x=318, format 71. visual.c:21189: Test failed: Got unexpected color 00000000 at x=322, format 71. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 71. visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 75. visual.c:21182: Test failed: Got unexpected color 000000ff at x=194, format 75. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 75. visual.c:21175: Test failed: Got unexpected color 00ffffff at x=64, format 1515474505. visual.c:21182: Test failed: Got unexpected color 000000ff at x=194, format 1515474505. visual.c:21192: Test failed: Got unexpected color 00000000 at x=446, format 1515474505. visual.c:24322: Test failed: Expected unsynchronised map for flags 0x1000. visual.c:24322: Test failed: Expected unsynchronised map for flags 0x3000.
Hi,
Can you add a test showing that FETCH4 is disabled when the bound texture is not a single-channel texture? I sense that there might be applications that enable FETCH4 and then "forget" to disable it when they render from an ARGB texture.
A test for DF16 and DF24 would be great as well, but it doesn't belong in this patch and should be a separate one. It's probably best to extend intz_test.
- if (caps.TextureCaps & D3DPTEXTURECAPS_POW2)
- {
skip("No unconditional NP2 texture support, skipping FETCH4 test.\n");
IDirect3DDevice9_Release(device);
goto done;
- }
Do you still need this?
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_LINEAR);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
This contradicts AMD's description, but I assume you placed them here to show that their docs are wrong and FETCH4 is used regardless of the filter. I think a comment would be useful.
I agree with your comments.
I'll extend this patch with more tests, such as non-single-channel textures and other corner cases. I will also explain in more detail the logic behind the arguments used for the test.
Regarding D3DPTEXTURECAPS_POW2, I just wanted to be sure those textures are supported, but if you think it is not needed I'll remove it in the next version.
I can also try to draft DF16/DF24 tests. However, I did not think they were necessary, since these depth formats are very similar to other depth formats in Wine and there is a 1:1 mapping between them (DF16 -> D16, DF24 -> D24S8), unless there is a difference in precision or in whether int or float is used for storage, which the spec does not detail.
El jue., 22 nov. 2018 a las 15:01, Stefan Dösinger (< stefandoesinger@gmail.com>) escribió:
Hi,
Can you add a test showing that FETCH4 is disabled when the bound texture is not a single-channel texture? I sense that there might be applications that enable FETCH4 and then "forget" to disable it when they render from an ARGB texture.
A test for DF16 and DF24 would be great as well, but it doesn't belong in this patch and should be a separate one. It's probably best to extend intz_test.
- if (caps.TextureCaps & D3DPTEXTURECAPS_POW2)
- {
skip("No unconditional NP2 texture support, skipping FETCH4
test.\n");
IDirect3DDevice9_Release(device);
goto done;
- }
Do you still need this?
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER,
D3DTEXF_LINEAR);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER,
D3DTEXF_LINEAR);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER,
D3DTEXF_LINEAR);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
This contradicts AMD's description, but I assume you placed them here to show that their docs are wrong and FETCH4 is used regardless of the filter. I think a comment would be useful.
On Thu, 22 Nov 2018 at 05:11, Daniel Ansorregui mailszeros@gmail.com wrote:
@@ -9953,8 +9922,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); }
shader_addline(buffer, "tex%u = %s(ps_sampler%u, ret.%s);\n",
stage, texture_function, stage, coord_mask);
shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%c);\n",
stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? 'w' : ' ');
Not the most important thing, but could you please avoid that stray space by using %s with proj ? "w" : ""?
shader_addline(buffer, "tex%u = %s(ps_sampler%u, ffp_texcoord[%u].%s);\n",
stage, texture_function, stage, stage, coord_mask);
shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%c);\n",
stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? 'w' : ' ');
Likewise.
* This is to simplify the code before adding the FETCH4 modifications. No change is intended in the shader generation.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/glsl_shader.c | 63 ++++++++++---------------------------- 1 file changed, 16 insertions(+), 47 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 545e1bff9f..5b231a91d1 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9863,65 +9863,34 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * switch (settings->op[stage].tex_type) { case WINED3D_GL_RES_TYPE_TEX_1D: - if (proj) - { - texture_function = "texture1DProj"; - coord_mask = "xw"; - } - else - { - texture_function = "texture1D"; - coord_mask = "x"; - } + texture_function = "texture1D"; + coord_mask = "x"; break; case WINED3D_GL_RES_TYPE_TEX_2D: - if (proj) - { - texture_function = "texture2DProj"; - coord_mask = "xyw"; - } - else - { - texture_function = "texture2D"; - coord_mask = "xy"; - } + texture_function = "texture2D"; + coord_mask = "xy"; break; case WINED3D_GL_RES_TYPE_TEX_3D: - if (proj) - { - texture_function = "texture3DProj"; - coord_mask = "xyzw"; - } - else - { - texture_function = "texture3D"; - coord_mask = "xyz"; - } + texture_function = "texture3D"; + coord_mask = "xyz"; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; coord_mask = "xyz"; break; case WINED3D_GL_RES_TYPE_TEX_RECT: - if (proj) - { - texture_function = "texture2DRectProj"; - coord_mask = "xyw"; - } - else - { - texture_function = "texture2DRect"; - coord_mask = "xy"; - } + texture_function = "texture2DRect"; + coord_mask = "xy"; break; default: FIXME("Unhandled texture type %#x.\n", settings->op[stage].tex_type); texture_function = ""; coord_mask = "xyzw"; + proj = FALSE; break; } if (!legacy_syntax) - texture_function = proj ? "textureProj" : "texture"; + texture_function = "texture";
if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP @@ -9953,8 +9922,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); }
- shader_addline(buffer, "tex%u = %s(ps_sampler%u, ret.%s);\n", - stage, texture_function, stage, coord_mask); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s);\n", + stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? "w" : "");
if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n", @@ -9962,13 +9931,13 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * } else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) { - shader_addline(buffer, "tex%u = %s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", - stage, texture_function, stage, stage); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", + stage, texture_function, proj ? "Proj" : "", stage, stage); } else { - shader_addline(buffer, "tex%u = %s(ps_sampler%u, ffp_texcoord[%u].%s);\n", - stage, texture_function, stage, stage, coord_mask); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s);\n", + stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); }
string_buffer_sprintf(tex_reg_name, "tex%u", stage);
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/utils.c | 12 ++++++++++++ dlls/wined3d/wined3d_private.h | 1 + 2 files changed, 13 insertions(+)
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index aa68799535..0a401e8f18 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -340,6 +340,18 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_NULL, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_NVDB, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION}, + {WINED3DFMT_L8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_L16_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + + /* The formats below are not tested to be FETCH4 compatible on windows but + * the spec "hints" that depth formats are not recomended (but allowed) */ + {WINED3DFMT_D16_LOCKABLE, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_D16_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_D32_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_D32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_D24_UNORM_S8_UINT, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index f908ff7173..af34f0ce44 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -4403,6 +4403,7 @@ extern enum wined3d_format_id pixelformat_for_depth(DWORD depth) DECLSPEC_HIDDEN #define WINED3DFMT_FLAG_VERTEX_ATTRIBUTE 0x01000000 #define WINED3DFMT_FLAG_BLIT 0x02000000 #define WINED3DFMT_FLAG_MAPPABLE 0x04000000 +#define WINED3DFMT_FLAG_ALLOW_FETCH4 0x08000000
struct wined3d_rational {
On Sat, 24 Nov 2018 at 23:44, Daniel Ansorregui mailszeros@gmail.com wrote:
@@ -340,6 +340,18 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_NULL, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_NVDB, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION},
- {WINED3DFMT_L8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4},
- {WINED3DFMT_L16_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4},
- {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4},
- {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4},
One of the rules/guidelines we have in Wine is that we try to avoid dead code. This patch runs into that because it sets a flag that isn't used until the following patch in the series. Since both patches aren't especially large, the best thing to do would probably be to just merge them.
- /* The formats below are not tested to be FETCH4 compatible on windows but
* the spec "hints" that depth formats are not recomended (but allowed) */
That should be easy to test, right?
- Tested under Windows 10: when Fetch4 is enabled, projection is ignored - What happens when Fetch4 is used on unsupported textures is untested, so Fetch4 is disabled for them for the time being. - The swizzle fix has been checked against Windows, since the Fetch4 component order does not match that of gather4
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/glsl_shader.c | 19 ++++++++++++++++++- dlls/wined3d/utils.c | 4 ++++ dlls/wined3d/wined3d_private.h | 3 ++- 3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 5b231a91d1..f9edba0916 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9837,6 +9837,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + BOOL fetch4 = settings->op[stage].fetch4; BOOL proj;
if (!(tex_map & (1u << stage))) @@ -9856,7 +9857,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE;
@@ -9865,6 +9865,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D"; @@ -9873,6 +9874,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; @@ -9881,17 +9883,28 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_RECT: texture_function = "texture2DRect"; coord_mask = "xy"; + if (fetch4) + FIXME("Unsupported Fetch4 and texture2DRect sampling"); + fetch4 = FALSE; break; default: FIXME("Unhandled texture type %#x.\n", settings->op[stage].tex_type); texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture";
+ if (fetch4) + { + texture_function = "textureGather"; + /* Tested on W10+Intel, fetch4 enabled disables projection */ + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9940,6 +9953,10 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); }
+ /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.xwyz;\n", stage, stage); + string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, settings->op[stage].color_fixup); diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 0a401e8f18..80cb5e4e65 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -5795,6 +5795,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5938,6 +5939,9 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = state->textures[i] && + state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 && + state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4'); }
/* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index af34f0ce44..09179b7fc3 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2743,7 +2743,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; };
struct ffp_frag_settings
On Sat, 24 Nov 2018 at 23:44, Daniel Ansorregui mailszeros@gmail.com wrote:
- Tested under W10, when Fetch4 is enabled, projection is ignored
- Untested what happens when Fetch4 is used on unsupported textures. Disabling Fetch4 fttb.
- The swizzle fix has been checked against windows since it does not match with gather4
Could you submit those tests?
@@ -9865,6 +9865,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x";
fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D";
@@ -9873,6 +9874,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz";
fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube";
@@ -9881,17 +9883,28 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_RECT: texture_function = "texture2DRect"; coord_mask = "xy";
if (fetch4)
FIXME("Unsupported Fetch4 and texture2DRect sampling");
fetch4 = FALSE; break; default: FIXME("Unhandled texture type %#x.\n", settings->op[stage].tex_type); texture_function = ""; coord_mask = "xyzw"; proj = FALSE;
fetch4 = FALSE; break;
It may be nicer to just not set "fetch4" in gen_ffp_frag_op() for those texture types.
if (!legacy_syntax) texture_function = "texture";
if (fetch4)
{
texture_function = "textureGather";
Does that work with a GL 3.2 context? Or without core contexts?
@@ -5938,6 +5939,9 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP;
settings->op[i].fetch4 = state->textures[i] &&
state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 &&
state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4');
Formatting.
- Implement shader generation for texld/texldp/texldd/texldb/texldl - texldl in vertex shaders is not implemented yet, since it is not possible to access ps_compile_args there. Maybe it should be moved to another place. - Trigger pixel shader regeneration on FETCH4 state change
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/cs.c | 5 +- dlls/wined3d/device.c | 4 ++ dlls/wined3d/glsl_shader.c | 126 ++++++++++++++++++++++++--------- dlls/wined3d/shader.c | 12 ++++ dlls/wined3d/state.c | 13 ++++ dlls/wined3d/wined3d_private.h | 7 +- 6 files changed, 130 insertions(+), 37 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index ffa4951c0c..0f20635d48 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -1354,7 +1354,10 @@ static void wined3d_cs_exec_set_texture(struct wined3d_cs *cs, const void *data) if (!prev || wined3d_texture_gl(op->texture)->target != wined3d_texture_gl(prev)->target || (!is_same_fixup(new_format->color_fixup, old_format->color_fixup) && !(can_use_texture_swizzle(gl_info, new_format) && can_use_texture_swizzle(gl_info, old_format))) - || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW)) + || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW) + || ((new_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) != (old_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + && cs->state.sampler_states[op->texture->sampler][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'))) device_invalidate_state(cs->device, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL));
if (!prev && op->stage < d3d_info->limits.ffp_blend_stages) diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index a89a33d676..0f7c433d1e 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -2088,7 +2088,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device, device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3) + { sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - MAX_FRAGMENT_SAMPLERS); + if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4')) + FIXME("Unsupported FETCH4 and Vertex Texture Sampler"); + }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states)) { diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index f9edba0916..13ee2cb460 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -49,6 +49,7 @@ WINE_DECLARE_DEBUG_CHANNEL(winediag); #define WINED3D_GLSL_SAMPLE_GRAD 0x04 #define WINED3D_GLSL_SAMPLE_LOAD 0x08 #define WINED3D_GLSL_SAMPLE_OFFSET 0x10 +#define WINED3D_GLSL_SAMPLE_GATHER 0x20
static const struct { @@ -3607,6 +3608,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET; + BOOL gather = flags & WINED3D_GLSL_SAMPLE_GATHER; const char *base = "texture", *type_part = "", *suffix = ""; unsigned int coord_size, deriv_size;
@@ -3652,6 +3654,14 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context type_part = ""; }
+ if (gather) + { + base = "texture"; + type_part = "Gather"; + suffix = ""; + projected = lod = grad = offset = FALSE; + } + sample_function->name = string_buffer_get(priv->string_buffers); string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "", lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix); @@ -5391,11 +5401,18 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) } }
+ /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + sample_flags = WINED3D_GLSL_SAMPLE_GATHER; + mask = 0; + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function); mask |= sample_function.coord_mask; sample_function.coord_mask = mask;
if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE; + else if (priv->cur_ps_args->fetch4 & (1u << resource_idx)) swizzle = WINED3DSP_FETCH4_SWIZZLE; else swizzle = ins->src[1].swizzle;
/* 1.0-1.3: Use destination register as coordinate source. @@ -5411,7 +5428,7 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) { struct glsl_src_param coord_param; shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param); - if (ins->flags & WINED3DSI_TEXLD_BIAS) + if (ins->flags & WINED3DSI_TEXLD_BIAS && sample_flags != WINED3D_GLSL_SAMPLE_GATHER) { struct glsl_src_param bias; shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias); @@ -5427,10 +5444,11 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) { + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, dx_param, dy_param; struct glsl_sample_function sample_function; - DWORD sampler_idx; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle;
if (!shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) @@ -5440,7 +5458,16 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) return; }
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)){ + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GATHER, &sample_function); + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + }
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GRAD, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); @@ -5455,19 +5482,21 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) { const struct wined3d_shader_version *shader_version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, lod_param; struct glsl_sample_function sample_function; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle; - DWORD sampler_idx; + DWORD flags = WINED3D_GLSL_SAMPLE_LOD;
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* This call can be used in vertex shader, without cur_ps_args */ + if(priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + flags = WINED3D_GLSL_SAMPLE_GATHER;
- shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); - if (shader_version->type == WINED3D_SHADER_TYPE_PIXEL && !shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) { @@ -5476,6 +5505,18 @@ static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) * even without the appropriate extension. */ WARN("Using %s in fragment shader.\n", sample_function.name->buffer); } + + if (flags == WINED3D_GLSL_SAMPLE_GATHER){ + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + } + + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str, NULL, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); @@ -6169,6 +6210,7 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) struct glsl_sample_function sample_function; DWORD flags = WINED3D_GLSL_SAMPLE_LOAD; BOOL has_lod_param; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; @@ -6183,6 +6225,10 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) } has_lod_param = is_mipmapped(reg_maps->resource_info[resource_idx].type);
+ if (priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + FIXME("Unsupported FETCH4 and LD Sampling SM 5.0"); + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); @@ -6208,46 +6254,57 @@ static void shader_glsl_sample(const struct wined3d_shader_instruction *ins) struct glsl_src_param coord_param, lod_param, dx_param, dy_param; unsigned int resource_idx, sampler_idx, sampler_bind_idx; struct glsl_sample_function sample_function; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + DWORD swizzle = ins->src[1].swizzle; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_GRAD) flags |= WINED3D_GLSL_SAMPLE_GRAD; if (ins->handler_idx == WINED3DSIH_SAMPLE_LOD) flags |= WINED3D_GLSL_SAMPLE_LOD; if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; - - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset; + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + flags = WINED3D_GLSL_SAMPLE_GATHER; + swizzle = WINED3DSP_FETCH4_SWIZZLE; + }
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- switch (ins->handler_idx) + /* Fetch4 overwrites the other texture flags */ + if (flags != WINED3D_GLSL_SAMPLE_GATHER) { - case WINED3DSIH_SAMPLE: - break; - case WINED3DSIH_SAMPLE_B: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - case WINED3DSIH_SAMPLE_GRAD: - shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); - shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); - dx_param_str = dx_param.param_str; - dy_param_str = dy_param.param_str; - break; - case WINED3DSIH_SAMPLE_LOD: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - default: - ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); - break; + switch (ins->handler_idx) + { + case WINED3DSIH_SAMPLE: + break; + case WINED3DSIH_SAMPLE_B: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + case WINED3DSIH_SAMPLE_GRAD: + shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); + shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); + dx_param_str = dx_param.param_str; + dy_param_str = dy_param.param_str; + break; + case WINED3DSIH_SAMPLE_LOD: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + default: + ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); + break; + } }
sampler_bind_idx = shader_glsl_find_sampler(&ins->ctx->reg_maps->sampler_map, resource_idx, sampler_idx); - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, swizzle, dx_param_str, dy_param_str, lod_param_str, &ins->texel_offset, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); } @@ -6293,6 +6350,9 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins) unsigned int coord_size; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_C_LZ) { lod_param = "0"; @@ -6304,8 +6364,6 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins)
if (!(resource_info = shader_glsl_get_resource_info(ins, &ins->src[1].reg))) return; - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset;
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); coord_size = shader_glsl_get_write_mask_size(sample_function.coord_mask); diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..f07804de63 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,18 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
+ if (gl_info->supported[ARB_TEXTURE_GATHER]) + { + for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) + { + if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + args->fetch4 |= 1 << i; + else + args->fetch4 &= ~(1 << i); + } + } + if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info)) { const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 42c109da57..6a4d23c451 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -3616,6 +3616,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state { struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]); BOOL srgb = state->sampler_states[sampler_idx][WINED3D_SAMP_SRGB_TEXTURE]; + BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'); const DWORD *sampler_states = state->sampler_states[sampler_idx]; struct wined3d_device *device = context->device; struct wined3d_sampler_desc desc; @@ -3650,6 +3652,17 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state /* Trigger shader constant reloading (for NP2 texcoord fixup) */ if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; + + /* Trigger pixel shader recompilation for FETCH4 changes */ + if(gl_info->supported[ARB_TEXTURE_GATHER] && + ((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4) + { + if (fetch4) + context->last_was_fetch4 |= 1 << sampler_idx; + else + context->last_was_fetch4 &= ~(1 << sampler_idx); + context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL); + } } else { diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 09179b7fc3..5be566c3af 100644 --- 
a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -520,7 +520,8 @@ enum wined3d_immconst_type WINED3D_IMMCONST_VEC4, };
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_FETCH4_SWIZZLE (0u | (3u << 2) | (1u << 4) | (2u << 6)) /* xwyz */
enum wined3d_shader_src_modifier { @@ -1357,7 +1358,8 @@ struct ps_compile_args DWORD alpha_test_func : 3; DWORD render_offscreen : 1; DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */ - DWORD padding : 18; + WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */ + DWORD padding : 2; };
enum fog_src_type @@ -1892,6 +1894,7 @@ struct wined3d_context DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; + DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */ DWORD fog_coord : 1; DWORD fog_enabled : 1; DWORD current : 1;
* This assumes FETCH4 is already supported by Wine, and checks for FETCH4 support before exposing DF24.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/directx.c | 6 ++++++ dlls/wined3d/utils.c | 33 +++++++++++++++++++++++++++++++++ include/wine/wined3d.h | 2 ++ 3 files changed, 41 insertions(+)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index b9aa9fa440..a7c98d8095 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -1711,6 +1711,12 @@ HRESULT CDECL wined3d_check_device_format(const struct wined3d *wined3d, UINT ad return WINED3DOK_NOMIPGEN; }
+ if ((check_format_id == WINED3DFMT_DF24) && !adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + { + TRACE("No Support for Fetch4 disabling DF24 support.\n"); + return WINED3DERR_NOTAVAILABLE; + } + return WINED3D_OK; }
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 80cb5e4e65..6a31c84af6 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -65,6 +65,8 @@ format_index_remap[] = {WINED3DFMT_R16, WINED3D_FORMAT_FOURCC_BASE + 20}, {WINED3DFMT_AL16, WINED3D_FORMAT_FOURCC_BASE + 21}, {WINED3DFMT_NV12, WINED3D_FORMAT_FOURCC_BASE + 22}, + {WINED3DFMT_DF16, WINED3D_FORMAT_FOURCC_BASE + 23}, + {WINED3DFMT_DF24, WINED3D_FORMAT_FOURCC_BASE + 24}, };
#define WINED3D_FORMAT_COUNT (WINED3D_FORMAT_FOURCC_BASE + ARRAY_SIZE(format_index_remap)) @@ -141,6 +143,8 @@ static const struct wined3d_format_channels formats[] = {WINED3DFMT_NVHU, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NVHS, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NULL, 8, 8, 8, 8, 0, 8, 16, 24, 4, 0, 0}, + {WINED3DFMT_DF16, 0, 0, 0, 0, 0, 0, 0, 0, 2, 16, 0}, + {WINED3DFMT_DF24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0}, /* Unsure about them, could not find a Windows driver that supports them */ {WINED3DFMT_R16, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_AL16, 0, 0, 0, 16, 0, 0, 0, 16, 4, 0, 0}, @@ -352,6 +356,8 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_D32_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_D32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_D24_UNORM_S8_UINT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_DF16, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_DF24, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) @@ -1900,6 +1906,25 @@ static const struct wined3d_format_texture_info format_texture_info[] = WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, ARB_FRAMEBUFFER_OBJECT, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_DEPTH, + WINED3D_GL_EXT_NONE, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT16, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_DEPTH_TEXTURE, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8_EXT, 0, + GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + EXT_PACKED_DEPTH_STENCIL, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8, GL_DEPTH24_STENCIL8, 0, + GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_FRAMEBUFFER_OBJECT, NULL}, {WINED3DFMT_NULL, 0, 0, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_RENDERTARGET | WINED3DFMT_FLAG_FBO_ATTACHABLE, @@ -3541,6 +3566,12 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_ {WINED3DFMT_INTZ, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, {WINED3DFMT_INTZ, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT},
+ {WINED3DFMT_DF16, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF16, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT}, + + {WINED3DFMT_DF24, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF24, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT}, + {WINED3DFMT_L8_UNORM, "XXX1", FALSE, ARB_TEXTURE_RG}, };
@@ -4395,6 +4426,8 @@ const char *debug_d3dformat(enum wined3d_format_id format_id) FMT_TO_STR(WINED3DFMT_R16); FMT_TO_STR(WINED3DFMT_AL16); FMT_TO_STR(WINED3DFMT_NV12); + FMT_TO_STR(WINED3DFMT_DF16); + FMT_TO_STR(WINED3DFMT_DF24); #undef FMT_TO_STR default: { diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index 40553f7e51..97c640acc5 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -271,6 +271,8 @@ enum wined3d_format_id WINED3DFMT_R16 = WINEMAKEFOURCC(' ','R','1','6'), WINED3DFMT_AL16 = WINEMAKEFOURCC('A','L','1','6'), WINED3DFMT_NV12 = WINEMAKEFOURCC('N','V','1','2'), + WINED3DFMT_DF16 = WINEMAKEFOURCC('D','F','1','6'), + WINED3DFMT_DF24 = WINEMAKEFOURCC('D','F','2','4'),
WINED3DFMT_FORCE_DWORD = 0xffffffff };
- Implemented for texld/texldp/texldd/texldb/texldl. - In all cases tested on Windows 10 + Intel, enabling Fetch4 always produced the same result for every instruction (the same as texld).
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/d3d9/tests/visual.c | 313 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 313 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index 4f26b0d23f..a2c5f6285c 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15104,6 +15104,318 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{ + static const DWORD vs_code[] = + { + 0xfffe0300, /* vs_3_0 */ + 0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */ + 0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */ + 0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */ + 0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */ + 0x02000001, 0xe00f0000, 0x90e40000, /* mov o0, v0 */ + 0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */ + 0x0000ffff + }; + static const DWORD ps_code_texld[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + static const DWORD ps_code_texldp[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldd[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */ + 0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */ + 0x02000001, 0x800f0002, 0xa0e40000, /* mov r2, c0 */ + 0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD 
ps_code_texldb[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldl[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + + struct + { + float x, y, z; + float tu, tv; + } + quad[] = + { + {-1.0f, 1.0f, 0.0f, 0.0f,0.0f }, + { 1.0f, 1.0f, 0.0f, 1.0f,0.0f }, + {-1.0f,-1.0f, 0.0f, 0.0f,1.0f }, + { 1.0f,-1.0f, 0.0f, 1.0f,1.0f } + }; + + struct struct_expected_color + { + UINT x, y; + D3DCOLOR color; + }; + struct struct_expected_color expected_colors[] = + { + { 40, 30, 0x23102013},{160, 30, 0x22132312},{320, 30, 0x21122211}, + {480, 30, 0x20112110},{600, 30, 0x23102013}, + { 40,120, 0x13011002},{160,120, 0x120213f2},{320,120, 0x11f212f1}, + {480,120, 0x10f11101},{600,120, 0x13011002}, + { 40,240, 0x02030104},{160,240, 0xf20402f4},{320,240, 0xf1f4f2f3}, + {480,240, 0x01f3f103},{600,240, 0x02030104}, + { 40,360, 0x04200323},{160,360, 0xf4230422},{320,360, 0xf322f421}, + {480,360, 0x0321f320},{600,360, 0x04200323}, + { 40,450, 0x23102013},{160,450, 0x22132312},{320,450, 0x21122211}, + {480,450, 0x20112110},{600,450, 0x23102013}, + }; + struct struct_expected_color expected_color = {40, 
30, 0x0}; + + static const DWORD texture_data[4] = {0x10111213, + 0x01f1f202, + 0x03f3f404, + 0x20212223}; + static const DWORD texture_data2 = 0xff804000; + + IDirect3DPixelShader9 *ps_texld, *ps_texldp, *ps_texldd, *ps_texldb, *ps_texldl; + IDirect3DTexture9 *texture_L8, *texture_A8, *texture_A8R8G8B8; + IDirect3DSurface9 *original_rt; + struct surface_readback rb; + IDirect3DVertexShader9 *vs; + IDirect3DDevice9 *device; + D3DLOCKED_RECT lr; + IDirect3D9 *d3d; + ULONG refcount; + D3DCAPS9 caps; + HWND window; + HRESULT hr; + UINT i, j; + + + window = create_window(); + d3d = Direct3DCreate9(D3D_SDK_VERSION); + ok(!!d3d, "Failed to create a D3D object.\n"); + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4')))) + { + skip("No DF24 support, skipping FETCH4 test.\n"); + goto done; + } + if (!(device = create_device(d3d, window, window, TRUE))) + { + skip("Failed to create a D3D device, skipping tests.\n"); + goto done; + } + + hr = IDirect3DDevice9_GetDeviceCaps(device, &caps); + ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr); + if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0)) + { + skip("No pixel shader 3.0 support, skipping FETCH4 test.\n"); + IDirect3DDevice9_Release(device); + goto done; + } + hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt); + ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr); + + /* Create our texture for FETCH4 */ + hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture_L8, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + memset(&lr, 0, sizeof(lr)); + hr = IDirect3DTexture9_LockRect(texture_L8, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + memcpy(lr.pBits, texture_data, sizeof(texture_data)); + hr = IDirect3DTexture9_UnlockRect(texture_L8, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr 
%#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture_L8); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* Create Other textures to test FETCH4 does not work there */ + hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0, D3DFMT_A8, D3DPOOL_MANAGED, &texture_A8, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + memset(&lr, 0, sizeof(lr)); + hr = IDirect3DTexture9_LockRect(texture_A8, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + memcpy(lr.pBits, &texture_data2, 4); + hr = IDirect3DTexture9_UnlockRect(texture_A8, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + + hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0, D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &texture_A8R8G8B8, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + memset(&lr, 0, sizeof(lr)); + hr = IDirect3DTexture9_LockRect(texture_A8R8G8B8, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + memcpy(lr.pBits, &texture_data2, 4); + hr = IDirect3DTexture9_UnlockRect(texture_A8R8G8B8, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + + /* Create vertex shader */ + hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs); + ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr); + hr = IDirect3DDevice9_SetVertexShader(device, vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + + /* Prepare the pixel shaders */ + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texld, &ps_texld); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldp, &ps_texldp); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldd, &ps_texldd); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = 
IDirect3DDevice9_CreatePixelShader(device, ps_code_texldb, &ps_texldb); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_texldl, &ps_texldl); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + + /* According to the spec, FETCH4 is enabled when D3DSAMP_MIPMAPLODBIAS = GET4 + and also D3DSAMP_MAGFILTER = D3DTEXF_POINT. But apparently only GET4 is needed + So the tests exercices that only GET4 is required, and any other parameter will work */ + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_NONE); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* Render with fetch4 and test if we obtain proper results */ + for (i=0; i<5; i++) + { + if (i==0) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texld); + else if(i==1) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldp); + else if(i==2) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldd); + else if(i==3) + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldb); + else + hr = IDirect3DDevice9_SetPixelShader(device, ps_texldl); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | 
D3DFVF_TEX1); + ok(SUCCEEDED(hr), "Failed to set FVF, hr %#x.\n", hr); + + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors); ++j) + { + D3DCOLOR color = get_readback_color(&rb, expected_colors[j].x, expected_colors[j].y); + ok(color_match(color, expected_colors[j].color, 1), + "Expected color 0x%08x at (%u, %u), got 0x%08x.\n", + expected_colors[j].color, expected_colors[j].x, expected_colors[j].y, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + /* Test if FETCH4 is disabled when incompatible textures are used */ + hr = IDirect3DDevice9_SetPixelShader(device, ps_texld); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + for (i=0; i<2; i++) + { + if (i==0) + { + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture_A8); + expected_color.color = 0x0f000000; + } + else if(i==1) + { + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture_A8R8G8B8); + expected_color.color = 0x64321900; + } + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1); + ok(SUCCEEDED(hr), "Failed to set FVF, hr %#x.\n", hr); + + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, 
sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + D3DCOLOR color = get_readback_color(&rb, expected_color.x, expected_color.y); + ok(color_match(color, expected_color.color, 1), + "Expected color 0x%08x at (%u, %u), got 0x%08x.\n", + expected_color.color, expected_color.x, expected_color.y, color); + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + IDirect3DTexture9_Release(texture_L8); + IDirect3DTexture9_Release(texture_A8); + IDirect3DTexture9_Release(texture_A8R8G8B8); + IDirect3DPixelShader9_Release(ps_texld); + IDirect3DPixelShader9_Release(ps_texldp); + IDirect3DPixelShader9_Release(ps_texldb); + IDirect3DPixelShader9_Release(ps_texldd); + IDirect3DPixelShader9_Release(ps_texldl); + IDirect3DVertexShader9_Release(vs); + IDirect3DSurface9_Release(original_rt); + refcount = IDirect3DDevice9_Release(device); + ok(!refcount, "Device has %u references left.\n", refcount); +done: + IDirect3D9_Release(d3d); + DestroyWindow(window); +} + static void shadow_test(void) { static const DWORD ps_code[] = @@ -24291,6 +24603,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test(); + fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
Hi,
While running your changed tests on Windows, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=44933
Your paranoid android.
=== w8adm (32 bit report) ===
d3d9: visual.c:8620: Test failed: Got unexpected color 0x00007580 for quad 2 (different colors). visual.c:8620: Test failed: Got unexpected color 0x00ff0000 for quad 2 (different colors).
On Sat, 24 Nov 2018 at 23:51, Daniel Ansorregui <mailszeros@gmail.com> wrote:
- Implemented for texld/texldp/texldd/texldb/texldl
- In all cases tested on Windows 10 + Intel, enabling Fetch4 always produced the same result for every instruction (the same as texld).
Signed-off-by: Daniel Ansorregui <mailszeros@gmail.com>
dlls/d3d9/tests/visual.c | 313 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 313 insertions(+)
Since the CheckDeviceFormat() check will prevent this from doing anything on implementations that don't support DF24, you may as well send this test as the first patch in the series.
- struct
- {
float x, y, z;
float tu, tv;
- }
Not a big deal, but we tend to do these as "struct vec3 position" and "struct vec2 texcoord".
- struct struct_expected_color
- {
UINT x, y;
D3DCOLOR color;
- };
Why the "struct_" prefix on the structure name? In fact, you don't really need a structure name at all.
- static const DWORD texture_data[4] = {0x10111213,
Again minor, but no need for the "4".
- IDirect3DTexture9 *texture_L8, *texture_A8, *texture_A8R8G8B8;
You'll probably want an array of texture formats you want to test, and loop over it.
- /* Create our texture for FETCH4 */
- hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture_L8, NULL);
- ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
- memset(&lr, 0, sizeof(lr));
The memset() is redundant.
- hr = IDirect3DTexture9_LockRect(texture_L8, 0, &lr, NULL, 0);
- ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
- memcpy(lr.pBits, texture_data, sizeof(texture_data));
- hr = IDirect3DTexture9_UnlockRect(texture_L8, 0);
- ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
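This probably works in practice, but you should be using the pitch: copy the data row by row, advancing the destination by lr.Pitch, since the rows of a locked texture are not guaranteed to be tightly packed.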
- /* Render with fetch4 and test if we obtain proper results */
- for (i=0; i<5; i++)
Formatting: please put spaces around the operators, i.e. "for (i = 0; i < 5; ++i)".
- {
if (i==0)
hr = IDirect3DDevice9_SetPixelShader(device, ps_texld);
else if(i==1)
hr = IDirect3DDevice9_SetPixelShader(device, ps_texldp);
else if(i==2)
hr = IDirect3DDevice9_SetPixelShader(device, ps_texldd);
else if(i==3)
hr = IDirect3DDevice9_SetPixelShader(device, ps_texldb);
else
hr = IDirect3DDevice9_SetPixelShader(device, ps_texldl);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
If you create an array like this:
static const DWORD *ps_code[] = { ps_code_texld, ps_code_texldp, ... };
You can loop over it, and avoid the fairly awkward construction above.
Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com>