- Implement FETCH4 shader generation for texld/texldp/texldd/texldb/texldl - FIXME: FETCH4 is not implemented yet for texldl in vertex shaders, since ps_compile_args cannot be accessed there. Maybe move the flag to another place. It probably does not work on Windows anyway - FIXME: FETCH4 on 3D textures is not supported - Trigger pixel shader re-generation on FETCH4 state changes by storing a flag in the context - Add a fetch4 flag to ps_compile_args
Signed-off-by: Daniel Ansorregui <mailszeros@gmail.com> --- dlls/wined3d/cs.c | 5 +- dlls/wined3d/device.c | 4 + dlls/wined3d/glsl_shader.c | 160 ++++++++++++++++++++++++++------- dlls/wined3d/shader.c | 12 +++ dlls/wined3d/state.c | 13 +++ dlls/wined3d/wined3d_private.h | 7 +- 6 files changed, 164 insertions(+), 37 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 5ea25e992a..355286f346 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -1384,7 +1384,10 @@ static void wined3d_cs_exec_set_texture(struct wined3d_cs *cs, const void *data) if (!prev || wined3d_texture_gl(op->texture)->target != wined3d_texture_gl(prev)->target || (!is_same_fixup(new_format->color_fixup, old_format->color_fixup) && !(can_use_texture_swizzle(gl_info, new_format) && can_use_texture_swizzle(gl_info, old_format))) - || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW)) + || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW) + || ((new_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) != (old_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + && cs->state.sampler_states[op->texture->sampler][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'))) device_invalidate_state(cs->device, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL));
if (!prev && op->stage < d3d_info->limits.ffp_blend_stages) diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index d03a982e74..876f34fc2c 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -2087,7 +2087,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device, device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3) + { sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - MAX_FRAGMENT_SAMPLERS); + if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4')) + FIXME("Unsupported FETCH4 and Vertex Texture Sampler"); + }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states)) { diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index d9eb26762c..1bd94c8271 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -49,6 +49,7 @@ WINE_DECLARE_DEBUG_CHANNEL(winediag); #define WINED3D_GLSL_SAMPLE_GRAD 0x04 #define WINED3D_GLSL_SAMPLE_LOAD 0x08 #define WINED3D_GLSL_SAMPLE_OFFSET 0x10 +#define WINED3D_GLSL_SAMPLE_GATHER 0x20
static const struct { @@ -3613,6 +3614,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET; + BOOL gather = !shadow && flags & WINED3D_GLSL_SAMPLE_GATHER; const char *base = "texture", *type_part = "", *suffix = ""; unsigned int coord_size, deriv_size;
@@ -3658,6 +3660,19 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context type_part = ""; }
+ if (gather) + { + if (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + { + base = "texture"; + type_part = "Gather"; + suffix = ""; + projected = lod = grad = offset = FALSE; + } + } + sample_function->name = string_buffer_get(priv->string_buffers); string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "", lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix); @@ -5397,11 +5412,25 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) } }
+ /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + /* 3D + Fetch4 should return textureGather(sampler2DArray, t.xy0) + unfortunately, we cant convert 3D to 2DArray */ + if (ins->ctx->reg_maps->resource_info[resource_idx].type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + { + sample_flags = WINED3D_GLSL_SAMPLE_GATHER; + mask = 0; + } + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function); mask |= sample_function.coord_mask; sample_function.coord_mask = mask;
if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE; + else if (sample_flags == WINED3D_GLSL_SAMPLE_GATHER) swizzle = WINED3DSP_FETCH4_SWIZZLE; else swizzle = ins->src[1].swizzle;
/* 1.0-1.3: Use destination register as coordinate source. @@ -5417,7 +5446,7 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) { struct glsl_src_param coord_param; shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param); - if (ins->flags & WINED3DSI_TEXLD_BIAS) + if (ins->flags & WINED3DSI_TEXLD_BIAS && sample_flags != WINED3D_GLSL_SAMPLE_GATHER) { struct glsl_src_param bias; shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias); @@ -5433,10 +5462,11 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) { + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, dx_param, dy_param; struct glsl_sample_function sample_function; - DWORD sampler_idx; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle;
if (!shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) @@ -5446,7 +5476,24 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) return; }
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)){ + /* 3D + Fetch4 should return textureGather(sampler2DArray, t.xy0) + unfortunately, we cant convert 3D to 2DArray */ + if (ins->ctx->reg_maps->resource_info[sampler_idx].type != WINED3D_SHADER_RESOURCE_TEXTURE_3D) + { + + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GATHER, &sample_function); + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + } + else + FIXME("Unsupported Fetch4 and texture3D sampling"); + }
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GRAD, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); @@ -5461,19 +5508,28 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) { const struct wined3d_shader_version *shader_version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, lod_param; struct glsl_sample_function sample_function; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle; - DWORD sampler_idx; + DWORD flags = WINED3D_GLSL_SAMPLE_LOD;
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* This call can be used in vertex shader, without cur_ps_args */ + if(priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + /* 3D + Fetch4 should return textureGather(sampler2DArray, t.xy0) + unfortunately, we cant convert 3D to 2DArray */ + if (ins->ctx->reg_maps->resource_info[sampler_idx].type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + flags = WINED3D_GLSL_SAMPLE_GATHER; + }
- shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); - if (shader_version->type == WINED3D_SHADER_TYPE_PIXEL && !shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) { @@ -5482,6 +5538,18 @@ static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) * even without the appropriate extension. */ WARN("Using %s in fragment shader.\n", sample_function.name->buffer); } + + if (flags == WINED3D_GLSL_SAMPLE_GATHER){ + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + } + + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str, NULL, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); @@ -6175,6 +6243,7 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) struct glsl_sample_function sample_function; DWORD flags = WINED3D_GLSL_SAMPLE_LOAD; BOOL has_lod_param; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; @@ -6189,6 +6258,10 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) } has_lod_param = is_mipmapped(reg_maps->resource_info[resource_idx].type);
+ if (priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + FIXME("Unsupported FETCH4 and LD Sampling SM 5.0"); + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); @@ -6214,46 +6287,64 @@ static void shader_glsl_sample(const struct wined3d_shader_instruction *ins) struct glsl_src_param coord_param, lod_param, dx_param, dy_param; unsigned int resource_idx, sampler_idx, sampler_bind_idx; struct glsl_sample_function sample_function; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + DWORD swizzle = ins->src[1].swizzle; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_GRAD) flags |= WINED3D_GLSL_SAMPLE_GRAD; if (ins->handler_idx == WINED3DSIH_SAMPLE_LOD) flags |= WINED3D_GLSL_SAMPLE_LOD; if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; - - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset; + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + /* 3D + Fetch4 should return textureGather(sampler2DArray, t.xy0) + unfortunately, we cant convert 3D to 2DArray */ + if (ins->ctx->reg_maps->resource_info[sampler_idx].type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + { + flags = WINED3D_GLSL_SAMPLE_GATHER; + swizzle = WINED3DSP_FETCH4_SWIZZLE; + } + }
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- switch (ins->handler_idx) + /* Fetch4 overwrites the other texture flags */ + if (flags != WINED3D_GLSL_SAMPLE_GATHER) { - case WINED3DSIH_SAMPLE: - break; - case WINED3DSIH_SAMPLE_B: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - case WINED3DSIH_SAMPLE_GRAD: - shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); - shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); - dx_param_str = dx_param.param_str; - dy_param_str = dy_param.param_str; - break; - case WINED3DSIH_SAMPLE_LOD: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - default: - ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); - break; + switch (ins->handler_idx) + { + case WINED3DSIH_SAMPLE: + break; + case WINED3DSIH_SAMPLE_B: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + case WINED3DSIH_SAMPLE_GRAD: + shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); + shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); + dx_param_str = dx_param.param_str; + dy_param_str = dy_param.param_str; + break; + case WINED3DSIH_SAMPLE_LOD: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + default: + ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); + break; + } }
sampler_bind_idx = shader_glsl_find_sampler(&ins->ctx->reg_maps->sampler_map, resource_idx, sampler_idx); - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, swizzle, dx_param_str, dy_param_str, lod_param_str, &ins->texel_offset, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); } @@ -6299,6 +6390,9 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins) unsigned int coord_size; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_C_LZ) { lod_param = "0"; @@ -6310,8 +6404,6 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins)
if (!(resource_info = shader_glsl_get_resource_info(ins, &ins->src[1].reg))) return; - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset;
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); coord_size = shader_glsl_get_write_mask_size(sample_function.coord_mask); diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..f07804de63 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,18 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
+ if (gl_info->supported[ARB_TEXTURE_GATHER]) + { + for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) + { + if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + args->fetch4 |= 1 << i; + else + args->fetch4 &= ~(1 << i); + } + } + if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info)) { const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 8708aa09b3..dc69e935c5 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -3616,6 +3616,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state { struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]); BOOL srgb = state->sampler_states[sampler_idx][WINED3D_SAMP_SRGB_TEXTURE]; + BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'); const DWORD *sampler_states = state->sampler_states[sampler_idx]; struct wined3d_device *device = context->device; struct wined3d_sampler_desc desc; @@ -3650,6 +3652,17 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state /* Trigger shader constant reloading (for NP2 texcoord fixup) */ if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; + + /* Trigger pixel shader recompilation for FETCH4 changes */ + if(gl_info->supported[ARB_TEXTURE_GATHER] && + ((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4) + { + if (fetch4) + context->last_was_fetch4 |= 1 << sampler_idx; + else + context->last_was_fetch4 &= ~(1 << sampler_idx); + context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL); + } } else { diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 60edb9bac1..79cc638ae6 100644 --- 
a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -521,7 +521,8 @@ enum wined3d_immconst_type WINED3D_IMMCONST_VEC4, };
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_FETCH4_SWIZZLE (0u | (3u << 2) | (1u << 4) | (2u << 6)) /* xwyz */
enum wined3d_shader_src_modifier { @@ -1358,7 +1359,8 @@ struct ps_compile_args DWORD alpha_test_func : 3; DWORD render_offscreen : 1; DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */ - DWORD padding : 18; + WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */ + DWORD padding : 2; };
enum fog_src_type @@ -1893,6 +1895,7 @@ struct wined3d_context DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; + DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */ DWORD fog_coord : 1; DWORD fog_enabled : 1; DWORD current : 1;