- Implement FETCH4 shader generation for the texld/texldp/texldd/texldb/texldl instructions.
- FIXME: Vertex shader texldl is not implemented yet, because ps_compile_args cannot be accessed from there. Maybe it should be moved to another place. It probably does not work on Windows anyway.
- Trigger pixel shader regeneration when the FETCH4 state changes, by storing a flag in the context.
- Add a fetch4 flag to ps_compile_args.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/cs.c | 5 ++++- dlls/wined3d/device.c | 4 ++++ dlls/wined3d/glsl_shader.c | 41 +++++++++++++++++++++++++++++++--- dlls/wined3d/shader.c | 10 +++++++++ dlls/wined3d/state.c | 12 ++++++++++ dlls/wined3d/wined3d_private.h | 15 +++++++++++-- 6 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 7471d24e7e..c97083ab8e 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -1372,7 +1372,10 @@ static void wined3d_cs_exec_set_texture(struct wined3d_cs *cs, const void *data) if (!prev || wined3d_texture_gl(op->texture)->target != wined3d_texture_gl(prev)->target || (!is_same_fixup(new_format->color_fixup, old_format->color_fixup) && !(can_use_texture_swizzle(gl_info, new_format) && can_use_texture_swizzle(gl_info, old_format))) - || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW)) + || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW) + || ((new_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) != (old_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + && cs->state.sampler_states[op->texture->sampler][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'))) device_invalidate_state(cs->device, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL));
if (!prev && op->stage < d3d_info->limits.ffp_blend_stages) diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index dee99dcde2..99248ffb1b 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -2082,7 +2082,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device, device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3) + { sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - MAX_FRAGMENT_SAMPLERS); + if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4')) + FIXME("Unsupported FETCH4 and Vertex Texture Sampler"); + }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states)) { diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 1950db06a9..074f9f0ab7 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -104,6 +104,8 @@ struct glsl_sample_function enum wined3d_data_type data_type; BOOL output_single_component; unsigned int offset_size; + BOOL fetch4_enabled; + BOOL fetch4_projected; };
enum heap_node_op @@ -3613,6 +3615,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET; + BOOL gather = !shadow && priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx); const char *base = "texture", *type_part = "", *suffix = ""; unsigned int coord_size, deriv_size;
@@ -3658,6 +3661,16 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context type_part = ""; }
+ sample_function->fetch4_projected = projected; + sample_function->fetch4_enabled = gather; + if (gather) + { + base = "texture"; + type_part = "Gather"; + suffix = ""; + projected = lod = grad = offset = FALSE; + } + sample_function->name = string_buffer_get(priv->string_buffers); string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "", lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix); @@ -3792,13 +3805,18 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_ const char *coord_reg_fmt, ...) { const struct wined3d_shader_version *version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + DWORD merged_swizzle = swizzle; char dst_swizzle[6]; struct color_fixup_desc fixup; BOOL np2_fixup = FALSE; va_list args; int ret;
- shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle); + /* Merge swizzle requested with the fetch4 swizzle */ + if (sample_function->fetch4_enabled) + merged_swizzle = wined3d_merge_swizzle(WINED3DSP_FETCH4_SWIZZLE, swizzle); + shader_glsl_swizzle_to_str(merged_swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
/* If ARB_texture_swizzle is supported we don't need to do anything here. * We actually rely on it for vertex shaders and SM4+. */ @@ -3836,7 +3854,6 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_
if (np2_fixup) { - const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const unsigned char idx = priv->cur_np2fixup_info->idx[sampler_bind_idx];
switch (shader_glsl_get_write_mask_size(sample_function->coord_mask)) @@ -3859,7 +3876,21 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_ break; } } - if (dx && dy) + if (sample_function->fetch4_enabled) + { + if (sample_function->fetch4_projected) + { + struct wined3d_string_buffer *reg_name = string_buffer_get(priv->string_buffers); + shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx); + shader_addline(ins->ctx->buffer, " / %s.w", reg_name->buffer); + string_buffer_release(priv->string_buffers, reg_name); + } + + /* Correct the fetch4 0.5 texel offset */ + shader_addline(ins->ctx->buffer, " + (vec2(0.5) / textureSize(%s_sampler%u, 0).xy)", + shader_glsl_get_prefix(version->type), sampler_bind_idx); + } + else if (dx && dy) shader_addline(ins->ctx->buffer, ", %s, %s", dx, dy); else if (bias) shader_addline(ins->ctx->buffer, ", %s", bias); @@ -5397,6 +5428,10 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) } }
+ /* When fetch4 is active, projection is done manually */ + if (priv->cur_ps_args->fetch4 & (1u << resource_idx)) + mask = 0; + shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function); mask |= sample_function.coord_mask; sample_function.coord_mask = mask; diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..ede51cfa7d 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,16 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
+ for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) + { + if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && shader->reg_maps.resource_info[i].type == WINED3D_SHADER_RESOURCE_TEXTURE_2D) + args->fetch4 |= 1 << i; + else + args->fetch4 &= ~(1 << i); + } + if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info)) { const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 8708aa09b3..03d623725e 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -3616,6 +3616,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state { struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]); BOOL srgb = state->sampler_states[sampler_idx][WINED3D_SAMP_SRGB_TEXTURE]; + BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'); const DWORD *sampler_states = state->sampler_states[sampler_idx]; struct wined3d_device *device = context->device; struct wined3d_sampler_desc desc; @@ -3650,6 +3652,16 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state /* Trigger shader constant reloading (for NP2 texcoord fixup) */ if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; + + /* Trigger pixel shader recompilation for FETCH4 changes */ + if (((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4) + { + if (fetch4) + context->last_was_fetch4 |= 1 << sampler_idx; + else + context->last_was_fetch4 &= ~(1 << sampler_idx); + context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL); + } } else { diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 4224461142..24696ed477 100644 --- 
a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -521,7 +521,16 @@ enum wined3d_immconst_type WINED3D_IMMCONST_VEC4, };
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_FETCH4_SWIZZLE (2u | (0u << 2) | (1u << 4) | (3u << 6)) /* zxyw */ + +static inline DWORD wined3d_merge_swizzle(const DWORD a, const DWORD b) +{ + return (((a >> ((b >> 0) & 0x3) * 2) & 0x3) << 0) + + (((a >> ((b >> 2) & 0x3) * 2) & 0x3) << 2) + + (((a >> ((b >> 4) & 0x3) * 2) & 0x3) << 4) + + (((a >> ((b >> 6) & 0x3) * 2) & 0x3) << 6); +}
enum wined3d_shader_src_modifier { @@ -1358,7 +1367,8 @@ struct ps_compile_args DWORD alpha_test_func : 3; DWORD render_offscreen : 1; DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */ - DWORD padding : 18; + WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */ + DWORD padding : 2; };
enum fog_src_type @@ -1893,6 +1903,7 @@ struct wined3d_context DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; + DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */ DWORD fog_coord : 1; DWORD fog_enabled : 1; DWORD current : 1;