- Add a format flag to indicate which texture formats support FETCH4. - The implementation follows the AMD implementation: the gathered result is swizzled, projection is allowed, and a 0.5 texel offset is added. - Projection is done manually for FETCH4, because textureGather cannot be combined with the Proj suffix.
Signed-off-by: Daniel Ansorregui mailszeros@gmail.com --- dlls/wined3d/glsl_shader.c | 44 ++++++++++++++++++++++++++++------ dlls/wined3d/utils.c | 27 +++++++++++++++++++++ dlls/wined3d/wined3d_private.h | 4 +++- 3 files changed, 67 insertions(+), 8 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 660005f57e..3ec54e3418 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9720,6 +9720,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "#extension GL_ARB_shading_language_420pack : enable\n"); if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n"); + if (gl_info->supported[ARB_TEXTURE_GATHER]) + shader_addline(buffer, "#extension GL_ARB_texture_gather : enable\n");
if (!needs_legacy_glsl_syntax(gl_info)) { @@ -9860,6 +9862,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < WINED3D_MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + struct wined3d_string_buffer offset; + BOOL fetch4 = settings->op[stage].fetch4; + BOOL fetch4_proj = FALSE; BOOL proj;
if (!(tex_map & (1u << stage))) @@ -9879,7 +9884,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE;
@@ -9910,11 +9914,24 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture";
+ string_buffer_init(&offset); + if (fetch4) + { + texture_function = "textureGather"; + /* Apply a 0.5 texel offset as in AMD implementation */ + shader_addline(&offset, " + (vec2(0.5) / textureSize(ps_sampler%u, 0).xy)", stage); + + /* When projection is needed on fetch4 we have to apply it manually by dividing .w */ + fetch4_proj = proj; + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9945,8 +9962,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); }
- shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s%s%s);\n", stage, texture_function, + proj ? "Proj" : "", stage, coord_mask, proj ? "w" : "", fetch4_proj ? " / ret.w" : "", offset.buffer);
if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n", @@ -9954,14 +9971,27 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * } else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xy", stage, + texture_function, proj ? "Proj" : "", stage, stage); + if (fetch4_proj) + shader_addline(buffer, " / ffp_texcoord[%u].z", stage); + else + shader_addline(buffer, "z"); + shader_addline(buffer, "%s);\n", offset.buffer); } else { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s", stage, + texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + if (fetch4_proj) + shader_addline(buffer, " / ffp_texcoord[%u].w", stage); + shader_addline(buffer, "%s);\n", offset.buffer); } + string_buffer_clear(&offset); + + /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.zxyw;\n", stage, stage);
string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 062f8662aa..d34f338dc0 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -342,6 +342,19 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION}, };
+/* List of texture formats where fetch4 can be enabled. + * Only available if the ARB_TEXTURE_GATHER extension is present. */ +static const enum wined3d_format_id wined3d_format_fetch4_enabled[] = +{ + WINED3DFMT_L8_UNORM, + WINED3DFMT_L16_UNORM, + WINED3DFMT_R16_FLOAT, + WINED3DFMT_R16, + WINED3DFMT_R32_FLOAT, + WINED3DFMT_A8_UNORM, + WINED3DFMT_INTZ, +}; + static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) { BYTE c; @@ -2120,6 +2133,15 @@ static BOOL init_format_base_info(struct wined3d_adapter *adapter) format_set_flag(format, format_base_flags[i].flags); }
+ if (adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + for (i = 0; i < ARRAY_SIZE(wined3d_format_fetch4_enabled); ++i) + { + if (!(format = get_format_internal(adapter, wined3d_format_fetch4_enabled[i]))) + return FALSE; + + format_set_flag(format, WINED3DFMT_FLAG_ALLOW_FETCH4); + } + return TRUE; }
@@ -5780,6 +5802,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5923,6 +5946,10 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = (state->textures[i] + && state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && settings->op[i].tex_type & (WINED3D_GL_RES_TYPE_TEX_2D | WINED3D_GL_RES_TYPE_TEX_RECT)); }
/* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index cd49789c1b..7933c4f547 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2748,7 +2748,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; };
struct ffp_frag_settings @@ -4466,6 +4467,7 @@ extern enum wined3d_format_id pixelformat_for_depth(DWORD depth) DECLSPEC_HIDDEN #define WINED3DFMT_FLAG_VERTEX_ATTRIBUTE 0x01000000 #define WINED3DFMT_FLAG_BLIT 0x02000000 #define WINED3DFMT_FLAG_MAPPABLE 0x04000000 +#define WINED3DFMT_FLAG_ALLOW_FETCH4 0x08000000
struct wined3d_rational {