- Add a flag to indicate FETCH4 support in textures. - The implementation follows the AMD implementation: projection is allowed, the gathered result is swizzled to match FETCH4, and a 0.5 texel offset is added.
Signed-off-by: Daniel Ansorregui <mailszeros@gmail.com> --- dlls/wined3d/glsl_shader.c | 43 ++++++++++++++++++++++++++++------ dlls/wined3d/utils.c | 11 +++++++++ dlls/wined3d/wined3d_private.h | 4 +++- 3 files changed, 50 insertions(+), 8 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 3298a604fd..1950db06a9 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9711,6 +9711,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "#extension GL_ARB_shading_language_420pack : enable\n"); if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n"); + if (gl_info->supported[ARB_TEXTURE_GATHER]) + shader_addline(buffer, "#extension GL_ARB_texture_gather : enable\n");
if (!needs_legacy_glsl_syntax(gl_info)) { @@ -9851,6 +9853,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + struct wined3d_string_buffer offset; + BOOL fetch4 = settings->op[stage].fetch4; + BOOL fetch4_proj = FALSE; BOOL proj;
if (!(tex_map & (1u << stage))) @@ -9870,7 +9875,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE;
@@ -9879,6 +9883,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D"; @@ -9887,6 +9892,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz"; + if (fetch4) + FIXME("Unsupported Fetch4 and texture3D sampling"); + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; @@ -9901,11 +9909,24 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture";
+ string_buffer_init(&offset); + if (fetch4) + { + texture_function = "textureGather"; + /* Apply a 0.5 texel offset as in AMD implementation */ + shader_addline(&offset, " + (vec2(0.5) / textureSize(ps_sampler%u, 0).xy)", stage); + + /* When projection is needed on fetch4 we have to apply it manually by dividing .w */ + fetch4_proj = proj; + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9936,8 +9957,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); }
- shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s%s%s);\n", stage, texture_function, + proj ? "Proj" : "", stage, coord_mask, proj ? "w" : "", fetch4_proj ? " / ret.w" : "", offset.buffer);
if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n", @@ -9945,14 +9966,22 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * } else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz%s);\n", stage, + texture_function, proj ? "Proj" : "", stage, stage, offset.buffer); } else { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s", stage, + texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + if (fetch4_proj) + shader_addline(buffer, " / ffp_texcoord[%u].w", stage); + shader_addline(buffer, "%s);\n", offset.buffer); } + string_buffer_clear(&offset); + + /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.zxyw;\n", stage, stage);
string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 7b42202213..82111c8bb2 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -340,6 +340,12 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_NULL, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_NVDB, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION}, + {WINED3DFMT_L8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_L16_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R16_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_A8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) @@ -5780,6 +5786,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5923,6 +5930,10 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = (state->textures[i] && gl_info->supported[ARB_TEXTURE_GATHER] + && state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && settings->op[i].tex_type & (WINED3D_GL_RES_TYPE_TEX_2D | WINED3D_GL_RES_TYPE_TEX_RECT)); }
/* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 1e3ec28d6b..4224461142 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2747,7 +2747,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; };
struct ffp_frag_settings @@ -4434,6 +4435,7 @@ extern enum wined3d_format_id pixelformat_for_depth(DWORD depth) DECLSPEC_HIDDEN #define WINED3DFMT_FLAG_VERTEX_ATTRIBUTE 0x01000000 #define WINED3DFMT_FLAG_BLIT 0x02000000 #define WINED3DFMT_FLAG_MAPPABLE 0x04000000 +#define WINED3DFMT_FLAG_ALLOW_FETCH4 0x08000000
struct wined3d_rational {