Module: wine Branch: master Commit: ef7f769be6dbc4655f6363cd7ed3670f8b1dfc01 URL: http://source.winehq.org/git/wine.git/?a=commit;h=ef7f769be6dbc4655f6363cd7e...
Author: Tobias Jakobi liquid.acid@gmx.net Date: Wed Jun 17 23:26:38 2009 +0200
wined3d: Enable constant packing for NP2 texcoord fixup.
Previously, every texture that was flagged for NP2 fixup used a vec2 uniform in the shader to store its texture dimensions. It turns out that GLSL compilers just map a vec2 to a vec4, essentially wasting 2 floats. The new code uses only vec4 uniforms, but packs the dimension info of 2 textures into a single uniform.
---
dlls/wined3d/glsl_shader.c | 104 ++++++++++++++++++++++++++++--------------- 1 files changed, 68 insertions(+), 36 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index bd5724a..2fc8586 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -103,7 +103,7 @@ struct glsl_shader_prog_link { GLint vuniformI_locations[MAX_CONST_I]; GLint puniformI_locations[MAX_CONST_I]; GLint posFixup_location; - GLint np2Fixup_location[MAX_FRAGMENT_SAMPLERS]; + GLint np2Fixup_location; GLint bumpenvmat_location[MAX_TEXTURES]; GLint luminancescale_location[MAX_TEXTURES]; GLint luminanceoffset_location[MAX_TEXTURES]; @@ -572,24 +572,31 @@ static void shader_glsl_load_np2fixup_constants( return; }
- if (prog->ps_args.np2_fixup) { - UINT i; - UINT fixup = prog->ps_args.np2_fixup; + if (prog->ps_args.np2_fixup && -1 != prog->np2Fixup_location) { const WineD3D_GL_Info* gl_info = &deviceImpl->adapter->gl_info; const IWineD3DStateBlockImpl* stateBlock = (const IWineD3DStateBlockImpl*) deviceImpl->stateBlock; + UINT i; + UINT fixup = prog->ps_args.np2_fixup; + GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
for (i = 0; fixup; fixup >>= 1, ++i) { - if (-1 != prog->np2Fixup_location[i]) { - const IWineD3DBaseTextureImpl* const tex = (const IWineD3DBaseTextureImpl*) stateBlock->textures[i]; - if (!tex) { - FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n"); - continue; - } else { - const float tex_dim[2] = {tex->baseTexture.pow2Matrix[0], tex->baseTexture.pow2Matrix[5]}; - GL_EXTCALL(glUniform2fvARB(prog->np2Fixup_location[i], 1, tex_dim)); - } + const unsigned char idx = prog->np2Fixup_info->idx[i]; + const IWineD3DBaseTextureImpl* const tex = (const IWineD3DBaseTextureImpl*) stateBlock->textures[i]; + GLfloat* tex_dim = &np2fixup_constants[(idx >> 1) * 4]; + + if (!tex) { + FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n"); + continue; + } + + if (idx % 2) { + tex_dim[2] = tex->baseTexture.pow2Matrix[0]; tex_dim[3] = tex->baseTexture.pow2Matrix[5]; + } else { + tex_dim[0] = tex->baseTexture.pow2Matrix[0]; tex_dim[1] = tex->baseTexture.pow2Matrix[5]; } } + + GL_EXTCALL(glUniform4fvARB(prog->np2Fixup_location, prog->np2Fixup_info->num_consts, np2fixup_constants)); } }
@@ -776,10 +783,11 @@ static int vec4_varyings(DWORD shader_major, const WineD3D_GL_Info *gl_info) /** Generate the variable & register declarations for the GLSL output target */ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const shader_reg_maps *reg_maps, SHADER_BUFFER *buffer, const WineD3D_GL_Info *gl_info, - const struct ps_compile_args *ps_args) + struct shader_glsl_ctx_priv *ctx_priv) { IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface; IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device; + const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args; unsigned int i, extra_constants_needed = 0; const local_constant *lconst;
@@ -922,15 +930,6 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s } else { shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i); } - - if (pshader && ps_args->np2_fixup & (1 << i)) - { - /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height] - * while D3D has them in the (normalized) [0,1]x[0,1] range. - * samplerNP2Fixup stores texture dimensions and is updated through - * shader_glsl_load_np2fixup_constants when the sampler changes. */ - shader_addline(buffer, "uniform vec2 %csamplerNP2Fixup%u;\n", prefix, i); - } break; case WINED3DSTT_CUBE: shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i); @@ -945,7 +944,38 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s } } } - + + /* Declare uniforms for NP2 texcoord fixup: + * This is NOT done inside the loop that declares the texture samplers since the NP2 fixup code + * is currently only used for the GeforceFX series and when forcing the ARB_npot extension off. + * Modern cards just skip the code anyway, so put it inside a seperate loop. */ + if (pshader && ps_args->np2_fixup) { + + struct ps_np2fixup_info* const fixup = ctx_priv->cur_np2fixup_info; + UINT cur = 0; + + /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height] + * while D3D has them in the (normalized) [0,1]x[0,1] range. + * samplerNP2Fixup stores texture dimensions and is updated through + * shader_glsl_load_np2fixup_constants when the sampler changes. 
*/ + + for (i = 0; i < This->baseShader.limits.sampler; ++i) { + if (reg_maps->sampler_type[i]) { + if (!(ps_args->np2_fixup & (1 << i))) continue; + + if (WINED3DSTT_2D != reg_maps->sampler_type[i]) { + FIXME("Non-2D texture is flagged for NP2 texcoord fixup.\n"); + continue; + } + + fixup->idx[i] = cur++; + } + } + + fixup->num_consts = (cur + 1) >> 1; + shader_addline(buffer, "uniform vec4 %csamplerNP2Fixup[%u];\n", prefix, fixup->num_consts); + } + /* Declare address variables */ for (i = 0; i < This->baseShader.limits.address; i++) { if (reg_maps->address[i]) @@ -1679,7 +1709,7 @@ static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_s
if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) { - struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; fixup = priv->cur_ps_args->color_fixup[sampler]; sampler_base = "Psampler";
@@ -1707,7 +1737,11 @@ static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_s shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle); } else { if (np2_fixup) { - shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup%u)%s);\n", sampler, dst_swizzle); + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + const unsigned char idx = priv->cur_np2fixup_info->idx[sampler]; + + shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup[%u].%s)%s);\n", idx >> 1, + (idx % 2) ? "zw" : "xy", dst_swizzle); } else if(dx && dy) { shader_addline(ins->ctx->buffer, ", %s, %s)%s);\n", dx, dy, dst_swizzle); } else { @@ -3658,7 +3692,7 @@ static GLuint shader_glsl_generate_pshader(IWineD3DPixelShaderImpl *This, }
/* Base Declarations */ - shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, args); + shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, &priv_ctx);
/* Pack 3.0 inputs */ if (reg_maps->shader_version.major >= 3 && args->vp_mode != vertexshader) @@ -3747,7 +3781,7 @@ static GLuint shader_glsl_generate_vshader(IWineD3DVertexShaderImpl *This, priv_ctx.cur_vs_args = args;
/* Base Declarations */ - shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, NULL); + shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, buffer, &GLINFO_LOCATION, &priv_ctx);
/* Base Shader Body */ shader_generate_main((IWineD3DBaseShader*)This, buffer, reg_maps, function, &priv_ctx); @@ -4053,7 +4087,6 @@ static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use
if(pshader) { char name[32]; - WORD map;
for(i = 0; i < MAX_TEXTURES; i++) { sprintf(name, "bumpenvmat%u", i); @@ -4064,13 +4097,12 @@ static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name)); }
- map = ps_compile_args.np2_fixup; - for (i = 0; map; map >>= 1, ++i) - { - if (!(map & 1)) continue; - - sprintf(name, "PsamplerNP2Fixup%u", i); - entry->np2Fixup_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name)); + if (ps_compile_args.np2_fixup) { + if (entry->np2Fixup_info) { + entry->np2Fixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "PsamplerNP2Fixup")); + } else { + FIXME("NP2 texcoord fixup needed for this pixelshader, but no fixup uniform found."); + } } }