Module: wine
Branch: master
Commit: 38076e08ca0273f5f94634ef27ccacb6c26c09d8
URL: http://source.winehq.org/git/wine.git/?a=commit;h=38076e08ca0273f5f94634ef27ccacb6c26c09d8
Author: Stefan Dösinger <stefan(a)codeweavers.com>
Date: Fri May 29 17:15:40 2009 +0200
wined3d: Add a comment about texbem and X2D.
---
dlls/wined3d/arb_program_shader.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c
index 0d32bbc..f59ef54 100644
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@@ -1399,6 +1399,13 @@ static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins)
/* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
* The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register
+ *
+ * GL_NV_fragment_program_option could handle this in one instruction via X2D:
+ * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw
+ *
+ * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that
+ * comes with it, and texbem is a 1.x-only instruction. No 1.x instruction forces us to enable the NV
+ * extension.
*/
shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code);
shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg);
Module: wine
Branch: master
Commit: de12f880ced7318ba0f8a62946fe0bd292ce66d9
URL: http://source.winehq.org/git/wine.git/?a=commit;h=de12f880ced7318ba0f8a62946fe0bd292ce66d9
Author: Stefan Dösinger <stefan(a)codeweavers.com>
Date: Fri May 29 17:11:04 2009 +0200
wined3d: Don't enable the NV frag extensions if we don't need them.
Enabling the NV extensions occupies a temp register for some reason. Avoid
needlessly enabling it.
---
dlls/wined3d/arb_program_shader.c | 36 ++++++++++++++++++++++++++++++++++--
dlls/wined3d/baseshader.c | 4 ++++
dlls/wined3d/wined3d_private.h | 2 +-
3 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c
index 2a34567..0d32bbc 100644
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@@ -2058,6 +2058,7 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This);
struct shader_arb_ctx_priv priv_ctx;
BOOL dcl_tmp = args->super.srgb_correction, dcl_td = FALSE;
+ BOOL want_nv_prog = FALSE;
char srgbtmp[4][4];
unsigned int i, found = 0;
@@ -2103,14 +2104,38 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
priv_ctx.cur_ps_args = args;
list_init(&priv_ctx.if_frames);
+ /* Avoid enabling NV_fragment_program* if we do not need it.
+ *
+ * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register,
+ * and it slows down the shader execution noticeably (about 5%). Usually our instruction emulation
+ * is faster than what we gain from using higher native instructions. There are some things though
+ * that cannot be emulated. In that case enable the extensions.
+ * If the extension is enabled, instruction handlers that support both ways will use it.
+ *
+ * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program.
+ * So enable the best we can get.
+ */
+ if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0)
+ {
+ want_nv_prog = TRUE;
+ }
+
shader_addline(buffer, "!!ARBfp1.0\n");
- if(GL_SUPPORT(NV_FRAGMENT_PROGRAM2)) {
+ if(want_nv_prog && GL_SUPPORT(NV_FRAGMENT_PROGRAM2)) {
shader_addline(buffer, "OPTION NV_fragment_program2;\n");
priv_ctx.target_version = NV3;
- } else if(GL_SUPPORT(NV_FRAGMENT_PROGRAM_OPTION)) {
+ } else if(want_nv_prog && GL_SUPPORT(NV_FRAGMENT_PROGRAM_OPTION)) {
shader_addline(buffer, "OPTION NV_fragment_program;\n");
priv_ctx.target_version = NV2;
} else {
+ if(want_nv_prog)
+ {
+ /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some
+ * limits properly
+ */
+ ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n");
+ ERR("Try GLSL\n");
+ }
priv_ctx.target_version = ARB;
}
@@ -2131,6 +2156,10 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
}
}
+ /* For now always declare the temps. At least the Nvidia assembler optimizes completely
+ * unused temps away (but occupies them for the whole shader if they're used once). Always
+ * declaring them avoids tricky bookkeeping work
+ */
shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */
shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */
shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */
@@ -2227,6 +2256,9 @@ static GLuint shader_arb_generate_vshader(IWineD3DVertexShaderImpl *This,
/* Create the hw ARB shader */
shader_addline(buffer, "!!ARBvp1.0\n");
+ /* Always enable the NV extension if available. Unlike fragment shaders, there is no
+ * measurable performance penalty, and we can always make use of it for clip planes.
+ */
if(GL_SUPPORT(NV_VERTEX_PROGRAM2_OPTION)) {
shader_addline(buffer, "OPTION NV_vertex_program2;\n");
priv_ctx.target_version = NV2;
diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c
index 9dcdbe6..1a34fa2 100644
--- a/dlls/wined3d/baseshader.c
+++ b/dlls/wined3d/baseshader.c
@@ -683,6 +683,10 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
{
reg_maps->usesdsy = 1;
}
+ else if (ins.handler_idx == WINED3DSIH_DSX)
+ {
+ reg_maps->usesdsx = 1;
+ }
else if(ins.handler_idx == WINED3DSIH_TEXLDD)
{
reg_maps->usestexldd = 1;
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 8b7bff8..f217366 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -632,7 +632,7 @@ typedef struct shader_reg_maps
WINED3DSAMPLER_TEXTURE_TYPE sampler_type[max(MAX_FRAGMENT_SAMPLERS, MAX_VERTEX_SAMPLERS)];
BOOL bumpmat[MAX_TEXTURES], luminanceparams[MAX_TEXTURES];
- char usesnrm, vpos, usesdsy, usestexldd, usesmova;
+ char usesnrm, vpos, usesdsx, usesdsy, usestexldd, usesmova;
char usesrelconstF;
/* Whether or not loops are used in this shader, and nesting depth */