Module: wine Branch: master Commit: daf2290ea4d92a2e4f11f16ddc4a69cb9e1ae064 URL: http://source.winehq.org/git/wine.git/?a=commit;h=daf2290ea4d92a2e4f11f16ddc4a69cb9e1ae064
Author: Stefan Dösinger stefan@codeweavers.com Date: Wed Sep 5 20:09:06 2007 +0200
wined3d: Implement the nrm instruction in arb.
---
dlls/wined3d/arb_program_shader.c | 22 ++++++++++++++++++++++ dlls/wined3d/baseshader.c | 5 ++++- dlls/wined3d/pixelshader.c | 11 +---------- dlls/wined3d/vertexshader.c | 16 ++++++---------- dlls/wined3d/wined3d_private.h | 4 ++++ 5 files changed, 37 insertions(+), 21 deletions(-)
diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c index 4d7dd58..8bc458f 100644 --- a/dlls/wined3d/arb_program_shader.c +++ b/dlls/wined3d/arb_program_shader.c @@ -1477,6 +1477,28 @@ void vshader_hw_rsq_rcp(SHADER_OPCODE_ARG* arg) { shader_addline(buffer, "%s;\n", tmpLine); }
+void shader_hw_nrm(SHADER_OPCODE_ARG* arg) { + SHADER_BUFFER* buffer = arg->buffer; + char dst_name[50]; + char src_name[50]; + char dst_wmask[20]; + DWORD shift = (arg->dst & WINED3DSP_DSTSHIFT_MASK) >> WINED3DSP_DSTSHIFT_SHIFT; + BOOL sat = (arg->dst & WINED3DSP_DSTMOD_MASK) & WINED3DSPDM_SATURATE; + + pshader_get_register_name(arg->dst, dst_name); + shader_arb_get_write_mask(arg, arg->dst, dst_wmask); + + pshader_gen_input_modifier_line(buffer, arg->src[0], 0, src_name); + shader_addline(buffer, "DP3 TMP, %s, %s;\n", src_name, src_name); + shader_addline(buffer, "RSQ TMP, TMP.x;\n"); + /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/ + shader_addline(buffer, "MUL%s %s%s, %s, TMP;\n", sat ? "_SAT" : "", dst_name, dst_wmask, + src_name); + + if (shift != 0) + pshader_gen_output_modifier_line(buffer, FALSE, dst_wmask, shift, dst_name); +} + /* TODO: merge with pixel shader */ /* Map the opcode 1-to-1 to the GL code */ void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg) { diff --git a/dlls/wined3d/baseshader.c b/dlls/wined3d/baseshader.c index df9c3e5..555929c 100644 --- a/dlls/wined3d/baseshader.c +++ b/dlls/wined3d/baseshader.c @@ -381,6 +381,9 @@ HRESULT shader_get_registers_used( } } } + if(WINED3DSIO_NRM == curOpcode->opcode) { + reg_maps->usesnrm = 1; + }
/* This will loop over all the registers and try to * make a bitmask of the ones we're interested in. @@ -416,7 +419,7 @@ HRESULT shader_get_registers_used(
else if (WINED3DSPR_RASTOUT == regtype && reg == 1) reg_maps->fog = 1; - } + } } }
diff --git a/dlls/wined3d/pixelshader.c b/dlls/wined3d/pixelshader.c index 10fb732..0c18b26 100644 --- a/dlls/wined3d/pixelshader.c +++ b/dlls/wined3d/pixelshader.c @@ -177,16 +177,7 @@ CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = { {WINED3DSIO_CMP, "cmp", NULL, 1, 4, pshader_hw_cmp, shader_glsl_cmp, WINED3DPS_VERSION(1,2), WINED3DPS_VERSION(3,0)}, {WINED3DSIO_POW, "pow", "POW", 1, 3, pshader_hw_map2gl, shader_glsl_pow, 0, 0}, {WINED3DSIO_CRS, "crs", "XPD", 1, 3, pshader_hw_map2gl, shader_glsl_cross, 0, 0}, - /* TODO: xyz normalise can be performed as VS_ARB using one temporary register, - DP3 tmp , vec, vec; - RSQ tmp, tmp.x; - MUL vec.xyz, vec, tmp; - but I think this is better because it accounts for w properly. - DP3 tmp , vec, vec; - RSQ tmp, tmp.x; - MUL vec, vec, tmp; - */ - {WINED3DSIO_NRM, "nrm", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0}, + {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0}, {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DPS_VERSION(2,0), WINED3DPS_VERSION(2,1)}, {WINED3DSIO_SINCOS, "sincos", NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DPS_VERSION(3,0), -1}, {WINED3DSIO_DP2ADD, "dp2add", NULL, 1, 4, pshader_hw_dp2add, pshader_glsl_dp2add, WINED3DPS_VERSION(2,0), -1}, diff --git a/dlls/wined3d/vertexshader.c b/dlls/wined3d/vertexshader.c index 3f8f5bf..8564c06 100644 --- a/dlls/wined3d/vertexshader.c +++ b/dlls/wined3d/vertexshader.c @@ -114,16 +114,7 @@ CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = { RCP tmp, vec MUL out, tmp, vec*/ {WINED3DSIO_SGN, "sgn", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0}, - /* TODO: xyz normalise can be performed as VS_ARB using one temporary register, - DP3 tmp , vec, vec; - RSQ tmp, tmp.x; - MUL vec.xyz, vec, tmp; - but I think this is better because it accounts for w properly. 
- DP3 tmp , vec, vec; - RSQ tmp, tmp.x; - MUL vec, vec, tmp; - */ - {WINED3DSIO_NRM, "nrm", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0}, + {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0}, {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)}, {WINED3DSIO_SINCOS, "sincos", NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1}, /* Matrix */ @@ -382,6 +373,11 @@ static VOID IWineD3DVertexShaderImpl_GenerateShader( This->baseShader.limits.constant_float = min(95, This->baseShader.limits.constant_float);
+ /* Some instructions need a temporary register. Add it if needed, but only if it is really needed */ + if(reg_maps->usesnrm) { + shader_addline(&buffer, "TEMP TMP;\n"); + } + /* Base Declarations */ shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 70d44e4..a5aa8e3 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -1652,6 +1652,7 @@ typedef struct shader_reg_maps { * Use 0 as default (bit 31 is always 1 on a valid token) */ DWORD samplers[max(MAX_FRAGMENT_SAMPLERS, MAX_VERTEX_SAMPLERS)]; char bumpmat, luminanceparams; + char usesnrm;
/* Whether or not a loop is used in this shader */ char loop; @@ -1788,6 +1789,9 @@ extern void pshader_hw_texm3x3(SHADER_OPCODE_ARG* arg); extern void pshader_hw_texm3x2depth(SHADER_OPCODE_ARG* arg); extern void pshader_hw_dp2add(SHADER_OPCODE_ARG* arg);
+/* ARB vertex / pixel shader common prototypes */ +extern void shader_hw_nrm(SHADER_OPCODE_ARG* arg); + /* ARB vertex shader prototypes */ extern void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg); extern void vshader_hw_mnxn(SHADER_OPCODE_ARG* arg);