Module: wine
Branch: master
Commit: da7176be2a9712737a531f1c661a9346c49fdd7f
URL: http://source.winehq.org/git/wine.git/?a=commit;h=da7176be2a9712737a531f1c6…
Author: Stefan Dösinger <stefan(a)codeweavers.com>
Date: Thu May 28 17:27:54 2009 +0200
wined3d: Use DP2A or X2D for dp2add if available.
---
dlls/wined3d/arb_program_shader.c | 48 ++++++++++++++++++++++++++++++------
1 files changed, 40 insertions(+), 8 deletions(-)
diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c
index 1713302..0b188b7 100644
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@@ -1020,19 +1020,51 @@ static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins)
SHADER_BUFFER *buffer = ins->ctx->buffer;
char dst_name[50];
char src_name[3][50];
+ struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
shader_arb_get_dst_param(ins, dst, dst_name);
shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
- shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
- /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2]
- * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite
- */
- shader_addline(buffer, "MOV TA, %s;\n", src_name[0]);
- shader_addline(buffer, "MOV TA.z, 0.0;\n");
- shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]);
- shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]);
+ if(ctx->target_version >= NV3)
+ {
+ /* GL_NV_fragment_program2 has a 1:1 matching instruction */
+ shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
+ shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
+ dst_name, src_name[0], src_name[1], src_name[2]);
+ }
+ else if(ctx->target_version >= NV2)
+ {
+ /* dst.x = src2.? + src0.x * src1.x + src0.y * src1.y
+ * dst.y = src2.? + src0.x * src1.z + src0.y * src1.w
+ * dst.z = src2.? + src0.x * src1.x + src0.y * src1.y
+ * dst.w = src2.? + src0.x * src1.z + src0.y * src1.w
+ *
+ * Make sure that src1.zw = src1.xy, then we get a classic dp2add
+ *
+ * .xyxy and other swizzles that we could get with this are not valid in
+ * plain ARBfp, but luckily the NV extension grammar lifts this limitation.
+ */
+ struct wined3d_shader_src_param tmp_param = ins->src[1];
+ DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */
+ tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */
+
+ shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]);
+
+ shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
+ dst_name, src_name[2], src_name[0], src_name[1]);
+ }
+ else
+ {
+ shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
+ /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2]
+ * src_name[0] can be TA, but TA is a private temp for modifiers, so it is safe to overwrite
+ */
+ shader_addline(buffer, "MOV TA, %s;\n", src_name[0]);
+ shader_addline(buffer, "MOV TA.z, 0.0;\n");
+ shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]);
+ shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]);
+ }
}
/* Map the opcode 1-to-1 to the GL code */