[PATCH v5 1/1] vkd3d-shader/ir: Implement MAD in two operations if flagged as precise.

8 Apr 2024

From: Conor McCarthy cmccarthy@codeweavers.com
---
 libs/vkd3d-shader/ir.c                        | 55 +++++++++++++++++++
 libs/vkd3d-shader/vkd3d_shader_private.h      | 25 +++++++++
 .../hlsl/arithmetic-float-uniform.shader_test |  2 +-
 3 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c
index 8af537390..2bda60b03 100644
--- a/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d-shader/ir.c
@@ -1825,6 +1825,8 @@ struct cf_flattener
     unsigned int control_flow_depth;
     struct cf_flattener_info *control_flow_info;
     size_t control_flow_info_size;
+
+    unsigned int precise_mad_temp_idx;
 };
static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error)
@@ -2036,6 +2038,53 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla
     flattener->block_names[block_id] = buffer.buffer;
 }
+/* The Shader Model 5 Assembly documentation states: "If components of a mad
+ * instruction are tagged as precise, the hardware must execute a mad instruction
+ * or the exact equivalent, and it cannot split it into a multiply followed by an add."
+ * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is
+ * not fused for "precise" operations."
+ * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */
+static bool cf_flattener_handle_precise_mad(struct cf_flattener *flattener,
+        const struct vkd3d_shader_instruction *instruction)
+{
+    struct vkd3d_shader_instruction *mul_ins, *add_ins;
+    struct vsir_program *program = flattener->program;
+    struct vkd3d_shader_dst_param *mul_dst;
+
+    if (!(instruction->flags & VKD3DSI_PRECISE_XYZW))
+        return cf_flattener_copy_instruction(flattener, instruction);
+
+    if (!(mul_ins = cf_flattener_require_space(flattener, 2)))
+        return false;
+    add_ins = mul_ins + 1;
+
+    if (!flattener->precise_mad_temp_idx)
+        flattener->precise_mad_temp_idx = 1 + program->temp_count++;
+
+    *mul_ins = *instruction;
+    mul_ins->handler_idx = VKD3DSIH_MUL;
+    mul_ins->src_count = 2;
+
+    if (!(vsir_instruction_init_with_params(program, add_ins, &instruction->location, VKD3DSIH_ADD, 1, 2)))
+        return false;
+    add_ins->flags = instruction->flags & VKD3DSI_PRECISE_XYZW;
+
+    mul_dst = mul_ins->dst;
+    *add_ins->dst = *mul_dst;
+
+    mul_dst->modifiers = 0;
+    vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, instruction->src[0].reg.data_type, 1);
+    mul_dst->reg.idx[0].offset = flattener->precise_mad_temp_idx - 1;
+
+    add_ins->src[0].reg = mul_dst->reg;
+    add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask);
+    add_ins->src[0].modifiers = 0;
+    add_ins->src[1] = mul_ins->src[2];
+
+    flattener->instruction_count += 2;
+    return true;
+}
+
 static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener,
         struct vkd3d_shader_message_context *message_context)
 {
@@ -2375,6 +2424,12 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte
                     main_block_open = false;
                 break;
+            /* Handle precise MAD here because it requires insertion of an extra instruction. */
+            case VKD3DSIH_MAD:
+                if (!cf_flattener_handle_precise_mad(flattener, instruction))
+                    return VKD3D_ERROR_OUT_OF_MEMORY;
+                break;
+
             default:
                 if (!cf_flattener_copy_instruction(flattener, instruction))
                     return VKD3D_ERROR_OUT_OF_MEMORY;
diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h
index b07a7bff7..f401fd7a3 100644
--- a/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -1761,6 +1761,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ
     return compacted_swizzle;
 }
+static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask)
+{
+    static const unsigned int swizzles[16] =
+    {
+        0,
+        VKD3D_SHADER_SWIZZLE(X, X, X, X),
+        VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y),
+        VKD3D_SHADER_SWIZZLE(X, Y, X, X),
+        VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z),
+        VKD3D_SHADER_SWIZZLE(X, Z, X, X),
+        VKD3D_SHADER_SWIZZLE(Y, Z, X, X),
+        VKD3D_SHADER_SWIZZLE(X, Y, Z, X),
+        VKD3D_SHADER_SWIZZLE(W, W, W, W),
+        VKD3D_SHADER_SWIZZLE(X, W, X, X),
+        VKD3D_SHADER_SWIZZLE(Y, W, X, X),
+        VKD3D_SHADER_SWIZZLE(X, Y, W, X),
+        VKD3D_SHADER_SWIZZLE(Z, W, X, X),
+        VKD3D_SHADER_SWIZZLE(X, Z, W, X),
+        VKD3D_SHADER_SWIZZLE(Y, Z, W, X),
+        VKD3D_SHADER_SWIZZLE(X, Y, Z, W),
+    };
+
+    return swizzles[writemask & 0xf];
+}
+
 struct vkd3d_struct
 {
     enum vkd3d_shader_structure_type type;
diff --git a/tests/hlsl/arithmetic-float-uniform.shader_test b/tests/hlsl/arithmetic-float-uniform.shader_test
index 8bc3992e7..c6b7caaae 100644
--- a/tests/hlsl/arithmetic-float-uniform.shader_test
+++ b/tests/hlsl/arithmetic-float-uniform.shader_test
@@ -121,7 +121,7 @@ uniform 0 float4 1.00000007 -42.1 4.0 45.0
 uniform 4 float4 1.625 -5.0 4.125 5.0
 uniform 8 float4 1.00000007 -1.0 0.5 -0.5
 todo(sm<6) draw quad
-todo probe all rgba (2.62500048, 209.5, 17.0, 224.5)
+probe all rgba (2.62500048, 209.5, 17.0, 224.5)
[require]
 shader model >= 5.0
-- 
GitLab

https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/723

    

2025

2024

2023

2022

[PATCH v5 1/1] vkd3d-shader/ir: Implement MAD in two operations if flagged as precise.