[PATCH v6 0/1] MR723: vkd3d-shader/spirv: Implement MAD in two operations if flagged as precise.
This would eliminate the todo for the precise mad() test in !718. Maybe we need test results on nvidia and intel to decide if we actually want this. -- v6: vkd3d-shader/ir: Implement MAD in two operations if flagged as precise. https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/723
From: Conor McCarthy <cmccarthy(a)codeweavers.com> --- libs/vkd3d-shader/ir.c | 56 +++++++++++++++++++ libs/vkd3d-shader/vkd3d_shader_private.h | 25 +++++++++ .../hlsl/arithmetic-float-uniform.shader_test | 2 +- 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 8af537390..03da942ef 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -1825,6 +1825,8 @@ struct cf_flattener unsigned int control_flow_depth; struct cf_flattener_info *control_flow_info; size_t control_flow_info_size; + + unsigned int precise_mad_temp_idx; }; static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error) @@ -2036,6 +2038,54 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla flattener->block_names[block_id] = buffer.buffer; } +/* The Shader Model 5 Assembly documentation states: "If components of a mad + * instruction are tagged as precise, the hardware must execute a mad instruction + * or the exact equivalent, and it cannot split it into a multiply followed by an add." + * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is + * not fused for "precise" operations." + * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */ +static bool cf_flattener_handle_precise_mad(struct cf_flattener *flattener, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_shader_instruction *mul_ins, *add_ins; + struct vsir_program *program = flattener->program; + struct vkd3d_shader_dst_param *mul_dst; + + if (!(instruction->flags & VKD3DSI_PRECISE_XYZW)) + return cf_flattener_copy_instruction(flattener, instruction); + + if (!(mul_ins = cf_flattener_require_space(flattener, 2))) + return false; + add_ins = mul_ins + 1; + + if (!flattener->precise_mad_temp_idx) + flattener->precise_mad_temp_idx = 1 + program->temp_count++; + + *mul_ins = *instruction; + mul_ins->handler_idx = VKD3DSIH_MUL; + mul_ins->src_count = 2; + + if (!(vsir_instruction_init_with_params(program, add_ins, &instruction->location, VKD3DSIH_ADD, 1, 2))) + return false; + add_ins->flags = instruction->flags & VKD3DSI_PRECISE_XYZW; + + mul_dst = mul_ins->dst; + *add_ins->dst = *mul_dst; + + mul_dst->modifiers = 0; + vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, instruction->src[0].reg.data_type, 1); + mul_dst->reg.dimension = instruction->dst->reg.dimension; + mul_dst->reg.idx[0].offset = flattener->precise_mad_temp_idx - 1; + + add_ins->src[0].reg = mul_dst->reg; + add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask); + add_ins->src[0].modifiers = 0; + add_ins->src[1] = mul_ins->src[2]; + + flattener->instruction_count += 2; + return true; +} + static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, struct vkd3d_shader_message_context *message_context) { @@ -2375,6 +2425,12 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte main_block_open = false; break; + /* Handle precise MAD here because it requires insertion of an extra instruction. */ + case VKD3DSIH_MAD: + if (!cf_flattener_handle_precise_mad(flattener, instruction)) + return VKD3D_ERROR_OUT_OF_MEMORY; + break; + default: if (!cf_flattener_copy_instruction(flattener, instruction)) return VKD3D_ERROR_OUT_OF_MEMORY; diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index b07a7bff7..f401fd7a3 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1761,6 +1761,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ return compacted_swizzle; } +static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask) +{ + static const unsigned int swizzles[16] = + { + 0, + VKD3D_SHADER_SWIZZLE(X, X, X, X), + VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), + VKD3D_SHADER_SWIZZLE(X, Y, X, X), + VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), + VKD3D_SHADER_SWIZZLE(X, Z, X, X), + VKD3D_SHADER_SWIZZLE(Y, Z, X, X), + VKD3D_SHADER_SWIZZLE(X, Y, Z, X), + VKD3D_SHADER_SWIZZLE(W, W, W, W), + VKD3D_SHADER_SWIZZLE(X, W, X, X), + VKD3D_SHADER_SWIZZLE(Y, W, X, X), + VKD3D_SHADER_SWIZZLE(X, Y, W, X), + VKD3D_SHADER_SWIZZLE(Z, W, X, X), + VKD3D_SHADER_SWIZZLE(X, Z, W, X), + VKD3D_SHADER_SWIZZLE(Y, Z, W, X), + VKD3D_SHADER_SWIZZLE(X, Y, Z, W), + }; + + return swizzles[writemask & 0xf]; +} + struct vkd3d_struct { enum vkd3d_shader_structure_type type; diff --git a/tests/hlsl/arithmetic-float-uniform.shader_test b/tests/hlsl/arithmetic-float-uniform.shader_test index 8bc3992e7..c6b7caaae 100644 --- a/tests/hlsl/arithmetic-float-uniform.shader_test +++ b/tests/hlsl/arithmetic-float-uniform.shader_test @@ -121,7 +121,7 @@ uniform 0 float4 1.00000007 -42.1 4.0 45.0 uniform 4 float4 1.625 -5.0 4.125 5.0 uniform 8 float4 1.00000007 -1.0 0.5 -0.5 todo(sm<6) draw quad -todo probe all rgba (2.62500048, 209.5, 17.0, 224.5) +probe all rgba (2.62500048, 209.5, 17.0, 224.5) [require] shader model >= 5.0 -- GitLab https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/723
participants (2)
-
Conor McCarthy -
Conor McCarthy (@cmccarthy)