This would eliminate the todo for the precise mad() test in !718. We may need test results on NVIDIA and Intel hardware to decide whether we actually want this.
-- v4: vkd3d-shader/spirv: Implement MAD in two operations if flagged as precise.
From: Conor McCarthy <cmccarthy@codeweavers.com>
--- libs/vkd3d-shader/spirv.c | 34 +++++++++++++++++-- .../hlsl/arithmetic-float-uniform.shader_test | 2 +- 2 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 46130244c..3b250da0b 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -1447,6 +1447,20 @@ static uint32_t vkd3d_spirv_build_op_isub(struct vkd3d_spirv_builder *builder, SpvOpISub, result_type, operand0, operand1); }
+static uint32_t vkd3d_spirv_build_op_fadd(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpFAdd, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_fmul(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpFMul, result_type, operand0, operand1); +} + static uint32_t vkd3d_spirv_build_op_fdiv(struct vkd3d_spirv_builder *builder, uint32_t result_type, uint32_t operand0, uint32_t operand1) { @@ -7204,8 +7218,24 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp for (i = 0; i < instruction->src_count; ++i) src_id[i] = spirv_compiler_emit_load_src(compiler, &src[i], dst->write_mask);
- val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, - instr_set_id, glsl_inst, src_id, instruction->src_count); + if (instruction->handler_idx == VKD3DSIH_MAD && (instruction->flags & VKD3DSI_PRECISE_XYZW)) + { + /* The HLSL docs state: "If components of a mad instruction are tagged as precise, the + * hardware must execute a mad instruction or the exact equivalent, and it cannot split + * it into a multiply followed by an add." + * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is + * not fused for "precise" operations." + * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */ + val_id = vkd3d_spirv_build_op_fmul(builder, type_id, src_id[0], src_id[1]); + vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); + val_id = vkd3d_spirv_build_op_fadd(builder, type_id, val_id, src_id[2]); + vkd3d_spirv_build_op_decorate(builder, val_id, SpvDecorationNoContraction, NULL, 0); + } + else + { + val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, + instr_set_id, glsl_inst, src_id, instruction->src_count); + }
if (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) diff --git a/tests/hlsl/arithmetic-float-uniform.shader_test b/tests/hlsl/arithmetic-float-uniform.shader_test index 8bc3992e7..c6b7caaae 100644 --- a/tests/hlsl/arithmetic-float-uniform.shader_test +++ b/tests/hlsl/arithmetic-float-uniform.shader_test @@ -121,7 +121,7 @@ uniform 0 float4 1.00000007 -42.1 4.0 45.0 uniform 4 float4 1.625 -5.0 4.125 5.0 uniform 8 float4 1.00000007 -1.0 0.5 -0.5 todo(sm<6) draw quad -todo probe all rgba (2.62500048, 209.5, 17.0, 224.5) +probe all rgba (2.62500048, 209.5, 17.0, 224.5)
[require] shader model >= 5.0