Based on !723.
-- v2: vkd3d-shader/ir: Implement MAD in two operations if flagged as precise.
From: Giovanni Mascellani gmascellani@codeweavers.com
It is meant as a generic pass to host all program changes to single instructions that do not require keeping a global state, instead of having to loop through the whole program many times. --- libs/vkd3d-shader/ir.c | 140 +++++++++++++++++++++++------------------ 1 file changed, 79 insertions(+), 61 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 9d5b79047..39b490f85 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -94,86 +94,104 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, return true; }
-static enum vkd3d_result vsir_program_lower_texkills(struct vsir_program *program) +static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program, + struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx) { + const unsigned int components_read = 3 + (program->shader_version.major >= 2); struct vkd3d_shader_instruction_array *instructions = &program->instructions; - struct vkd3d_shader_instruction *texkill_ins, *ins; - unsigned int components_read = 3 + (program->shader_version.major >= 2); - unsigned int tmp_idx = ~0u; - unsigned int i, k; - - for (i = 0; i < instructions->count; ++i) - { - texkill_ins = &instructions->elements[i]; + size_t pos = texkill - instructions->elements; + struct vkd3d_shader_instruction *ins; + unsigned int j;
- if (texkill_ins->handler_idx != VKD3DSIH_TEXKILL) - continue; + if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY;
- if (!shader_instruction_array_insert_at(instructions, i + 1, components_read + 1)) - return VKD3D_ERROR_OUT_OF_MEMORY; + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++;
- if (tmp_idx == ~0u) - tmp_idx = program->temp_count++; + /* tmp = ins->dst[0] < 0 */
- /* tmp = ins->dst[0] < 0 */ + ins = &instructions->elements[pos + 1]; + if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_LTO, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY;
- ins = &instructions->elements[i + 1]; - if (!vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_LTO, 1, 2)) + vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].reg.idx[0].offset = *tmp_idx; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + + ins->src[0].reg = texkill->dst[0].reg; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; + vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); + ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[1].reg.u.immconst_f32[0] = 0.0f; + ins->src[1].reg.u.immconst_f32[1] = 0.0f; + ins->src[1].reg.u.immconst_f32[2] = 0.0f; + ins->src[1].reg.u.immconst_f32[3] = 0.0f; + + /* tmp.x = tmp.x || tmp.y */ + /* tmp.x = tmp.x || tmp.z */ + /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ + + for (j = 1; j < components_read; ++j) + { + ins = &instructions->elements[pos + 1 + j]; + if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_OR, 1, 2))) return VKD3D_ERROR_OUT_OF_MEMORY;
vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->dst[0].reg.idx[0].offset = tmp_idx; - ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL; + ins->dst[0].reg.idx[0].offset = *tmp_idx; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0;
- ins->src[0].reg = texkill_ins->dst[0].reg; - ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; - vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); + vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].reg.idx[0].offset = *tmp_idx; + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[1].reg.u.immconst_f32[0] = 0.0f; - ins->src[1].reg.u.immconst_f32[1] = 0.0f; - ins->src[1].reg.u.immconst_f32[2] = 0.0f; - ins->src[1].reg.u.immconst_f32[3] = 0.0f; + ins->src[1].reg.idx[0].offset = *tmp_idx; + ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j); + }
- /* tmp.x = tmp.x || tmp.y */ - /* tmp.x = tmp.x || tmp.z */ - /* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */ + /* discard_nz tmp.x */
- for (k = 1; k < components_read; ++k) - { - ins = &instructions->elements[i + 1 + k]; - if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_OR, 1, 2))) - return VKD3D_ERROR_OUT_OF_MEMORY; + ins = &instructions->elements[pos + 1 + components_read]; + if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_DISCARD, 0, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ;
- vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->dst[0].reg.idx[0].offset = tmp_idx; - ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; - - vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[0].reg.idx[0].offset = tmp_idx; - ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); - vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[1].reg.idx[0].offset = tmp_idx; - ins->src[1].swizzle = vkd3d_shader_create_swizzle(k, k, k, k); - } + vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].reg.idx[0].offset = *tmp_idx; + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
- /* discard_nz tmp.x */ + /* Make the original instruction no-op */ + vkd3d_shader_instruction_make_nop(texkill);
- ins = &instructions->elements[i + 1 + components_read]; - if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_DISCARD, 0, 1))) - return VKD3D_ERROR_OUT_OF_MEMORY; - ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + return VKD3D_OK; +}
- vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; - ins->src[0].reg.idx[0].offset = tmp_idx; - ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + unsigned int tmp_idx = ~0u, i; + enum vkd3d_result ret;
- /* Make the original instruction no-op */ - vkd3d_shader_instruction_make_nop(texkill_ins); + for (i = 0; i < instructions->count; ++i) + { + struct vkd3d_shader_instruction *ins = &instructions->elements[i]; + + switch (ins->handler_idx) + { + case VKD3DSIH_TEXKILL: + if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0) + return ret; + break; + + default: + break; + } }
return VKD3D_OK; @@ -5409,7 +5427,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t
remove_dcl_temps(program);
- if ((result = vsir_program_lower_texkills(program)) < 0) + if ((result = vsir_program_lower_instructions(program)) < 0) return result;
if (program->shader_version.major >= 6)
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/ir.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 39b490f85..e4cc9dcbd 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -56,19 +56,6 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i vsir_instruction_init(ins, &location, VKD3DSIH_NOP); }
-static void remove_dcl_temps(struct vsir_program *program) -{ - unsigned int i; - - for (i = 0; i < program->instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - - if (ins->handler_idx == VKD3DSIH_DCL_TEMPS) - vkd3d_shader_instruction_make_nop(ins); - } -} - static bool vsir_instruction_init_with_params(struct vsir_program *program, struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) @@ -189,6 +176,10 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr return ret; break;
+ case VKD3DSIH_DCL_TEMPS: + vkd3d_shader_instruction_make_nop(ins); + break; + default: break; } @@ -5425,8 +5416,6 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t { enum vkd3d_result result = VKD3D_OK;
- remove_dcl_temps(program); - if ((result = vsir_program_lower_instructions(program)) < 0) return result;
From: Conor McCarthy cmccarthy@codeweavers.com
With some changes by Giovanni Mascellani. --- libs/vkd3d-shader/ir.c | 56 +++++++++++++++++++ libs/vkd3d-shader/vkd3d_shader_private.h | 25 +++++++++ .../hlsl/arithmetic-float-uniform.shader_test | 2 +- 3 files changed, 82 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index e4cc9dcbd..7c00d3c03 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -17,6 +17,7 @@ */
#include "vkd3d_shader_private.h" +#include "vkd3d_types.h"
bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) { @@ -159,6 +160,56 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program return VKD3D_OK; }
+/* The Shader Model 5 Assembly documentation states: "If components of a mad + * instruction are tagged as precise, the hardware must execute a mad instruction + * or the exact equivalent, and it cannot split it into a multiply followed by an add." + * But DXIL.rst states the opposite: "Floating point multiply & add. This operation is + * not fused for "precise" operations." + * Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */ +static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program, + struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_instruction *mul_ins, *add_ins; + size_t pos = mad - instructions->elements; + struct vkd3d_shader_dst_param *mul_dst; + + if (!(mad->flags & VKD3DSI_PRECISE_XYZW)) + return VKD3D_OK; + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; + + mul_ins = &instructions->elements[pos]; + add_ins = &instructions->elements[pos + 1]; + + mul_ins->handler_idx = VKD3DSIH_MUL; + mul_ins->src_count = 2; + + if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW; + + mul_dst = mul_ins->dst; + *add_ins->dst = *mul_dst; + + mul_dst->modifiers = 0; + vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1); + mul_dst->reg.dimension = add_ins->dst->reg.dimension; + mul_dst->reg.idx[0].offset = *tmp_idx; + + add_ins->src[0].reg = mul_dst->reg; + add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask); + add_ins->src[0].modifiers = 0; + add_ins->src[1] = mul_ins->src[2]; + + return VKD3D_OK; +} + static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program) { struct 
vkd3d_shader_instruction_array *instructions = &program->instructions; @@ -176,6 +227,11 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr return ret; break;
+ case VKD3DSIH_MAD: + if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0) + return ret; + break; + case VKD3DSIH_DCL_TEMPS: vkd3d_shader_instruction_make_nop(ins); break; diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index b07a7bff7..f401fd7a3 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1761,6 +1761,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ return compacted_swizzle; }
+static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask) +{ + static const unsigned int swizzles[16] = + { + 0, + VKD3D_SHADER_SWIZZLE(X, X, X, X), + VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), + VKD3D_SHADER_SWIZZLE(X, Y, X, X), + VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), + VKD3D_SHADER_SWIZZLE(X, Z, X, X), + VKD3D_SHADER_SWIZZLE(Y, Z, X, X), + VKD3D_SHADER_SWIZZLE(X, Y, Z, X), + VKD3D_SHADER_SWIZZLE(W, W, W, W), + VKD3D_SHADER_SWIZZLE(X, W, X, X), + VKD3D_SHADER_SWIZZLE(Y, W, X, X), + VKD3D_SHADER_SWIZZLE(X, Y, W, X), + VKD3D_SHADER_SWIZZLE(Z, W, X, X), + VKD3D_SHADER_SWIZZLE(X, Z, W, X), + VKD3D_SHADER_SWIZZLE(Y, Z, W, X), + VKD3D_SHADER_SWIZZLE(X, Y, Z, W), + }; + + return swizzles[writemask & 0xf]; +} + struct vkd3d_struct { enum vkd3d_shader_structure_type type; diff --git a/tests/hlsl/arithmetic-float-uniform.shader_test b/tests/hlsl/arithmetic-float-uniform.shader_test index 61957f2bb..a49d3f00a 100644 --- a/tests/hlsl/arithmetic-float-uniform.shader_test +++ b/tests/hlsl/arithmetic-float-uniform.shader_test @@ -121,7 +121,7 @@ uniform 0 float4 1.00000007 -42.1 4.0 45.0 uniform 4 float4 1.625 -5.0 4.125 5.0 uniform 8 float4 1.00000007 -1.0 0.5 -0.5 todo(sm<6) draw quad -todo probe all rgba (2.62500048, 209.5, 17.0, 224.5) +probe all rgba (2.62500048, 209.5, 17.0, 224.5)
[require] shader model >= 5.0
This merge request was approved by Giovanni Mascellani.
This merge request was approved by Conor McCarthy.
There's still a typo in the second commit message.