From: Francisco Casas fcasas@codeweavers.com
Non-constant vector indexing cannot be implemented with relative addressing of register indexes, because relative addressing cannot operate at the level of individual register components.
Mathematical operations must be used instead. --- libs/vkd3d-shader/hlsl.c | 13 +++ libs/vkd3d-shader/hlsl.h | 2 + libs/vkd3d-shader/hlsl_codegen.c | 101 ++++++++++++++++++ tests/array-index-expr.shader_test | 36 +++---- tests/expr-indexing.shader_test | 22 ++-- tests/hlsl-matrix-indexing.shader_test | 6 +- .../hlsl-vector-indexing-uniform.shader_test | 4 +- 7 files changed, 150 insertions(+), 34 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 617aef30..1cdc358d 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1287,6 +1287,19 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl return load; }
+struct hlsl_ir_load *hlsl_new_load_partial_path(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + unsigned int length, const struct vkd3d_shader_location *loc) +{ + /* Shallow copy of *deref whose path is cut to its first "length" nodes. It can only exist temporarily because it is not the real owner of its members. */ + struct hlsl_deref tmp_deref; + + assert(length <= deref->path_len); + + tmp_deref = *deref; + tmp_deref.path_len = length; + return hlsl_new_load_index(ctx, &tmp_deref, NULL, loc); +} + struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, const struct vkd3d_shader_location *loc) { diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 6a4e314d..a59fa36d 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1119,6 +1119,8 @@ struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var const struct vkd3d_shader_location *loc); struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); +struct hlsl_ir_load *hlsl_new_load_partial_path(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + unsigned int length, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc);
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index c6cd759f..c8580086 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1992,6 +1992,106 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; }
+static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *idx; + struct hlsl_deref *deref; + struct hlsl_type *type; + unsigned int i; + + if (instr->type != HLSL_IR_LOAD) + return false; + + deref = &hlsl_ir_load(instr)->src; + assert(deref->var); + + if (deref->path_len == 0) + return false; + + type = deref->var->data_type; + for (i = 0; i < deref->path_len - 1; ++i) + type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); /* After the loop, "type" is the type reached by every path node except the last. */ + + idx = deref->path[deref->path_len - 1].node; + + if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) /* Only loads whose last path node indexes a vector with a non-constant node are lowered. */ + { + struct hlsl_ir_node *eq, *swizzle, *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_load *vector_load; + struct hlsl_ir_constant *c; + + if (!(vector_load = hlsl_new_load_partial_path(ctx, deref, deref->path_len - 1, &instr->loc))) /* Load through the path without its last node, i.e. without the non-constant index. */ + return false; + list_add_before(&instr->entry, &vector_load->node.entry); + + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) /* Replicate the x component of idx into type->dimx components. */ + return false; + list_add_before(&instr->entry, &swizzle->entry); + + if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &instr->loc))) + return false; + c->value.u[0].u = 0; + c->value.u[1].u = 1; + c->value.u[2].u = 2; + c->value.u[3].u = 3; /* Slot k of the uint constant holds the value k. */ + list_add_before(&instr->entry, &c->node.entry); + + operands[0] = swizzle; + operands[1] = &c->node; + if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, + hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) /* Bool vector comparing the replicated index with the constant. */ + return false; + list_add_before(&instr->entry, &eq->entry); + + if (type->base_type == HLSL_TYPE_BOOL) /* Bool vectors: AND with the comparison result, then OR the components together. */ + { + struct hlsl_ir_node *and, *res, *comps[4] = {0}; + + if (!(and = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_AND, &vector_load->node, eq))) + return false; + list_add_before(&instr->entry, &and->entry); + + for (i = 0; i < type->dimx; ++i) + { + unsigned int s = hlsl_swizzle_from_writemask(1 << i); + + 
if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, and, &instr->loc))) /* Extract component i of "and" as a 1-component swizzle. */ + return false; + list_add_before(&instr->entry, &comps[i]->entry); + } + + res = comps[0]; + for (i = 1; i < type->dimx; ++i) + { + if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, res, comps[i]))) + return false; + list_add_before(&instr->entry, &res->entry); + } + hlsl_replace_node(instr, res); /* The OR chain replaces the original load. */ + } + else + { + struct hlsl_ir_node *dot; + + if (!(eq = hlsl_new_cast(ctx, eq, type, &instr->loc))) /* Cast the comparison result to the vector's own type so it can be used as an operand below. */ + return false; + list_add_before(&instr->entry, &eq->entry); + + operands[0] = &vector_load->node; + operands[1] = eq; + if (!(dot = hlsl_new_expr(ctx, type->dimx == 1 ? HLSL_OP2_MUL : HLSL_OP2_DOT, operands, + instr->data_type, &instr->loc))) /* MUL for 1-component vectors, DOT otherwise; the result has the load's data type. */ + return false; + list_add_before(&instr->entry, &dot->entry); + hlsl_replace_node(instr, dot); /* The MUL/DOT expression replaces the original load. */ + } + + return true; + } + + return false; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -3968,6 +4068,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry while (progress); hlsl_transform_ir(ctx, split_matrix_copies, body, NULL);
+ hlsl_transform_ir(ctx, lower_nonconstant_vector_derefs, body, NULL); hlsl_transform_ir(ctx, lower_narrowing_casts, body, NULL); hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); hlsl_transform_ir(ctx, lower_int_dot, body, NULL); diff --git a/tests/array-index-expr.shader_test b/tests/array-index-expr.shader_test index 35a13f7f..e058d984 100644 --- a/tests/array-index-expr.shader_test +++ b/tests/array-index-expr.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] uniform float i;
float4 main() : SV_TARGET @@ -9,20 +9,20 @@ float4 main() : SV_TARGET
[test] uniform 0 float 0 -todo draw quad -todo probe all rgba (11.0, 11.0, 11.0, 11.0) +draw quad +probe all rgba (11.0, 11.0, 11.0, 11.0) uniform 0 float 1 -todo draw quad -todo probe all rgba (12.0, 12.0, 12.0, 12.0) +draw quad +probe all rgba (12.0, 12.0, 12.0, 12.0) uniform 0 float 2 -todo draw quad -todo probe all rgba (13.0, 13.0, 13.0, 13.0) +draw quad +probe all rgba (13.0, 13.0, 13.0, 13.0) uniform 0 float 3 -todo draw quad -todo probe all rgba (14.0, 14.0, 14.0, 14.0) +draw quad +probe all rgba (14.0, 14.0, 14.0, 14.0)
-[pixel shader todo] +[pixel shader] uniform float i;
float4 main() : SV_TARGET @@ -34,17 +34,17 @@ float4 main() : SV_TARGET
[test] uniform 0 float 0 -todo draw quad -todo probe all rgba (21.0, 1.0, 24.0, 0.0) +draw quad +probe all rgba (21.0, 1.0, 24.0, 0.0) uniform 0 float 1 -todo draw quad -todo probe all rgba (22.0, 0.0, 23.0, 1.0) +draw quad +probe all rgba (22.0, 0.0, 23.0, 1.0) uniform 0 float 2 -todo draw quad -todo probe all rgba (23.0, 1.0, 22.0, 0.0) +draw quad +probe all rgba (23.0, 1.0, 22.0, 0.0) uniform 0 float 3 -todo draw quad -todo probe all rgba (24.0, 0.0, 21.0, 1.0) +draw quad +probe all rgba (24.0, 0.0, 21.0, 1.0)
[pixel shader todo] diff --git a/tests/expr-indexing.shader_test b/tests/expr-indexing.shader_test index 83a63d67..3dcc5727 100644 --- a/tests/expr-indexing.shader_test +++ b/tests/expr-indexing.shader_test @@ -13,7 +13,7 @@ draw quad probe all rgba (8.0, 8.0, 8.0, 8.0)
-[pixel shader todo] +[pixel shader] float4 a, b; float i;
@@ -26,8 +26,8 @@ float4 main() : sv_target uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float4 5.0 6.0 7.0 8.0 uniform 8 float 2 -todo draw quad -todo probe all rgba (10.0, 10.0, 10.0, 10.0) +draw quad +probe all rgba (10.0, 10.0, 10.0, 10.0)
[pixel shader] @@ -44,7 +44,7 @@ draw quad probe all rgba (3.0, 3.0, 3.0, 3.0)
-[pixel shader todo] +[pixel shader] float4 a; float i;
@@ -56,11 +56,11 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 0 -todo draw quad -todo probe all rgba (4.0, 4.0, 4.0, 4.0) +draw quad +probe all rgba (4.0, 4.0, 4.0, 4.0) uniform 4 float 2 -todo draw quad -todo probe all rgba (1.0, 1.0, 1.0, 1.0) +draw quad +probe all rgba (1.0, 1.0, 1.0, 1.0)
[pixel shader] @@ -82,7 +82,7 @@ draw quad probe all rgba (4.0, 4.0, 4.0, 4.0)
-[pixel shader todo] +[pixel shader] float4 a; float i;
@@ -99,5 +99,5 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 1 -todo draw quad -todo probe all rgba (2.0, 2.0, 2.0, 2.0) +draw quad +probe all rgba (2.0, 2.0, 2.0, 2.0) diff --git a/tests/hlsl-matrix-indexing.shader_test b/tests/hlsl-matrix-indexing.shader_test index c3d08296..a57d8fb8 100644 --- a/tests/hlsl-matrix-indexing.shader_test +++ b/tests/hlsl-matrix-indexing.shader_test @@ -108,7 +108,7 @@ draw quad probe all rgba (3.0, 4.0, 50.0, 60.0)
-[pixel shader todo] +[pixel shader] uniform float i;
float4 main() : sv_target @@ -120,8 +120,8 @@ float4 main() : sv_target
[test] uniform 0 float 2 -todo draw quad -todo probe all rgba (8, 9, 10, 11) +draw quad +probe all rgba (8, 9, 10, 11)
[pixel shader todo] diff --git a/tests/hlsl-vector-indexing-uniform.shader_test b/tests/hlsl-vector-indexing-uniform.shader_test index 968f570b..e5ffbdd0 100644 --- a/tests/hlsl-vector-indexing-uniform.shader_test +++ b/tests/hlsl-vector-indexing-uniform.shader_test @@ -1,6 +1,6 @@ % Use a uniform to prevent the compiler from optimizing.
-[pixel shader todo] +[pixel shader] uniform float i; float4 main() : SV_TARGET { @@ -12,5 +12,5 @@ float4 main() : SV_TARGET
[test] uniform 0 float 2 -todo draw quad +draw quad probe all rgba (0.5, 0.3, 0.8, 0.2)