From: Francisco Casas fcasas@codeweavers.com
Non-constant vector indexing cannot be implemented with relative addressing of register indexes, because relative addressing cannot operate at the level of individual register components.
Mathematical operations must be used instead.
For floats, the native compiler seems to index an identity matrix to get the i-th column, and then proceeds to compute the dot product between that column and the vector. For ints, bit operations seem to be performed.
While probably less efficient, this implementation complies with the type-checking at the IR level and when writing bytecode. --- libs/vkd3d-shader/hlsl.c | 13 +++ libs/vkd3d-shader/hlsl.h | 2 + libs/vkd3d-shader/hlsl_codegen.c | 81 +++++++++++++++++++ tests/array-index-expr.shader_test | 18 ++--- tests/expr-indexing.shader_test | 22 ++--- tests/hlsl-matrix-indexing.shader_test | 6 +- .../hlsl-vector-indexing-uniform.shader_test | 4 +- 7 files changed, 121 insertions(+), 25 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 617aef30..1cdc358d 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1287,6 +1287,19 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl return load; }
+struct hlsl_ir_load *hlsl_new_load_partial_path(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + unsigned int length, const struct vkd3d_shader_location *loc) +{ + /* tmp_deref is a shallow copy of *deref with its path_len shortened to length. This deref can only exist temporarily because it is not the real owner of its members. */ + struct hlsl_deref tmp_deref; + + assert(length <= deref->path_len); + + tmp_deref = *deref; + tmp_deref.path_len = length; + return hlsl_new_load_index(ctx, &tmp_deref, NULL, loc); +} + struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, const struct vkd3d_shader_location *loc) { diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 6a4e314d..a59fa36d 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1119,6 +1119,8 @@ struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var const struct vkd3d_shader_location *loc); struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); +struct hlsl_ir_load *hlsl_new_load_partial_path(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + unsigned int length, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc);
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 6a99c33e..ed301952 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1992,6 +1992,86 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; }
+static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + struct hlsl_ir_node *instr) +{ /* If the last path node of deref is a non-constant index into a vector, replaces instr with arithmetic: load the whole vector, multiply it by the mask (index == {0, 1, 2, 3}) cast to the vector type, and add up the components. Returns true only when instr was replaced. */ + struct hlsl_ir_node *idx; + struct hlsl_type *type; + unsigned int i; + + assert(deref->var); + + if (deref->path_len == 0) + return false; + + type = deref->var->data_type; + + for (i = 0; i < deref->path_len - 1; ++i) + type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); + + idx = deref->path[deref->path_len - 1].node; /* idx is the last path node; type was resolved through every path node before it. */ + + if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) + { + struct hlsl_ir_node *eq, *res, *mult, *swizzle, *comps[4] = {0}, *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_load *vector_load; + struct hlsl_ir_constant *c; + + if (!(vector_load = hlsl_new_load_partial_path(ctx, deref, deref->path_len - 1, &instr->loc))) + return false; + list_add_before(&instr->entry, &vector_load->node.entry); + + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) + return false; + list_add_before(&instr->entry, &swizzle->entry); + + if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &instr->loc))) + return false; + c->value.u[0].u = 0; + c->value.u[1].u = 1; + c->value.u[2].u = 2; + c->value.u[3].u = 3; /* NOTE(review): all four lanes are written regardless of type->dimx; presumably value.u always has four entries, confirm. */ + list_add_before(&instr->entry, &c->node.entry); + + operands[0] = swizzle; + operands[1] = &c->node; + if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, + hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); + + if (!(eq = hlsl_new_cast(ctx, eq, type, &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); /* eq is now the comparison result cast to the vector's own type. */ + + if (!(mult = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, &vector_load->node, eq))) + return false; + list_add_before(&instr->entry, &mult->entry); + + for (i = 0; i < type->dimx; ++i) + { + unsigned int s = hlsl_swizzle_from_writemask(1 << i); + + if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, mult, 
&instr->loc))) + return false; + list_add_before(&instr->entry, &comps[i]->entry); + } + + res = comps[0]; /* Accumulate the single-component swizzles of mult with HLSL_OP2_ADD. */ + for (i = 1; i < type->dimx; ++i) + { + if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, res, comps[i]))) + return false; + list_add_before(&instr->entry, &res->entry); + } + + hlsl_replace_node(instr, res); + return true; + } + + return false; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -3937,6 +4017,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); } while (progress); + transform_derefs(ctx, lower_nonconstant_vector_derefs, body);
if (profile->major_version < 4) { diff --git a/tests/array-index-expr.shader_test b/tests/array-index-expr.shader_test index 675e5e46..1c469e8c 100644 --- a/tests/array-index-expr.shader_test +++ b/tests/array-index-expr.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] uniform float i;
float4 main() : SV_TARGET @@ -9,17 +9,17 @@ float4 main() : SV_TARGET
[test] uniform 0 float 0 -todo draw quad -todo probe all rgba (11.0, 11.0, 11.0, 11.0) +draw quad +probe all rgba (11.0, 11.0, 11.0, 11.0) uniform 0 float 1 -todo draw quad -todo probe all rgba (12.0, 12.0, 12.0, 12.0) +draw quad +probe all rgba (12.0, 12.0, 12.0, 12.0) uniform 0 float 2 -todo draw quad -todo probe all rgba (13.0, 13.0, 13.0, 13.0) +draw quad +probe all rgba (13.0, 13.0, 13.0, 13.0) uniform 0 float 3 -todo draw quad -todo probe all rgba (14.0, 14.0, 14.0, 14.0) +draw quad +probe all rgba (14.0, 14.0, 14.0, 14.0)
[pixel shader todo] diff --git a/tests/expr-indexing.shader_test b/tests/expr-indexing.shader_test index 83a63d67..3dcc5727 100644 --- a/tests/expr-indexing.shader_test +++ b/tests/expr-indexing.shader_test @@ -13,7 +13,7 @@ draw quad probe all rgba (8.0, 8.0, 8.0, 8.0)
-[pixel shader todo] +[pixel shader] float4 a, b; float i;
@@ -26,8 +26,8 @@ float4 main() : sv_target uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float4 5.0 6.0 7.0 8.0 uniform 8 float 2 -todo draw quad -todo probe all rgba (10.0, 10.0, 10.0, 10.0) +draw quad +probe all rgba (10.0, 10.0, 10.0, 10.0)
[pixel shader] @@ -44,7 +44,7 @@ draw quad probe all rgba (3.0, 3.0, 3.0, 3.0)
-[pixel shader todo] +[pixel shader] float4 a; float i;
@@ -56,11 +56,11 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 0 -todo draw quad -todo probe all rgba (4.0, 4.0, 4.0, 4.0) +draw quad +probe all rgba (4.0, 4.0, 4.0, 4.0) uniform 4 float 2 -todo draw quad -todo probe all rgba (1.0, 1.0, 1.0, 1.0) +draw quad +probe all rgba (1.0, 1.0, 1.0, 1.0)
[pixel shader] @@ -82,7 +82,7 @@ draw quad probe all rgba (4.0, 4.0, 4.0, 4.0)
-[pixel shader todo] +[pixel shader] float4 a; float i;
@@ -99,5 +99,5 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 1 -todo draw quad -todo probe all rgba (2.0, 2.0, 2.0, 2.0) +draw quad +probe all rgba (2.0, 2.0, 2.0, 2.0) diff --git a/tests/hlsl-matrix-indexing.shader_test b/tests/hlsl-matrix-indexing.shader_test index c3d08296..a57d8fb8 100644 --- a/tests/hlsl-matrix-indexing.shader_test +++ b/tests/hlsl-matrix-indexing.shader_test @@ -108,7 +108,7 @@ draw quad probe all rgba (3.0, 4.0, 50.0, 60.0)
-[pixel shader todo] +[pixel shader] uniform float i;
float4 main() : sv_target @@ -120,8 +120,8 @@ float4 main() : sv_target
[test] uniform 0 float 2 -todo draw quad -todo probe all rgba (8, 9, 10, 11) +draw quad +probe all rgba (8, 9, 10, 11)
[pixel shader todo] diff --git a/tests/hlsl-vector-indexing-uniform.shader_test b/tests/hlsl-vector-indexing-uniform.shader_test index 968f570b..e5ffbdd0 100644 --- a/tests/hlsl-vector-indexing-uniform.shader_test +++ b/tests/hlsl-vector-indexing-uniform.shader_test @@ -1,6 +1,6 @@ % Use a uniform to prevent the compiler from optimizing.
-[pixel shader todo] +[pixel shader] uniform float i; float4 main() : SV_TARGET { @@ -12,5 +12,5 @@ float4 main() : SV_TARGET
[test] uniform 0 float 2 -todo draw quad +draw quad probe all rgba (0.5, 0.3, 0.8, 0.2)