Signed-off-by: Francisco Casas <fcasas@codeweavers.com>
May 5, 2022 9:32 AM, "Giovanni Mascellani" <gmascellani@codeweavers.com> wrote:
Signed-off-by: Giovanni Mascellani <gmascellani@codeweavers.com>
Functionality-wise, this should be able to wholly replace lower_broadcasts(). However, it generates less vectorized code, so I don't know what the general sentiment is WRT removing lower_broadcasts() immediately, removing it once more vectorized code is generated (or once an effective vectorization pass takes care of it), or something else.
libs/vkd3d-shader/hlsl.h | 4 + libs/vkd3d-shader/hlsl.y | 52 +++++----- libs/vkd3d-shader/hlsl_codegen.c | 97 +++++++++++++++++++ tests/hlsl-duplicate-modifiers.shader_test | 2 +- tests/hlsl-initializer-matrix.shader_test | 2 +- ...lsl-return-implicit-conversion.shader_test | 10 +- tests/hlsl-shape.shader_test | 10 +- tests/matrix-semantics.shader_test | 14 +-- 8 files changed, 146 insertions(+), 45 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index a142fa48..34c6bfc3 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -709,6 +709,10 @@ struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsig const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl, bool intrinsic); +struct hlsl_ir_node *hlsl_add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs,
- struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_load *hlsl_add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node,
- struct hlsl_ir_node *offset, struct hlsl_type *data_type, const struct vkd3d_shader_location
loc); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var);
void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 052d7384..ebd5ff26 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -266,7 +266,7 @@ static bool implicit_compatible_data_types(struct hlsl_type *t1, struct hlsl_typ return false; }
-static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, +struct hlsl_ir_node *hlsl_add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; @@ -508,7 +508,7 @@ static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs { struct hlsl_ir_store *store;
- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc)))
- if (!(return_value = hlsl_add_implicit_conversion(ctx, instrs, return_value, return_type, &loc)))
return NULL;
if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) @@ -528,7 +528,7 @@ static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs return jump; }
-static struct hlsl_ir_load *add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node, +struct hlsl_ir_load *hlsl_add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node, struct hlsl_ir_node *offset, struct hlsl_type *data_type, const struct vkd3d_shader_location loc) { struct hlsl_ir_node *add = NULL; @@ -578,7 +578,7 @@ static bool add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hl return false; list_add_tail(instrs, &c->node.entry);
- return !!add_load(ctx, instrs, record, &c->node, field->type, loc);
- return !!hlsl_add_load(ctx, instrs, record, &c->node, field->type, loc);
}
static struct hlsl_ir_expr *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, @@ -618,7 +618,7 @@ static struct hlsl_ir_node *add_matrix_scalar_load(struct hlsl_ctx *ctx, struct if (!(add = add_binary_arithmetic_expr(ctx, instrs, HLSL_OP2_ADD, &mul->node, minor, loc))) return NULL;
- if (!(load = add_load(ctx, instrs, matrix, &add->node, scalar_type, *loc)))
- if (!(load = hlsl_add_load(ctx, instrs, matrix, &add->node, scalar_type, *loc)))
return NULL;
return &load->node; @@ -704,7 +704,7 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls return false; }
- return !!add_load(ctx, instrs, array, index, data_type, loc);
- return !!hlsl_add_load(ctx, instrs, array, index, data_type, loc);
}
static struct hlsl_struct_field *get_struct_field(struct list *fields, const char *name) @@ -1206,7 +1206,7 @@ static struct hlsl_ir_expr *add_unary_logical_expr(struct hlsl_ctx *ctx, struct bool_type = hlsl_get_numeric_type(ctx, arg->data_type->type, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc)))
- if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg, bool_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, bool_type, loc); @@ -1227,10 +1227,10 @@ static struct hlsl_ir_expr *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str
common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
- if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
- if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, common_type, loc); @@ -1285,10 +1285,10 @@ static struct hlsl_ir_expr *add_binary_comparison_expr(struct hlsl_ctx *ctx, str common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
- if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
- if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, return_type, loc); @@ -1319,10 +1319,10 @@ static struct hlsl_ir_expr *add_binary_logical_expr(struct hlsl_ctx *ctx, struct
common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
- if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, common_type, loc)))
return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
- if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, common_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, common_type, loc); @@ -1362,10 +1362,10 @@ static struct hlsl_ir_expr *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc)))
- if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, return_type, loc)))
return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc)))
- if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, integer_type, loc)))
return NULL;
return add_expr(ctx, instrs, op, args, return_type, loc); @@ -1471,7 +1471,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in { writemask = (1 << lhs_type->dimx) - 1;
- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc)))
- if (!(rhs = hlsl_add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc)))
return NULL; }
@@ -1590,10 +1590,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, return; list_add_tail(instrs, &c->node.entry);
- if (!(load = add_load(ctx, instrs, src, &c->node, src_comp_type, src->loc)))
- if (!(load = hlsl_add_load(ctx, instrs, src, &c->node, src_comp_type, src->loc)))
return;
- if (!(conv = add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc)))
- if (!(conv = hlsl_add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc)))
return;
if (!(c = hlsl_new_uint_constant(ctx, dst_reg_offset, &src->loc))) @@ -1878,7 +1878,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, return arg;
type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
- return add_implicit_conversion(ctx, params->instrs, arg, type, loc);
- return hlsl_add_implicit_conversion(ctx, params->instrs, arg, type, loc);
}
static bool intrinsic_abs(struct hlsl_ctx *ctx, @@ -1915,10 +1915,10 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx,
cast_type = hlsl_get_vector_type(ctx, base, 3);
- if (!(arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc)))
- if (!(arg1_cast = hlsl_add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc)))
return false;
- if (!(arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc)))
- if (!(arg2_cast = hlsl_add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc)))
return false;
if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) @@ -2196,7 +2196,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl hlsl_fixme(ctx, loc, "Tiled resource status argument.");
/* +1 for the mipmap level */
- if (!(coords = add_implicit_conversion(ctx, instrs, params->args[0],
- if (!(coords = hlsl_add_implicit_conversion(ctx, instrs, params->args[0],
hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc))) return false;
@@ -2240,13 +2240,13 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl /* Only HLSL_IR_LOAD can return an object. */ sampler_load = hlsl_ir_load(params->args[0]);
- if (!(coords = add_implicit_conversion(ctx, instrs, params->args[1],
- if (!(coords = hlsl_add_implicit_conversion(ctx, instrs, params->args[1],
hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
if (params->args_count == 3) {
- if (!(offset = add_implicit_conversion(ctx, instrs, params->args[2],
- if (!(offset = hlsl_add_implicit_conversion(ctx, instrs, params->args[2],
hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim), loc))) return false; } @@ -2321,7 +2321,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl
if (params->args_count == 3 || params->args_count == 4) {
- if (!(offset = add_implicit_conversion(ctx, instrs, params->args[2],
- if (!(offset = hlsl_add_implicit_conversion(ctx, instrs, params->args[2],
hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim), loc))) return false; } @@ -2351,7 +2351,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl /* Only HLSL_IR_LOAD can return an object. */ sampler_load = hlsl_ir_load(params->args[0]);
- if (!(coords = add_implicit_conversion(ctx, instrs, params->args[1],
- if (!(coords = hlsl_add_implicit_conversion(ctx, instrs, params->args[1],
hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5fdcd4dc..6d4b550d 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -273,6 +273,102 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v return false; }
+static bool lower_numeric_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{
- struct hlsl_type *src_type, *dst_type = instr->data_type;
- struct hlsl_ir_node *src, *value;
- struct vkd3d_string_buffer *name;
- static unsigned int counter = 0;
- struct hlsl_ir_load *load;
- struct hlsl_ir_expr *expr;
- struct hlsl_ir_var *var;
- unsigned int dst_idx;
- bool broadcast;
- if (instr->type != HLSL_IR_EXPR)
- return false;
- expr = hlsl_ir_expr(instr);
- if (expr->op != HLSL_OP1_CAST)
- return false;
- src = expr->operands[0].node;
- src_type = src->data_type;
- if (dst_type->type > HLSL_CLASS_LAST_NUMERIC || src_type->type > HLSL_CLASS_LAST_NUMERIC)
- return false;
- if (dst_type->type == HLSL_CLASS_SCALAR && src_type->type == HLSL_CLASS_SCALAR)
- return false;
- broadcast = src_type->dimx == 1 && src_type->dimy == 1;
- assert(dst_type->dimx * dst_type->dimy <= src_type->dimx * src_type->dimy || broadcast);
- if (src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX && !broadcast)
- {
- assert(dst_type->dimx <= src_type->dimx);
- assert(dst_type->dimy <= src_type->dimy);
- }
- name = vkd3d_string_buffer_get(&ctx->string_buffers);
- vkd3d_string_buffer_printf(name, "<cast-%u>", counter++);
- var = hlsl_new_synthetic_var(ctx, name->buffer, dst_type, instr->loc);
- vkd3d_string_buffer_release(&ctx->string_buffers, name);
- if (!var)
- return false;
- for (dst_idx = 0; dst_idx < dst_type->dimx * dst_type->dimy; ++dst_idx)
- {
- struct hlsl_type *src_scalar_type, *dst_scalar_type;
- unsigned int src_idx, src_offset, dst_offset;
- struct hlsl_ir_store *store;
- struct hlsl_ir_constant *c;
- if (broadcast)
- {
- src_idx = 0;
- }
- else
- {
- if (src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX)
- {
- unsigned int x = dst_idx % dst_type->dimx, y = dst_idx / dst_type->dimx;
- src_idx = y * src_type->dimx + x;
- }
- else
- {
- src_idx = dst_idx;
- }
- }
- dst_offset = hlsl_compute_component_offset(ctx, dst_type, dst_idx, &dst_scalar_type);
- src_offset = hlsl_compute_component_offset(ctx, src_type, src_idx, &src_scalar_type);
- if (!(c = hlsl_new_uint_constant(ctx, src_offset, &src->loc)))
- return false;
- list_add_before(&instr->entry, &c->node.entry);
- if (!(load = hlsl_add_load(ctx, &instr->entry, src, &c->node, src_scalar_type, src->loc)))
- return false;
- if (!(value = hlsl_add_implicit_conversion(ctx, &instr->entry, &load->node, dst_scalar_type,
&src->loc)))
- return false;
- if (!(c = hlsl_new_uint_constant(ctx, dst_offset, &src->loc)))
- return false;
- list_add_before(&instr->entry, &c->node.entry);
- if (!(store = hlsl_new_store(ctx, var, &c->node, value, 0, src->loc)))
- return false;
- list_add_before(&instr->entry, &store->node.entry);
- }
- if (!(load = hlsl_new_load(ctx, var, NULL, dst_type, instr->loc)))
- return false;
- list_add_before(&instr->entry, &load->node.entry);
- hlsl_replace_node(instr, &load->node);
- return true;
+}
enum copy_propagation_value_state { VALUE_STATE_NOT_WRITTEN = 0, @@ -1904,6 +2000,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry }
transform_ir(ctx, lower_broadcasts, body, NULL);
- transform_ir(ctx, lower_numeric_casts, body, NULL);
while (transform_ir(ctx, fold_redundant_casts, body, NULL)); do { diff --git a/tests/hlsl-duplicate-modifiers.shader_test b/tests/hlsl-duplicate-modifiers.shader_test index fcae12da..6491701a 100644 --- a/tests/hlsl-duplicate-modifiers.shader_test +++ b/tests/hlsl-duplicate-modifiers.shader_test @@ -7,5 +7,5 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.1, 0.2, 0.3, 0.4) diff --git a/tests/hlsl-initializer-matrix.shader_test b/tests/hlsl-initializer-matrix.shader_test index ea9de9c0..7e12b0a0 100644 --- a/tests/hlsl-initializer-matrix.shader_test +++ b/tests/hlsl-initializer-matrix.shader_test @@ -55,7 +55,7 @@ float4 main() : SV_TARGET }
[test] -todo draw quad +draw quad probe all rgba (21, 22, 31, 32)
diff --git a/tests/hlsl-return-implicit-conversion.shader_test b/tests/hlsl-return-implicit-conversion.shader_test index bf99d9cb..4fe8e7eb 100644 --- a/tests/hlsl-return-implicit-conversion.shader_test +++ b/tests/hlsl-return-implicit-conversion.shader_test @@ -5,7 +5,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader] @@ -15,7 +15,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader] @@ -25,7 +25,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader] @@ -35,8 +35,8 @@ float4x1 main() : sv_target }
[test] -todo draw quad -probe all rgba (0.4, 0.3, 0.2, 0.1) +draw quad +todo probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader] float3 func() diff --git a/tests/hlsl-shape.shader_test b/tests/hlsl-shape.shader_test index 57d59534..65cc322c 100644 --- a/tests/hlsl-shape.shader_test +++ b/tests/hlsl-shape.shader_test @@ -211,7 +211,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 6.0, 8.0)
[pixel shader] @@ -235,7 +235,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 6.0, 8.0)
[pixel shader] @@ -260,7 +260,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 6.0, 8.0)
[pixel shader] @@ -309,7 +309,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 0.0, 0.0)
[pixel shader] @@ -321,7 +321,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 0.0, 0.0)
[pixel shader] diff --git a/tests/matrix-semantics.shader_test b/tests/matrix-semantics.shader_test index 7106eb86..acda4a16 100644 --- a/tests/matrix-semantics.shader_test +++ b/tests/matrix-semantics.shader_test @@ -5,8 +5,8 @@ float4x1 main() : sv_target }
[test] -todo draw quad -probe all rgba (1.0, 2.0, 3.0, 4.0) +draw quad +todo probe all rgba (1.0, 2.0, 3.0, 4.0)
[pixel shader] row_major float1x4 main() : sv_target @@ -15,7 +15,7 @@ row_major float1x4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (1.0, 2.0, 3.0, 4.0)
[require] @@ -28,7 +28,7 @@ row_major float4x1 main() : sv_target }
[test] -todo draw quad +draw quad probe all r 1.0
[pixel shader] @@ -38,7 +38,7 @@ float1x4 main() : sv_target }
[test] -todo draw quad +draw quad probe all r 1.0
[pixel shader] @@ -49,7 +49,7 @@ void main(out row_major float1x4 x : sv_target0, out float1x4 y : sv_target1) }
[test] -todo draw quad +draw quad probe all rgba (1.0, 2.0, 3.0, 4.0)
[pixel shader fail todo] @@ -67,5 +67,5 @@ void main(out float1x4 x : sv_target0, out float1x4 y : sv_target4) }
[test] -todo draw quad +draw quad probe all r 1.0 -- 2.36.0