Signed-off-by: Giovanni Mascellani gmascellani@codeweavers.com Signed-off-by: Francisco Casas fcasas@codeweavers.com --- Functionality-wise, this should be able to wholly replace lower_broadcasts(). However, it generates less vectorized code, so I don't know what is the general sentiment WRT immediately remove lower_broadcasts(), remove it once more vectorized code is generated (or an effective vectorization pass cares about it), or whatever else. --- libs/vkd3d-shader/hlsl.h | 4 + libs/vkd3d-shader/hlsl.y | 52 +++++----- libs/vkd3d-shader/hlsl_codegen.c | 97 +++++++++++++++++++ tests/hlsl-duplicate-modifiers.shader_test | 2 +- tests/hlsl-initializer-matrix.shader_test | 2 +- ...lsl-return-implicit-conversion.shader_test | 10 +- tests/hlsl-shape.shader_test | 10 +- tests/matrix-semantics.shader_test | 14 +-- 8 files changed, 146 insertions(+), 45 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index a142fa48..34c6bfc3 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -709,6 +709,10 @@ struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsig const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl, bool intrinsic); +struct hlsl_ir_node *hlsl_add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc); +struct hlsl_ir_load *hlsl_add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node, + struct hlsl_ir_node *offset, struct hlsl_type *data_type, const struct vkd3d_shader_location loc); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var);
void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 7d9c0a2d..ac17f8b1 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -266,7 +266,7 @@ static bool implicit_compatible_data_types(struct hlsl_type *t1, struct hlsl_typ return false; }
-static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, +struct hlsl_ir_node *hlsl_add_implicit_conversion(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { struct hlsl_type *src_type = node->data_type; @@ -508,7 +508,7 @@ static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs { struct hlsl_ir_store *store;
- if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, &loc))) + if (!(return_value = hlsl_add_implicit_conversion(ctx, instrs, return_value, return_type, &loc))) return NULL;
if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) @@ -528,7 +528,7 @@ static struct hlsl_ir_jump *add_return(struct hlsl_ctx *ctx, struct list *instrs return jump; }
-static struct hlsl_ir_load *add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node, +struct hlsl_ir_load *hlsl_add_load(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_node, struct hlsl_ir_node *offset, struct hlsl_type *data_type, const struct vkd3d_shader_location loc) { struct hlsl_ir_node *add = NULL; @@ -578,7 +578,7 @@ static bool add_record_load(struct hlsl_ctx *ctx, struct list *instrs, struct hl return false; list_add_tail(instrs, &c->node.entry);
- return !!add_load(ctx, instrs, record, &c->node, field->type, loc); + return !!hlsl_add_load(ctx, instrs, record, &c->node, field->type, loc); }
static struct hlsl_ir_expr *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, @@ -618,7 +618,7 @@ static struct hlsl_ir_node *add_matrix_scalar_load(struct hlsl_ctx *ctx, struct if (!(add = add_binary_arithmetic_expr(ctx, instrs, HLSL_OP2_ADD, &mul->node, minor, loc))) return NULL;
- if (!(load = add_load(ctx, instrs, matrix, &add->node, scalar_type, *loc))) + if (!(load = hlsl_add_load(ctx, instrs, matrix, &add->node, scalar_type, *loc))) return NULL;
return &load->node; @@ -704,7 +704,7 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls return false; }
- return !!add_load(ctx, instrs, array, index, data_type, loc); + return !!hlsl_add_load(ctx, instrs, array, index, data_type, loc); }
static struct hlsl_struct_field *get_struct_field(struct list *fields, const char *name) @@ -1214,7 +1214,7 @@ static struct hlsl_ir_expr *add_unary_logical_expr(struct hlsl_ctx *ctx, struct bool_type = hlsl_get_numeric_type(ctx, arg->data_type->type, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) + if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg, bool_type, loc))) return NULL;
return add_expr(ctx, instrs, op, args, bool_type, loc); @@ -1235,10 +1235,10 @@ static struct hlsl_ir_expr *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str
common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) return NULL;
return add_expr(ctx, instrs, op, args, common_type, loc); @@ -1293,10 +1293,10 @@ static struct hlsl_ir_expr *add_binary_comparison_expr(struct hlsl_ctx *ctx, str common_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); return_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) return NULL;
return add_expr(ctx, instrs, op, args, return_type, loc); @@ -1327,10 +1327,10 @@ static struct hlsl_ir_expr *add_binary_logical_expr(struct hlsl_ctx *ctx, struct
common_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_BOOL, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) return NULL;
return add_expr(ctx, instrs, op, args, common_type, loc); @@ -1370,10 +1370,10 @@ static struct hlsl_ir_expr *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l return_type = hlsl_get_numeric_type(ctx, type, base, dimx, dimy); integer_type = hlsl_get_numeric_type(ctx, type, HLSL_TYPE_INT, dimx, dimy);
- if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) + if (!(args[0] = hlsl_add_implicit_conversion(ctx, instrs, arg1, return_type, loc))) return NULL;
- if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) + if (!(args[1] = hlsl_add_implicit_conversion(ctx, instrs, arg2, integer_type, loc))) return NULL;
return add_expr(ctx, instrs, op, args, return_type, loc); @@ -1479,7 +1479,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in { writemask = (1 << lhs_type->dimx) - 1;
- if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) + if (!(rhs = hlsl_add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) return NULL; }
@@ -1598,10 +1598,10 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, return; list_add_tail(instrs, &c->node.entry);
- if (!(load = add_load(ctx, instrs, src, &c->node, src_comp_type, src->loc))) + if (!(load = hlsl_add_load(ctx, instrs, src, &c->node, src_comp_type, src->loc))) return;
- if (!(conv = add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc))) + if (!(conv = hlsl_add_implicit_conversion(ctx, instrs, &load->node, dst_comp_type, &src->loc))) return;
if (!(c = hlsl_new_uint_constant(ctx, dst_reg_offset, &src->loc))) @@ -1886,7 +1886,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, return arg;
type = hlsl_get_numeric_type(ctx, type->type, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - return add_implicit_conversion(ctx, params->instrs, arg, type, loc); + return hlsl_add_implicit_conversion(ctx, params->instrs, arg, type, loc); }
static bool intrinsic_abs(struct hlsl_ctx *ctx, @@ -1923,10 +1923,10 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx,
cast_type = hlsl_get_vector_type(ctx, base, 3);
- if (!(arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc))) + if (!(arg1_cast = hlsl_add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc))) return false;
- if (!(arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc))) + if (!(arg2_cast = hlsl_add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc))) return false;
if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) @@ -2204,7 +2204,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl hlsl_fixme(ctx, loc, "Tiled resource status argument.");
/* +1 for the mipmap level */ - if (!(coords = add_implicit_conversion(ctx, instrs, params->args[0], + if (!(coords = hlsl_add_implicit_conversion(ctx, instrs, params->args[0], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim + 1), loc))) return false;
@@ -2248,13 +2248,13 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl /* Only HLSL_IR_LOAD can return an object. */ sampler_load = hlsl_ir_load(params->args[0]);
- if (!(coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(coords = hlsl_add_implicit_conversion(ctx, instrs, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
if (params->args_count == 3) { - if (!(offset = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(offset = hlsl_add_implicit_conversion(ctx, instrs, params->args[2], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim), loc))) return false; } @@ -2329,7 +2329,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl
if (params->args_count == 3 || params->args_count == 4) { - if (!(offset = add_implicit_conversion(ctx, instrs, params->args[2], + if (!(offset = hlsl_add_implicit_conversion(ctx, instrs, params->args[2], hlsl_get_vector_type(ctx, HLSL_TYPE_INT, sampler_dim), loc))) return false; } @@ -2359,7 +2359,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl /* Only HLSL_IR_LOAD can return an object. */ sampler_load = hlsl_ir_load(params->args[0]);
- if (!(coords = add_implicit_conversion(ctx, instrs, params->args[1], + if (!(coords = hlsl_add_implicit_conversion(ctx, instrs, params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) return false;
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5fdcd4dc..6d4b550d 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -273,6 +273,102 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v return false; }
+static bool lower_numeric_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_type *src_type, *dst_type = instr->data_type; + struct hlsl_ir_node *src, *value; + struct vkd3d_string_buffer *name; + static unsigned int counter = 0; + struct hlsl_ir_load *load; + struct hlsl_ir_expr *expr; + struct hlsl_ir_var *var; + unsigned int dst_idx; + bool broadcast; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP1_CAST) + return false; + + src = expr->operands[0].node; + src_type = src->data_type; + + if (dst_type->type > HLSL_CLASS_LAST_NUMERIC || src_type->type > HLSL_CLASS_LAST_NUMERIC) + return false; + if (dst_type->type == HLSL_CLASS_SCALAR && src_type->type == HLSL_CLASS_SCALAR) + return false; + + broadcast = src_type->dimx == 1 && src_type->dimy == 1; + assert(dst_type->dimx * dst_type->dimy <= src_type->dimx * src_type->dimy || broadcast); + if (src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX && !broadcast) + { + assert(dst_type->dimx <= src_type->dimx); + assert(dst_type->dimy <= src_type->dimy); + } + + name = vkd3d_string_buffer_get(&ctx->string_buffers); + vkd3d_string_buffer_printf(name, "<cast-%u>", counter++); + var = hlsl_new_synthetic_var(ctx, name->buffer, dst_type, instr->loc); + vkd3d_string_buffer_release(&ctx->string_buffers, name); + if (!var) + return false; + + for (dst_idx = 0; dst_idx < dst_type->dimx * dst_type->dimy; ++dst_idx) + { + struct hlsl_type *src_scalar_type, *dst_scalar_type; + unsigned int src_idx, src_offset, dst_offset; + struct hlsl_ir_store *store; + struct hlsl_ir_constant *c; + + if (broadcast) + { + src_idx = 0; + } + else + { + if (src_type->type == HLSL_CLASS_MATRIX && dst_type->type == HLSL_CLASS_MATRIX) + { + unsigned int x = dst_idx % dst_type->dimx, y = dst_idx / dst_type->dimx; + + src_idx = y * src_type->dimx + x; + } + else + { + src_idx = dst_idx; + } + } + + dst_offset = hlsl_compute_component_offset(ctx, dst_type, dst_idx, &dst_scalar_type); + src_offset = hlsl_compute_component_offset(ctx, src_type, src_idx, &src_scalar_type); + + if (!(c = hlsl_new_uint_constant(ctx, src_offset, &src->loc))) + return false; + list_add_before(&instr->entry, &c->node.entry); + + if (!(load = hlsl_add_load(ctx, &instr->entry, src, &c->node, src_scalar_type, src->loc))) + return false; + + if (!(value = hlsl_add_implicit_conversion(ctx, &instr->entry, &load->node, dst_scalar_type, &src->loc))) + return false; + + if (!(c = hlsl_new_uint_constant(ctx, dst_offset, &src->loc))) + return false; + list_add_before(&instr->entry, &c->node.entry); + + if (!(store = hlsl_new_store(ctx, var, &c->node, value, 0, src->loc))) + return false; + list_add_before(&instr->entry, &store->node.entry); + } + + if (!(load = hlsl_new_load(ctx, var, NULL, dst_type, instr->loc))) + return false; + list_add_before(&instr->entry, &load->node.entry); + hlsl_replace_node(instr, &load->node); + + return true; +} + enum copy_propagation_value_state { VALUE_STATE_NOT_WRITTEN = 0, @@ -1904,6 +2000,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry }
transform_ir(ctx, lower_broadcasts, body, NULL); + transform_ir(ctx, lower_numeric_casts, body, NULL); while (transform_ir(ctx, fold_redundant_casts, body, NULL)); do { diff --git a/tests/hlsl-duplicate-modifiers.shader_test b/tests/hlsl-duplicate-modifiers.shader_test index fcae12da..6491701a 100644 --- a/tests/hlsl-duplicate-modifiers.shader_test +++ b/tests/hlsl-duplicate-modifiers.shader_test @@ -7,5 +7,5 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.1, 0.2, 0.3, 0.4) diff --git a/tests/hlsl-initializer-matrix.shader_test b/tests/hlsl-initializer-matrix.shader_test index ea9de9c0..7e12b0a0 100644 --- a/tests/hlsl-initializer-matrix.shader_test +++ b/tests/hlsl-initializer-matrix.shader_test @@ -55,7 +55,7 @@ float4 main() : SV_TARGET }
[test] -todo draw quad +draw quad probe all rgba (21, 22, 31, 32)
diff --git a/tests/hlsl-return-implicit-conversion.shader_test b/tests/hlsl-return-implicit-conversion.shader_test index bf99d9cb..4fe8e7eb 100644 --- a/tests/hlsl-return-implicit-conversion.shader_test +++ b/tests/hlsl-return-implicit-conversion.shader_test @@ -5,7 +5,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader] @@ -15,7 +15,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader] @@ -25,7 +25,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader] @@ -35,8 +35,8 @@ float4x1 main() : sv_target }
[test] -todo draw quad -probe all rgba (0.4, 0.3, 0.2, 0.1) +draw quad +todo probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader] float3 func() diff --git a/tests/hlsl-shape.shader_test b/tests/hlsl-shape.shader_test index 57d59534..65cc322c 100644 --- a/tests/hlsl-shape.shader_test +++ b/tests/hlsl-shape.shader_test @@ -211,7 +211,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 6.0, 8.0)
[pixel shader] @@ -235,7 +235,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 6.0, 8.0)
[pixel shader] @@ -260,7 +260,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 6.0, 8.0)
[pixel shader] @@ -309,7 +309,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 0.0, 0.0)
[pixel shader] @@ -321,7 +321,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (2.0, 4.0, 0.0, 0.0)
[pixel shader] diff --git a/tests/matrix-semantics.shader_test b/tests/matrix-semantics.shader_test index 7106eb86..acda4a16 100644 --- a/tests/matrix-semantics.shader_test +++ b/tests/matrix-semantics.shader_test @@ -5,8 +5,8 @@ float4x1 main() : sv_target }
[test] -todo draw quad -probe all rgba (1.0, 2.0, 3.0, 4.0) +draw quad +todo probe all rgba (1.0, 2.0, 3.0, 4.0)
[pixel shader] row_major float1x4 main() : sv_target @@ -15,7 +15,7 @@ row_major float1x4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (1.0, 2.0, 3.0, 4.0)
[require] @@ -28,7 +28,7 @@ row_major float4x1 main() : sv_target }
[test] -todo draw quad +draw quad probe all r 1.0
[pixel shader] @@ -38,7 +38,7 @@ float1x4 main() : sv_target }
[test] -todo draw quad +draw quad probe all r 1.0
[pixel shader] @@ -49,7 +49,7 @@ void main(out row_major float1x4 x : sv_target0, out float1x4 y : sv_target1) }
[test] -todo draw quad +draw quad probe all rgba (1.0, 2.0, 3.0, 4.0)
[pixel shader fail todo] @@ -67,5 +67,5 @@ void main(out float1x4 x : sv_target0, out float1x4 y : sv_target4) }
[test] -todo draw quad +draw quad probe all r 1.0