This is a simplified version of !748 that doesn't handle conditional jumps. I'll submit a cleaned-up version of !748 itself after this one.
-- v4: vkd3d-shader/hlsl: Implement loop unrolling. vkd3d-shader/hlsl: Parse loop and unroll loop attributes. vkd3d-shader/hlsl: Pull evaluate_static_expression_as_uint upwards.
From: Victor Chiletto vchiletto@codeweavers.com
--- tests/hlsl/texture-load.shader_test | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+)
diff --git a/tests/hlsl/texture-load.shader_test b/tests/hlsl/texture-load.shader_test index 495fa88ea..3858f7ca6 100644 --- a/tests/hlsl/texture-load.shader_test +++ b/tests/hlsl/texture-load.shader_test @@ -117,3 +117,30 @@ float4 main(float4 pos : sv_position) : sv_target Texture2DMS<float4> s = t; return s.Load(pos.yx, 0); } + +% SM4.0 cannot dynamically index multisampled textures, it relies on loop unrolling. + +[require] +shader model >= 4.0 +shader model < 4.1 + +[pixel shader todo] +Texture2DMS<float4, 1> t; + +float4 main(float4 pos : sv_position) : sv_target +{ + int i; + float4 o; + for (i = 0; i < 1; i++) + { + o = t.Load(pos.xy, i); + } + return o; +} + +[test] +todo draw quad +probe (0, 0) rgba (0.1, 0.2, 0.3, 0.4) +probe (1, 0) rgba (0.5, 0.7, 0.6, 0.8) +probe (0, 1) rgba (0.6, 0.5, 0.2, 0.1) +probe (1, 1) rgba (0.8, 0.0, 0.7, 1.0)
From: Victor Chiletto vchiletto@codeweavers.com
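Loop unrolling can temporarily generate out-of-bounds (OOB) accesses that are later deleted, so stop reporting them in hlsl_component_index_range_from_deref() and report them from a separate validate_dereferences pass instead. --- libs/vkd3d-shader/hlsl_codegen.c | 173 ++++++++++++++++++++++--------- 1 file changed, 124 insertions(+), 49 deletions(-)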
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index eaa72836d..e0812627d 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1957,6 +1957,76 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc return progress; }
+enum validation_result +{ + DEREF_VALIDATION_OK, + DEREF_VALIDATION_OUT_OF_BOUNDS, + DEREF_VALIDATION_NOT_CONSTANT, +}; + +static enum validation_result hlsl_validate_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{ + struct hlsl_type *type = deref->var->data_type; + unsigned int i; + + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + + assert(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return DEREF_VALIDATION_NOT_CONSTANT; + + /* We should always have generated a cast to UINT. */ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->base_type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; + + switch (type->class) + { + case HLSL_CLASS_VECTOR: + if (idx >= type->dimx) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Vector index is out of bounds. %u/%u", idx, type->dimx); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; + + case HLSL_CLASS_MATRIX: + if (idx >= hlsl_type_major_size(type)) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; + + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Array index is out of bounds. 
%u/%u (%p, %u)", idx, type->e.array.elements_count, path_node, path_node->index); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; + + case HLSL_CLASS_STRUCT: + break; + + default: + vkd3d_unreachable(); + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + + return DEREF_VALIDATION_OK; +} + + static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, const char *usage) { @@ -1974,60 +2044,77 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct } }
-static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - unsigned int start, count; - - if (instr->type == HLSL_IR_RESOURCE_LOAD) + switch (instr->type) { - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - - if (!load->resource.var->is_uniform) + case HLSL_IR_RESOURCE_LOAD: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource must have a single uniform source."); + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + + if (!load->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Loaded resource must have a single uniform source."); + } + else if (hlsl_validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Loaded resource from "%s" must be determinable at compile time.", + load->resource.var->name); + note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); + } + + if (load->sampler.var) + { + if (!load->sampler.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Resource load sampler must have a single uniform source."); + } + else if (hlsl_validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Resource load sampler from "%s" must be determinable at compile time.", + load->sampler.var->name); + note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); + } + } + break; } - else if (!hlsl_component_index_range_from_deref(ctx, &load->resource, &start, &count)) + case HLSL_IR_RESOURCE_STORE: { - hlsl_error(ctx, &instr->loc, 
VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource from "%s" must be determinable at compile time.", - load->resource.var->name); - note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); - } + struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
- if (load->sampler.var) - { - if (!load->sampler.var->is_uniform) + if (!store->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Resource load sampler must have a single uniform source."); + "Accessed resource must have a single uniform source."); } - else if (!hlsl_component_index_range_from_deref(ctx, &load->sampler, &start, &count)) + else if (hlsl_validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Resource load sampler from "%s" must be determinable at compile time.", - load->sampler.var->name); - note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); + "Accessed resource from "%s" must be determinable at compile time.", + store->resource.var->name); + note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); } + break; } - } - else if (instr->type == HLSL_IR_RESOURCE_STORE) - { - struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - - if (!store->resource.var->is_uniform) + case HLSL_IR_LOAD: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource must have a single uniform source."); + struct hlsl_ir_load *load = hlsl_ir_load(instr); + hlsl_validate_component_index_range_from_deref(ctx, &load->src); + break; } - else if (!hlsl_component_index_range_from_deref(ctx, &store->resource, &start, &count)) + case HLSL_IR_STORE: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource from "%s" must be determinable at compile time.", - store->resource.var->name); - note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); + struct hlsl_ir_store *store = hlsl_ir_store(instr); + hlsl_validate_component_index_range_from_deref(ctx, &store->lhs); + break; } + default: + break; }
return false; @@ -5042,21 +5129,13 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl { case HLSL_CLASS_VECTOR: if (idx >= type->dimx) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Vector index is out of bounds. %u/%u", idx, type->dimx); return false; - } *start += idx; break;
case HLSL_CLASS_MATRIX: if (idx >= hlsl_type_major_size(type)) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); return false; - } if (hlsl_type_is_row_major(type)) *start += idx * type->dimx; else @@ -5065,11 +5144,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl
case HLSL_CLASS_ARRAY: if (idx >= type->e.array.elements_count) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); return false; - } *start += idx * hlsl_type_component_count(type->e.array.type); break;
@@ -5429,7 +5504,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body);
- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, validate_dereferences, body, NULL); hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); if (profile->major_version >= 4) hlsl_transform_ir(ctx, lower_combined_samples, body, NULL);
From: Victor Chiletto vchiletto@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 134 +++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 67 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 7fc35d4e1..4ff61e56b 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -570,6 +570,73 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); }
+static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_constant *constant; + struct hlsl_ir_node *node; + struct hlsl_block expr; + unsigned int ret = 0; + bool progress; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + switch (node->type) + { + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_SWIZZLE: + case HLSL_IR_LOAD: + case HLSL_IR_INDEX: + continue; + case HLSL_IR_CALL: + case HLSL_IR_IF: + case HLSL_IR_LOOP: + case HLSL_IR_JUMP: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_STORE: + case HLSL_IR_SWITCH: + case HLSL_IR_STATEBLOCK_CONSTANT: + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + } + } + + if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) + return 0; + hlsl_block_add_block(&expr, block); + + if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), + hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) + { + hlsl_block_cleanup(&expr); + return 0; + } + + do + { + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); + progress |= hlsl_copy_propagation_execute(ctx, &expr); + } while (progress); + + node = node_from_block(&expr); + if (node->type == HLSL_IR_CONSTANT) + { + constant = hlsl_ir_constant(node); + ret = constant->value.u[0].u; + } + else + { + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Failed to evaluate constant expression."); + } + + hlsl_block_cleanup(&expr); + + return ret; +} + static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) @@ -1273,73 +1340,6 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct 
hlsl_ir_node * return block; }
-static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_constant *constant; - struct hlsl_ir_node *node; - struct hlsl_block expr; - unsigned int ret = 0; - bool progress; - - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) - { - switch (node->type) - { - case HLSL_IR_CONSTANT: - case HLSL_IR_EXPR: - case HLSL_IR_SWIZZLE: - case HLSL_IR_LOAD: - case HLSL_IR_INDEX: - continue; - case HLSL_IR_CALL: - case HLSL_IR_IF: - case HLSL_IR_LOOP: - case HLSL_IR_JUMP: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_STORE: - case HLSL_IR_SWITCH: - case HLSL_IR_STATEBLOCK_CONSTANT: - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); - } - } - - if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) - return 0; - hlsl_block_add_block(&expr, block); - - if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), - hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) - { - hlsl_block_cleanup(&expr); - return 0; - } - - do - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); - progress |= hlsl_copy_propagation_execute(ctx, &expr); - } while (progress); - - node = node_from_block(&expr); - if (node->type == HLSL_IR_CONSTANT) - { - constant = hlsl_ir_constant(node); - ret = constant->value.u[0].u; - } - else - { - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Failed to evaluate constant expression."); - } - - hlsl_block_cleanup(&expr); - - return ret; -} - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) { if (t1->base_type > HLSL_TYPE_LAST_SCALAR || t2->base_type > HLSL_TYPE_LAST_SCALAR)
From: Victor Chiletto vchiletto@codeweavers.com
Based on a patch by Nikolay Sivov.
Co-authored-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl.c | 8 ++++++-- libs/vkd3d-shader/hlsl.h | 11 ++++++++++- libs/vkd3d-shader/hlsl.y | 23 ++++++++++++++--------- 3 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index ed80e2b75..b22d5891f 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1667,7 +1667,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type }
struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, const struct vkd3d_shader_location *loc) + struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, + unsigned int unroll_limit, const struct vkd3d_shader_location *loc) { struct hlsl_ir_loop *loop;
@@ -1676,6 +1677,9 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); hlsl_block_init(&loop->body); hlsl_block_add_block(&loop->body, block); + + loop->unroll_type = unroll_type; + loop->unroll_limit = unroll_limit; return &loop->node; }
@@ -1837,7 +1841,7 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ if (!clone_block(ctx, &body, &src->body, map)) return NULL;
- if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) + if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) { hlsl_block_cleanup(&body); return NULL; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 3cb98b765..dadb42100 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -559,12 +559,21 @@ struct hlsl_ir_if struct hlsl_block else_block; };
+enum hlsl_ir_loop_unroll_type +{ + HLSL_IR_LOOP_UNROLL, + HLSL_IR_LOOP_FORCE_UNROLL, + HLSL_IR_LOOP_FORCE_LOOP +}; + struct hlsl_ir_loop { struct hlsl_ir_node node; /* loop condition is stored in the body (as "if (!condition) break;") */ struct hlsl_block body; unsigned int next_index; /* liveness index of the end of the loop */ + unsigned int unroll_limit; + enum hlsl_ir_loop_unroll_type unroll_type; };
struct hlsl_ir_switch_case @@ -1344,7 +1353,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, const struct vkd3d_shader_location *loc); + struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 4ff61e56b..38783c2ad 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -641,8 +641,9 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { + enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; + unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; - unsigned int i;
if (attribute_list_has_duplicates(attributes)) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); @@ -655,18 +656,22 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct hlsl_attribute *attr = attributes->attrs[i]; if (!strcmp(attr->name, "unroll")) { - if (attr->args_count) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); - } - else + if (attr->args_count == 1) { - hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); + struct hlsl_block expr; + hlsl_block_init(&expr); + if (!hlsl_clone_block(ctx, &expr, &attr->instrs)) + return NULL; + + unroll_limit = evaluate_static_expression_as_uint(ctx, &expr, loc); + hlsl_block_cleanup(&expr); + + unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; } } else if (!strcmp(attr->name, "loop")) { - /* TODO: this attribute will be used to disable unrolling, once it's implememented. */ + unroll_type = HLSL_IR_LOOP_FORCE_LOOP; } else if (!strcmp(attr->name, "fastopt") || !strcmp(attr->name, "allow_uav_condition")) @@ -695,7 +700,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, else list_move_head(&body->instrs, &cond->instrs);
- if (!(loop = hlsl_new_loop(ctx, body, loc))) + if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) goto oom; hlsl_block_add_instr(init, loop);
From: Victor Chiletto vchiletto@codeweavers.com
Based on a patch by Nikolay Sivov.
Co-authored-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 205 +++++++++++++++++++++-- libs/vkd3d-shader/vkd3d_shader_private.h | 2 + tests/hlsl/for.shader_test | 4 +- tests/hlsl/function-return.shader_test | 4 +- tests/hlsl/loop.shader_test | 16 +- tests/hlsl/return.shader_test | 4 +- tests/hlsl/texture-load.shader_test | 4 +- 7 files changed, 213 insertions(+), 26 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index e0812627d..4639e2798 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -5387,6 +5387,199 @@ void hlsl_prepend_global_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *b } }
+static void transform_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) +{ + bool progress; + + do + { + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); + } while (progress); +} + +static bool loop_unrolling_find_jump(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) +{ + struct hlsl_ir_jump **out = context; + + if (node->type == HLSL_IR_JUMP) + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + + if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) + { + *out = jump; + return false; + } + } + + return true; +} + +#define LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS 1024 + +static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop) +{ + struct hlsl_block draft, tmp_dst; + unsigned int max_iterations, i; + + max_iterations = LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS; + if (loop->unroll_limit) + max_iterations = min(loop->unroll_limit, max_iterations); + + hlsl_block_init(&draft); + hlsl_block_init(&tmp_dst); + list_move_slice_tail(&draft.instrs, list_head(&block->instrs), list_prev(&block->instrs, &loop->node.entry)); + + for (i = 0; i < max_iterations; ++i) + { + struct hlsl_ir_jump *jump = NULL; + + if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) + goto fail; + hlsl_block_add_block(&draft, &tmp_dst); + + transform_run_const_passes(ctx, &draft); + remove_unreachable_code(ctx, &draft); + hlsl_transform_ir(ctx, loop_unrolling_find_jump, &draft, &jump); + + if (jump) + { + enum hlsl_ir_jump_type type = jump->type; + + if (list_next(&draft.instrs, &jump->node.entry)) + { + 
hlsl_warning(ctx, &jump->node.loc, VKD3D_SHADER_WARNING_HLSL_UNABLE_TO_UNROLL, "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported"); + goto fail; + } + + list_remove(&jump->node.entry); + hlsl_free_instr(&jump->node); + + if (type == HLSL_IR_JUMP_BREAK) + break; + } + } + + /* Native gives up on unrolling entirely after 1024 iterations. + * It also will not insert a loop if there are iterations left + * after max_iterations, i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ + if (i == LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS) + { + hlsl_warning(ctx, &loop->node.loc, VKD3D_SHADER_WARNING_HLSL_UNABLE_TO_UNROLL, "Unable to unroll loop, maximum iterations reached (%u).", LOOP_UNROLLING_DEFAULT_MAX_ITERATIONS); + goto fail; + } + + list_remove(&loop->node.entry); + hlsl_free_instr(&loop->node); + + list_move_head(&block->instrs, &draft.instrs); + hlsl_block_cleanup(&tmp_dst); + hlsl_block_cleanup(&draft); + + return true; + +fail: + if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, "Failed to unroll loop marked as forced unroll."); + + hlsl_block_cleanup(&draft); + hlsl_block_cleanup(&tmp_dst); + + return false; +} + +static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_block **containing_block) +{ + struct hlsl_ir_node *instr, *next; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *nested_loop; + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) + return nested_loop; + + if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + { + *containing_block = block; + return loop; + } + + break; + } + case HLSL_IR_IF: + { + struct hlsl_ir_loop 
*loop; + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) + return loop; + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) + return loop; + + break; + } + case HLSL_IR_SWITCH: + { + struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch_case *c; + struct hlsl_ir_loop *loop; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) + return loop; + } + + break; + } + default: + break; + } + } + + return NULL; +} + +static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + while (true) + { + struct hlsl_block clone, *containing_block; + struct hlsl_ir_loop *loop, *cloned_loop; + + if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) + return; + + if (!hlsl_clone_block(ctx, &clone, block)) + return; + + cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); + assert(cloned_loop); + + if (!loop_unrolling_unroll_loop(ctx, containing_block, cloned_loop)) + { + hlsl_block_cleanup(&clone); + loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; + continue; + } + + hlsl_block_cleanup(block); + hlsl_block_init(block); + hlsl_block_add_block(block, &clone); + } +} + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { @@ -5487,16 +5680,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_float_modulus, body); hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); - do - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= 
hlsl_copy_propagation_execute(ctx, body); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); - progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); - } - while (progress); + transform_unroll_loops(ctx, body); + transform_run_const_passes(ctx, body); remove_unreachable_code(ctx, body); hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL);
diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index b07a7bff7..ce78d85f9 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -150,6 +150,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, + VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5031,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -157,6 +158,7 @@ enum vkd3d_shader_error VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE = 5305, + VKD3D_SHADER_WARNING_HLSL_UNABLE_TO_UNROLL = 5306,
VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000,
diff --git a/tests/hlsl/for.shader_test b/tests/hlsl/for.shader_test index 7ce6c8213..b3fbd76d7 100644 --- a/tests/hlsl/for.shader_test +++ b/tests/hlsl/for.shader_test @@ -63,7 +63,7 @@ probe (481, 0, 640, 480) rgba ( 5.0, 10.0, 0.0, 0.0) [require] % Reset requirements
-[pixel shader todo(sm<4)] +[pixel shader] float4 main(float tex : texcoord) : sv_target { int i; @@ -76,7 +76,7 @@ float4 main(float tex : texcoord) : sv_target }
[test] -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (10.0, 45.0, 0.0, 0.0)
[pixel shader fail(sm<6)] diff --git a/tests/hlsl/function-return.shader_test b/tests/hlsl/function-return.shader_test index 3c085a578..9d754a0e2 100644 --- a/tests/hlsl/function-return.shader_test +++ b/tests/hlsl/function-return.shader_test @@ -143,7 +143,7 @@ uniform 0 float 0.9 todo(sm<4 | glsl) draw quad probe all rgba (1.0, 0.9, 1.0, 0.6) 1
-[pixel shader todo(sm<4)] +[pixel shader] float func(out float o) { o = 0.1; @@ -181,7 +181,7 @@ float4 main() : sv_target }
[test] -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.4, 0.3, 0.3, 0.9) 1
[pixel shader todo(sm<4)] diff --git a/tests/hlsl/loop.shader_test b/tests/hlsl/loop.shader_test index 2de10d986..47fece6ac 100644 --- a/tests/hlsl/loop.shader_test +++ b/tests/hlsl/loop.shader_test @@ -1,6 +1,6 @@ % TODO: dxcompiler emits no loops for any of these test shaders.
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -18,11 +18,11 @@ float4 main() : sv_target
[test] uniform 0 float 5.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (50.0, 50.0, 50.0, 50.0)
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -41,10 +41,10 @@ float4 main() : sv_target
[test] uniform 0 float 4.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (20.0, 20.0, 20.0, 20.0)
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -70,10 +70,10 @@ float4 main() : sv_target
[test] uniform 0 float 4.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (409.1, 409.1, 409.1, 409.1)
-[pixel shader todo(sm<4)] +[pixel shader] float a;
float4 main() : sv_target @@ -100,7 +100,7 @@ float4 main() : sv_target
[test] uniform 0 float 4.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (410.1, 410.1, 410.1, 410.1)
% loop attribute by itself diff --git a/tests/hlsl/return.shader_test b/tests/hlsl/return.shader_test index 2195f749a..24c157afd 100644 --- a/tests/hlsl/return.shader_test +++ b/tests/hlsl/return.shader_test @@ -124,7 +124,7 @@ uniform 0 float 0.9 todo(sm<4 | glsl) draw quad probe all rgba (0.4, 0.5, 0.6, 0.7) 1
-[pixel shader todo(sm<4)] +[pixel shader] void main(out float4 ret : sv_target) { ret = float4(0.1, 0.2, 0.3, 0.4); @@ -138,7 +138,7 @@ void main(out float4 ret : sv_target) }
[test] -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.2, 0.4, 0.6, 0.8)
[pixel shader todo(sm<4)] diff --git a/tests/hlsl/texture-load.shader_test b/tests/hlsl/texture-load.shader_test index 3858f7ca6..bf63ec307 100644 --- a/tests/hlsl/texture-load.shader_test +++ b/tests/hlsl/texture-load.shader_test @@ -124,7 +124,7 @@ float4 main(float4 pos : sv_position) : sv_target shader model >= 4.0 shader model < 4.1
-[pixel shader todo] +[pixel shader] Texture2DMS<float4, 1> t;
float4 main(float4 pos : sv_position) : sv_target @@ -139,7 +139,7 @@ float4 main(float4 pos : sv_position) : sv_target }
[test] -todo draw quad +todo(glsl) draw quad probe (0, 0) rgba (0.1, 0.2, 0.3, 0.4) probe (1, 0) rgba (0.5, 0.7, 0.6, 0.8) probe (0, 1) rgba (0.6, 0.5, 0.2, 0.1)