This is the last piece that's needed for function calls to basically work.
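For context, here is a minimal sketch (hypothetical, not taken from the test suite) of the kind of shader this series targets: a user-defined function with an out parameter and an early return, which is now inlined and has its return lowered before bytecode is written.

uniform float f;

float func(out float o)
{
    o = 0.2;
    if (f < 0.5)
        return 0.1;  /* early return inside an "if" block */
    o = 0.8;
    return 0.9;
}

float4 main() : sv_target
{
    float x;
    float y = func(x);
    return float4(x, y, 0.0, 0.0);
}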
-- v3: vkd3d-shader/hlsl: Allow the final expression in a for loop initializer to be omitted.
       vkd3d-shader/hlsl: Handle early return after a CF block only if there was actually a return somewhere in the CF block.
       vkd3d-shader/hlsl: Inline function calls.
       vkd3d-shader/hlsl: Emit a hlsl_fixme() for unhandled instruction types when writing bytecode.
       vkd3d-shader/hlsl: Lower return statements.
From: Zebediah Figura zfigura@codeweavers.com
Reading from a four-component typed UAV, as the inout parameter test did, requires caps beyond shader model 5.0; the inout test is moved to a single-component UAV instead. --- tests/uav-out-param.shader_test | 34 ++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-)
diff --git a/tests/uav-out-param.shader_test b/tests/uav-out-param.shader_test index 646d14f6..d7e29667 100644 --- a/tests/uav-out-param.shader_test +++ b/tests/uav-out-param.shader_test @@ -11,9 +11,12 @@ size (1, 1)
RWTexture2D<float4> u;
-void func(inout float4 f) +void func(out float4 f) { - f.xz += 0.1; + f.x = 0.1; + f.y = 0.2; + f.z = 0.3; + f.w = 0.4; }
[numthreads(1, 1, 1)] @@ -24,4 +27,29 @@ void main()
[test] todo dispatch 1 1 1 -todo probe uav 0 (0, 0) rgba (0.1, 0.3, 0.3, 0.5) +probe uav 0 (0, 0) rgba (0.4, 0.1, 0.2, 0.3) + +[uav 0] +format r32 float +size (1, 1) + +0.1 + +[compute shader todo] + +RWTexture2D<float> u; + +void func(inout float f) +{ + f += 0.1; +} + + [numthreads(1, 1, 1)] +void main() +{ + func(u[uint2(0, 0)]); +} + +[test] +todo dispatch 1 1 1 +probe uav 0 (0, 0) r (0.2)
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 24 ++++++++++++++++++++---- libs/vkd3d-shader/hlsl.h | 2 +- tests/hlsl-invalid.shader_test | 4 ++++ 3 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 19e294f9..1f21f043 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -554,9 +554,13 @@ struct hlsl_type *hlsl_get_type(struct hlsl_scope *scope, const char *name, bool return NULL; }
-bool hlsl_get_function(struct hlsl_ctx *ctx, const char *name) +struct hlsl_ir_function *hlsl_get_function(struct hlsl_ctx *ctx, const char *name) { - return rb_get(&ctx->functions, name) != NULL; + struct rb_entry *entry; + + if ((entry = rb_get(&ctx->functions, name))) + return RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry); + return NULL; }
struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const char *name) @@ -2628,8 +2632,9 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; - struct hlsl_ir_function_decl *entry_func; + struct hlsl_ir_function_decl *decl, *entry_func = NULL; const struct hlsl_profile_info *profile; + struct hlsl_ir_function *func; const char *entry_point; struct hlsl_ctx ctx; int ret; @@ -2685,7 +2690,18 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d return VKD3D_ERROR_NOT_IMPLEMENTED; }
- if (!(entry_func = hlsl_get_func_decl(&ctx, entry_point))) + if ((func = hlsl_get_function(&ctx, entry_point))) + { + RB_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) + { + if (!decl->has_body) + continue; + entry_func = decl; + break; + } + } + + if (!entry_func) { const struct vkd3d_shader_location loc = {.source_name = compile_info->source_name};
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 567e3ad3..d615ff8a 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -988,7 +988,7 @@ void hlsl_free_instr_list(struct list *list); void hlsl_free_type(struct hlsl_type *type); void hlsl_free_var(struct hlsl_ir_var *decl);
-bool hlsl_get_function(struct hlsl_ctx *ctx, const char *name); +struct hlsl_ir_function *hlsl_get_function(struct hlsl_ctx *ctx, const char *name); struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const char *name); struct hlsl_type *hlsl_get_type(struct hlsl_scope *scope, const char *name, bool recursive); struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name); diff --git a/tests/hlsl-invalid.shader_test b/tests/hlsl-invalid.shader_test index 1b39d7f0..e56205cb 100644 --- a/tests/hlsl-invalid.shader_test +++ b/tests/hlsl-invalid.shader_test @@ -264,3 +264,7 @@ float4 main() : sv_target { return 1.0; } + +[pixel shader fail] + +float4 main() : sv_target;
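As a hypothetical illustration (not part of the patch): entry point lookup now walks the function's overloads and picks one that has a body, skipping bare declarations, so an input like the following resolves, while the new hlsl-invalid.shader_test case above covers a prototype with no definition at all.

float4 main(float4 unused) : sv_target;   /* declared only; skipped by the loop over overloads */

float4 main() : sv_target
{
    return float4(0.1, 0.2, 0.3, 0.4);
}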
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 1f21f043..3656d05d 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2696,8 +2696,13 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d { if (!decl->has_body) continue; + if (entry_func) + { + /* Depending on d3dcompiler version, either the first or last is + * selected. */ + hlsl_fixme(&ctx, &decl->loc, "Multiple valid entry point definitions."); + } entry_func = decl; - break; } }
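For illustration (hypothetical source, not from the tests), the case the new hlsl_fixme() reports is two bodied overloads of the entry point, where native d3dcompiler selects either the first or the last depending on its version:

float4 main() : sv_target
{
    return float4(0.1, 0.2, 0.3, 0.4);
}

float4 main(float4 t : texcoord) : sv_target
{
    return t;
}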
From: Zebediah Figura zfigura@codeweavers.com
--- include/private/list.h | 24 ++++ libs/vkd3d-shader/hlsl.c | 14 +++ libs/vkd3d-shader/hlsl.h | 5 + libs/vkd3d-shader/hlsl_codegen.c | 186 +++++++++++++++++++++++++++++++ tests/return.shader_test | 14 +-- 5 files changed, 236 insertions(+), 7 deletions(-)
diff --git a/include/private/list.h b/include/private/list.h index 5e92cfb2..2e1d95f3 100644 --- a/include/private/list.h +++ b/include/private/list.h @@ -186,6 +186,30 @@ static inline void list_move_tail( struct list *dst, struct list *src ) list_move_before( dst, src ); }
+/* move the slice of elements from begin to end inclusive to the head of dst */ +static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) +{ + struct list *dst_next = dst->next; + begin->prev->next = end->next; + end->next->prev = begin->prev; + dst->next = begin; + dst_next->prev = end; + begin->prev = dst; + end->next = dst_next; +} + +/* move the slice of elements from begin to end inclusive to the tail of dst */ +static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) +{ + struct list *dst_prev = dst->prev; + begin->prev->next = end->next; + end->next->prev = begin->prev; + dst_prev->next = begin; + dst->prev = end; + begin->prev = dst_prev; + end->next = dst; +} + /* iterate through the list */ #define LIST_FOR_EACH(cursor,list) \ for ((cursor) = (list)->next; (cursor) != (list); (cursor) = (cursor)->next) diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 3656d05d..0375e226 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1209,6 +1209,8 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) { struct hlsl_ir_function_decl *decl; + struct hlsl_ir_constant *constant; + struct hlsl_ir_store *store;
if (!(decl = hlsl_alloc(ctx, sizeof(*decl)))) return NULL; @@ -1227,6 +1229,18 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, decl->return_var->semantic = *semantic; }
+ if (!(decl->early_return_var = hlsl_new_synthetic_var(ctx, "early_return", + hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), loc))) + return decl; + + if (!(constant = hlsl_new_bool_constant(ctx, false, loc))) + return decl; + list_add_tail(&decl->body.instrs, &constant->node.entry); + + if (!(store = hlsl_new_simple_store(ctx, decl->early_return_var, &constant->node))) + return decl; + list_add_tail(&decl->body.instrs, &store->node.entry); + return decl; }
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index d615ff8a..bc0c4b4c 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -429,6 +429,11 @@ struct hlsl_ir_function_decl * Not to be confused with the function parameters! */ unsigned int attr_count; const struct hlsl_attribute *const *attrs; + + /* Synthetic boolean variable marking whether a return statement has been + * executed. Needed to deal with return statements in non-uniform control + * flow, since some backends can't handle them. */ + struct hlsl_ir_var *early_return_var; };
struct hlsl_ir_call diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 7c95ebd1..579b9b53 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -499,6 +499,185 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst return false; }
+static void insert_early_return_break(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, struct hlsl_ir_node *cf_instr) +{ + struct hlsl_ir_jump *jump; + struct hlsl_ir_load *load; + struct hlsl_ir_if *iff; + + if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) + return; + list_add_after(&cf_instr->entry, &load->node.entry); + + if (!(iff = hlsl_new_if(ctx, &load->node, cf_instr->loc))) + return; + list_add_after(&load->node.entry, &iff->node.entry); + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, cf_instr->loc))) + return; + list_add_tail(&iff->then_instrs.instrs, &jump->node.entry); +} + +/* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */ +static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_block *block, bool in_loop) +{ + struct hlsl_ir_node *return_instr = NULL, *cf_instr = NULL; + struct hlsl_ir_node *instr, *next; + + /* SM1 has no function calls. SM4 does, but native d3dcompiler inlines + * everything anyway. We are safest following suit. + * + * The basic idea is to keep track of whether the function has executed an + * early return in a synthesized boolean variable (func->early_return_var) + * and guard all code after the return on that variable being false. In the + * case of loops we also replace the return with a break. + * + * The following algorithm loops over instructions in a block, recursing + * into inferior CF blocks, until it hits one of the following two things: + * + * - A return statement. In this case, we remove everything after the return + * statement in this block. We have to stop and do this in a separate + * loop, because instructions must be deleted in reverse order (due to + * def-use chains.) + * + * If we're inside of a loop CF block, we can instead just turn the + * return into a break, which offers the right semantics—except that it + * won't break out of nested loops. + * + * - A CF block which might contain a return statement. After calling + * lower_return() on the CF block body, we stop, pull out everything after + * the CF instruction, shove it into an if block, and then lower that if + * block. + * + * (We could return a "did we make progress" boolean like transform_ir() + * and run this pass multiple times, but we already know the only block + * that still needs to be addressed, so there's not much point.) + * + * If we're inside of a loop CF block, we again do things differently. We + * already turned any returns into breaks. If the block we just processed + * was conditional, then "break" did our work for us. If it was a loop, + * we need to propagate that break to the outer loop. + */ + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->type == HLSL_IR_CALL) + { + struct hlsl_ir_call *call = hlsl_ir_call(instr); + + lower_return(ctx, call->decl, &call->decl->body, false); + } + else if (instr->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + lower_return(ctx, func, &iff->then_instrs, in_loop); + lower_return(ctx, func, &iff->else_instrs, in_loop); + + /* If we're in a loop, we don't need to do anything here. We + * turned the return into a break, and that will already skip + * anything that comes after this "if" block. */ + if (!in_loop) + { + cf_instr = instr; + break; + } + } + else if (instr->type == HLSL_IR_LOOP) + { + lower_return(ctx, func, &hlsl_ir_loop(instr)->body, true); + + if (in_loop) + { + /* "instr" is a nested loop. "return" breaks out of all + * loops, so break out of this one too now. */ + insert_early_return_break(ctx, func, instr); + } + else + { + cf_instr = instr; + break; + } + } + else if (instr->type == HLSL_IR_JUMP) + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + struct hlsl_ir_constant *constant; + struct hlsl_ir_store *store; + + if (jump->type == HLSL_IR_JUMP_RETURN) + { + if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc))) + return; + list_add_before(&jump->node.entry, &constant->node.entry); + + if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) + return; + list_add_after(&constant->node.entry, &store->node.entry); + + if (in_loop) + { + jump->type = HLSL_IR_JUMP_BREAK; + } + else + { + return_instr = instr; + break; + } + } + } + } + + if (return_instr) + { + /* If we're in a loop, we should have used "break" instead. */ + assert(!in_loop); + + /* Iterate in reverse, to avoid use-after-free when unlinking sources from + * the "uses" list. */ + LIST_FOR_EACH_ENTRY_SAFE_REV(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + list_remove(&instr->entry); + hlsl_free_instr(instr); + + /* Yes, we just freed it, but we're comparing pointers. */ + if (instr == return_instr) + break; + } + } + else if (cf_instr) + { + struct list *tail = list_tail(&block->instrs); + struct hlsl_ir_load *load; + struct hlsl_ir_node *not; + struct hlsl_ir_if *iff; + + /* If we're in a loop, we should have used "break" instead. */ + assert(!in_loop); + + if (tail == &cf_instr->entry) + return; + + if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) + return; + list_add_tail(&block->instrs, &load->node.entry); + + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, cf_instr->loc))) + return; + list_add_tail(&block->instrs, &not->entry); + + if (!(iff = hlsl_new_if(ctx, not, cf_instr->loc))) + return; + list_add_tail(&block->instrs, &iff->node.entry); + + list_move_slice_tail(&iff->then_instrs.instrs, list_next(&block->instrs, &cf_instr->entry), tail); + + lower_return(ctx, func, &iff->then_instrs, in_loop); + } +} + /* Lower casts from vec1 to vecN to swizzles. */ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -2926,6 +3105,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); vkd3d_free(recursive_call_ctx.backtrace);
+ /* Avoid going into an infinite loop when processing call instructions. + * lower_return() recurses into inferior calls. */ + if (ctx->result) + return ctx->result; + + lower_return(ctx, entry_func, body, false); + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) diff --git a/tests/return.shader_test b/tests/return.shader_test index e913d15d..3847765e 100644 --- a/tests/return.shader_test +++ b/tests/return.shader_test @@ -10,7 +10,7 @@ float4 main() : sv_target
[test] draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +probe all rgba (0.1, 0.2, 0.3, 0.4)
[pixel shader]
@@ -23,7 +23,7 @@ void main(out float4 ret : sv_target)
[test] draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +probe all rgba (0.1, 0.2, 0.3, 0.4)
[pixel shader]
@@ -39,7 +39,7 @@ float4 main() : sv_target [test] uniform 0 float 0.2 draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +probe all rgba (0.1, 0.2, 0.3, 0.4) uniform 0 float 0.8 draw quad probe all rgba (0.5, 0.6, 0.7, 0.8) @@ -69,7 +69,7 @@ draw quad probe all rgba (0.3, 0.4, 0.5, 0.6) uniform 0 float 0.8 draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +probe all rgba (0.1, 0.2, 0.3, 0.4)
[pixel shader]
@@ -93,10 +93,10 @@ void main(out float4 ret : sv_target) [test] uniform 0 float 0.1 draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) 1 +probe all rgba (0.1, 0.2, 0.3, 0.4) 1 uniform 0 float 0.5 draw quad -todo probe all rgba (0.2, 0.3, 0.4, 0.5) 1 +probe all rgba (0.2, 0.3, 0.4, 0.5) 1 uniform 0 float 0.9 draw quad probe all rgba (0.5, 0.6, 0.7, 0.8) 1 @@ -120,7 +120,7 @@ void main(out float4 ret : sv_target) [test] uniform 0 float 0.1 draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) 1 +probe all rgba (0.1, 0.2, 0.3, 0.4) 1 uniform 0 float 0.5 draw quad probe all rgba (0.5, 0.6, 0.7, 0.8) 1
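To make the pass added in this patch concrete, here is a schematic, source-level sketch (illustration only, not part of the patch) of what lower_return() does to an early return in a non-loop context; "early_return" corresponds to the synthesized func->early_return_var, and "<return value>" is schematic.

/* Input: an early return inside an "if" block. */
float4 main(uniform float f) : sv_target
{
    if (f < 0.5)
        return float4(0.1, 0.2, 0.3, 0.4);
    return float4(0.5, 0.6, 0.7, 0.8);
}

/* Conceptually, the body is rewritten to:
 *
 *     early_return = false;
 *     if (f < 0.5)
 *     {
 *         <return value> = float4(0.1, 0.2, 0.3, 0.4);
 *         early_return = true;
 *     }
 *     if (!early_return)
 *         <return value> = float4(0.5, 0.6, 0.7, 0.8);
 */

Inside loops, the return is instead turned into a break, which insert_early_return_break() propagates out of nested loops.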
From: Zebediah Figura zfigura@codeweavers.com
This was originally left alone in order to allow functions without early return to succeed, since in that case we would already emit the correct bytecode despite not handling the HLSL_IR_JUMP_RETURN instruction.
Now that we lower return statements, however, any unhandled instructions are either definitely going to result in invalid bytecode, or rare enough that it's not worth returning success anyway. --- libs/vkd3d-shader/hlsl_sm1.c | 2 +- libs/vkd3d-shader/hlsl_sm4.c | 2 +- tests/return.shader_test | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 34bead54..9bbf784a 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -915,7 +915,7 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b break;
default: - FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type)); + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); } } } diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index fb14889d..69f9b9d3 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -2401,7 +2401,7 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * break;
default: - FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type)); + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); } } } diff --git a/tests/return.shader_test b/tests/return.shader_test index 3847765e..a66d8756 100644 --- a/tests/return.shader_test +++ b/tests/return.shader_test @@ -146,7 +146,7 @@ void main(out float4 ret : sv_target) todo draw quad todo probe all rgba (0.2, 0.4, 0.6, 0.8)
-[pixel shader] +[pixel shader todo]
uniform float f;
@@ -166,23 +166,23 @@ void main(out float4 ret : sv_target)
[test] uniform 0 float 0.0 -draw quad +todo draw quad todo probe all rgba (0.1, 0.1, 0.1, 0.1) 1
uniform 0 float 0.1 -draw quad +todo draw quad todo probe all rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 0 float 0.3 -draw quad +todo draw quad todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float 0.7 -draw quad +todo draw quad todo probe all rgba (0.8, 0.8, 0.8, 0.8) 1
uniform 0 float 0.9 -draw quad +todo draw quad todo probe all rgba (0.9, 0.9, 0.9, 0.9) 1
[pixel shader todo]
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 289 ++++++++++++++++++ libs/vkd3d-shader/hlsl.h | 2 + libs/vkd3d-shader/hlsl_codegen.c | 32 +- libs/vkd3d-shader/hlsl_sm1.c | 3 +- libs/vkd3d-shader/hlsl_sm4.c | 3 +- tests/function-return.shader_test | 22 +- tests/hlsl-function.shader_test | 16 +- tests/hlsl-numthreads.shader_test | 2 +- ...lsl-return-implicit-conversion.shader_test | 60 ++-- tests/hlsl-static-initializer.shader_test | 6 +- tests/hlsl-storage-qualifiers.shader_test | 6 +- tests/uav-out-param.shader_test | 8 +- 12 files changed, 383 insertions(+), 66 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 0375e226..97713343 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1204,6 +1204,295 @@ struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_loc return loop; }
+struct clone_instr_map +{ + struct + { + const struct hlsl_ir_node *src; + struct hlsl_ir_node *dst; + } *instrs; + size_t count, capacity; +}; + +static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + struct clone_instr_map *map, const struct hlsl_ir_node *instr); + +static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, + const struct hlsl_block *src_block, struct clone_instr_map *map) +{ + const struct hlsl_ir_node *src; + struct hlsl_ir_node *dst; + + LIST_FOR_EACH_ENTRY(src, &src_block->instrs, struct hlsl_ir_node, entry) + { + if (!(dst = clone_instr(ctx, map, src))) + { + hlsl_free_instr_list(&dst_block->instrs); + return false; + } + list_add_tail(&dst_block->instrs, &dst->entry); + + if (!list_empty(&src->uses)) + { + if (!vkd3d_array_reserve((void **)&map->instrs, &map->capacity, map->count + 1, sizeof(*map->instrs))) + { + hlsl_free_instr_list(&dst_block->instrs); + return false; + } + + map->instrs[map->count].dst = dst; + map->instrs[map->count].src = src; + ++map->count; + } + } + return true; +} + +static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct hlsl_ir_node *src) +{ + size_t i; + + if (!src) + return NULL; + + for (i = 0; i < map->count; ++i) + { + if (map->instrs[i].src == src) + return map->instrs[i].dst; + } + + /* The block passed to hlsl_clone_block() should have been free of external + * references. */ + vkd3d_unreachable(); +} + +static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, + struct hlsl_deref *dst, const struct hlsl_deref *src) +{ + unsigned int i; + + assert(!src->offset.node); + + if (!init_deref(ctx, dst, src->var, src->path_len)) + return false; + + for (i = 0; i < src->path_len; ++i) + hlsl_src_from_node(&dst->path[i], map_instr(map, src->path[i].node)); + + return true; +} + +static void clone_src(struct clone_instr_map *map, struct hlsl_src *dst, const struct hlsl_src *src) +{ + hlsl_src_from_node(dst, map_instr(map, src->node)); +} + +static struct hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call *src) +{ + return hlsl_new_call(ctx, src->decl, &src->node.loc); +} + +static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_constant *src) +{ + struct hlsl_ir_constant *dst; + + if (!(dst = hlsl_new_constant(ctx, src->node.data_type, &src->node.loc))) + return NULL; + memcpy(dst->value, src->value, sizeof(src->value)); + return &dst->node; +} + +static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_expr *src) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(operands); ++i) + operands[i] = map_instr(map, src->operands[i].node); + + return hlsl_new_expr(ctx, src->op, operands, src->node.data_type, &src->node.loc); +} + +static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_if *src) +{ + struct hlsl_ir_if *dst; + + if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), src->node.loc))) + return NULL; + + if (!clone_block(ctx, &dst->then_instrs, &src->then_instrs, map) + || !clone_block(ctx, &dst->else_instrs, &src->else_instrs, map)) + { + hlsl_free_instr(&dst->node); + return NULL; + } + return &dst->node; +} + +static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) +{ + struct hlsl_ir_jump *dst; + + if (!(dst = hlsl_new_jump(ctx, src->type, src->node.loc))) + return NULL; + return &dst->node; +} + +static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) +{ + struct hlsl_ir_load *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_LOAD, src->node.data_type, &src->node.loc); + + if (!clone_deref(ctx, map, &dst->src, &src->src)) + { + vkd3d_free(dst); + return NULL; + } + return &dst->node; +} + +static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) +{ + struct hlsl_ir_loop *dst; + + if (!(dst = hlsl_new_loop(ctx, src->node.loc))) + return NULL; + if (!clone_block(ctx, &dst->body, &src->body, map)) + { + hlsl_free_instr(&dst->node); + return NULL; + } + return &dst->node; +} + +static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_resource_load *src) +{ + struct hlsl_ir_resource_load *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_RESOURCE_LOAD, src->node.data_type, &src->node.loc); + dst->load_type = src->load_type; + if (!clone_deref(ctx, map, &dst->resource, &src->resource)) + { + vkd3d_free(dst); + return NULL; + } + if (!clone_deref(ctx, map, &dst->sampler, &src->sampler)) + { + hlsl_cleanup_deref(&dst->resource); + vkd3d_free(dst); + return NULL; + } + clone_src(map, &dst->coords, &src->coords); + clone_src(map, &dst->lod, &src->lod); + clone_src(map, &dst->texel_offset, &src->texel_offset); + return &dst->node; +} + +static struct hlsl_ir_node *clone_resource_store(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_resource_store *src) +{ + struct hlsl_ir_resource_store *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_RESOURCE_STORE, NULL, &src->node.loc); + if (!clone_deref(ctx, map, &dst->resource, &src->resource)) + { + vkd3d_free(dst); + return NULL; + } + clone_src(map, &dst->coords, &src->coords); + clone_src(map, &dst->value, &src->value); + return &dst->node; +} + +static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_store *src) +{ + struct hlsl_ir_store *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_STORE, NULL, &src->node.loc); + + if (!clone_deref(ctx, map, &dst->lhs, &src->lhs)) + { + vkd3d_free(dst); + return NULL; + } + clone_src(map, &dst->rhs, &src->rhs); + dst->writemask = src->writemask; + return &dst->node; +} + +static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_swizzle *src) +{ + struct hlsl_ir_swizzle *dst; + + if (!(dst = hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, + map_instr(map, src->val.node), &src->node.loc))) + return NULL; + return &dst->node; +} + +static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + struct clone_instr_map *map, const struct hlsl_ir_node *instr) +{ + switch (instr->type) + { + case HLSL_IR_CALL: + return clone_call(ctx, hlsl_ir_call(instr)); + + case HLSL_IR_CONSTANT: + return clone_constant(ctx, hlsl_ir_constant(instr)); + + case HLSL_IR_EXPR: + return clone_expr(ctx, map, hlsl_ir_expr(instr)); + + case HLSL_IR_IF: + return clone_if(ctx, map, hlsl_ir_if(instr)); + + case HLSL_IR_JUMP: + return clone_jump(ctx, hlsl_ir_jump(instr)); + + case HLSL_IR_LOAD: + return clone_load(ctx, map, hlsl_ir_load(instr)); + + case HLSL_IR_LOOP: + return clone_loop(ctx, map, hlsl_ir_loop(instr)); + + case HLSL_IR_RESOURCE_LOAD: + return clone_resource_load(ctx, map, hlsl_ir_resource_load(instr)); + + case HLSL_IR_RESOURCE_STORE: + return clone_resource_store(ctx, map, hlsl_ir_resource_store(instr)); + + case HLSL_IR_STORE: + return clone_store(ctx, map, hlsl_ir_store(instr)); + + case HLSL_IR_SWIZZLE: + return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); + } + + vkd3d_unreachable(); +} + +bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block) +{ + struct clone_instr_map map = {0}; + bool ret; + + ret = clone_block(ctx, dst_block, src_block, &map); + vkd3d_free(map.instrs); + return ret; +} + struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index bc0c4b4c..53b47e40 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -975,6 +975,8 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var);
+bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); + void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func);
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 579b9b53..b44075a4 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -678,6 +678,32 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun } }
+/* Remove HLSL_IR_CALL instructions by inlining them. */ +static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + const struct hlsl_ir_function_decl *decl; + struct hlsl_ir_call *call; + struct hlsl_block block; + + if (instr->type != HLSL_IR_CALL) + return false; + call = hlsl_ir_call(instr); + decl = call->decl; + + if (!decl->has_body) + hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Function \"%s\" is not defined.", decl->func->name); + + list_init(&block.instrs); + if (!hlsl_clone_block(ctx, &block, &decl->body)) + return false; + list_move_before(&call->node.entry, &block.instrs); + + list_remove(&call->node.entry); + hlsl_free_instr(&call->node); + return true; +} + /* Lower casts from vec1 to vecN to swizzles. */ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -2173,8 +2199,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop switch (instr->type) { case HLSL_IR_CALL: - FIXME("We should have inlined all calls before computing liveness.\n"); - break; + /* We should have inlined all calls before computing liveness. */ + vkd3d_unreachable();
case HLSL_IR_STORE: { @@ -3112,6 +3138,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
lower_return(ctx, entry_func, body, false);
+ while (transform_ir(ctx, lower_calls, body, NULL)); + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 9bbf784a..c0cbd2ce 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -891,8 +891,7 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b switch (instr->type) { case HLSL_IR_CALL: - hlsl_fixme(ctx, &instr->loc, "Inline call instructions.\n"); - break; + vkd3d_unreachable();
case HLSL_IR_CONSTANT: write_sm1_constant(ctx, buffer, instr); diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 69f9b9d3..2e7c101f 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -2361,8 +2361,7 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * switch (instr->type) { case HLSL_IR_CALL: - hlsl_fixme(ctx, &instr->loc, "Inline call instructions.\n"); - break; + vkd3d_unreachable();
case HLSL_IR_CONSTANT: write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); diff --git a/tests/function-return.shader_test b/tests/function-return.shader_test index 32fd01f9..7d411998 100644 --- a/tests/function-return.shader_test +++ b/tests/function-return.shader_test @@ -1,6 +1,6 @@ % Test early return from a user-defined function.
-[pixel shader todo] +[pixel shader]
float func(out float o) { @@ -29,10 +29,10 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.2, 0.1, 0.8, 0.5);
-[pixel shader todo] +[pixel shader]
uniform float f;
@@ -80,19 +80,19 @@ float4 main() : sv_target
[test] uniform 0 float 0.1 -todo draw quad +draw quad probe all rgba (0.3, 0.2, 0.6, 0.3) 1 uniform 0 float 0.4 -todo draw quad +draw quad probe all rgba (0.6, 0.5, 0.6, 0.3) 1 uniform 0 float 0.6 -todo draw quad +draw quad probe all rgba (0.6, 0.5, 0.4, 0.5) 1 uniform 0 float 0.8 -todo draw quad +draw quad probe all rgba (0.8, 0.7, 0.4, 0.5) 1
-[pixel shader todo] +[pixel shader]
uniform float f;
@@ -136,13 +136,13 @@ float4 main() : sv_target
[test] uniform 0 float 0.1 -todo draw quad +draw quad probe all rgba (0.2, 0.1, 0.2, 0.1) 1 uniform 0 float 0.5 -todo draw quad +draw quad probe all rgba (0.5, 0.4, 1.0, 0.9) 1 uniform 0 float 0.9 -todo draw quad +draw quad probe all rgba (1.0, 0.9, 1.0, 0.6) 1
[pixel shader todo] diff --git a/tests/hlsl-function.shader_test b/tests/hlsl-function.shader_test index aef338d8..80412727 100644 --- a/tests/hlsl-function.shader_test +++ b/tests/hlsl-function.shader_test @@ -1,4 +1,4 @@ -[pixel shader fail todo] +[pixel shader fail]
float4 func();
@@ -121,7 +121,7 @@ void func() { }
-[pixel shader todo] +[pixel shader]
void func();
@@ -179,7 +179,7 @@ float4 main() : sv_target todo draw quad todo probe all rgba (0.1, 0.2, 0.3, 0.4)
-[pixel shader todo] +[pixel shader]
float func(in float a, out float b, inout float c) { @@ -200,10 +200,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.5, 0.6, 0.7, 0) +draw quad +probe all rgba (0.5, 0.6, 0.7, 0)
-[pixel shader todo] +[pixel shader]
void func(in float a, inout float2 b) { @@ -221,8 +221,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.6, 0.1, 0.5, 0) +draw quad +probe all rgba (0.6, 0.1, 0.5, 0)
% Recursion is forbidden.
diff --git a/tests/hlsl-numthreads.shader_test b/tests/hlsl-numthreads.shader_test index 0d75a924..404d7d76 100644 --- a/tests/hlsl-numthreads.shader_test +++ b/tests/hlsl-numthreads.shader_test @@ -153,7 +153,7 @@ static int x = 1; [numthreads((x = 2), 1, 1)] void main() {}
-[compute shader todo] +[compute shader]
static int x = 1;
diff --git a/tests/hlsl-return-implicit-conversion.shader_test b/tests/hlsl-return-implicit-conversion.shader_test index a58d015d..1767748b 100644 --- a/tests/hlsl-return-implicit-conversion.shader_test +++ b/tests/hlsl-return-implicit-conversion.shader_test @@ -38,7 +38,7 @@ float4x1 main() : sv_target draw quad probe all rgba (0.4, 0.3, 0.2, 0.1)
-[pixel shader todo] +[pixel shader] float3 func() { return float3x1(0.4, 0.3, 0.2); @@ -50,10 +50,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float3 func() { return float1x3(0.4, 0.3, 0.2); @@ -65,10 +65,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float1x3 func() { return float3(0.4, 0.3, 0.2); @@ -80,10 +80,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float3x1 func() { return float3(0.4, 0.3, 0.2); @@ -95,8 +95,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail] float3x1 func() @@ -120,7 +120,7 @@ float4 main() : sv_target return float4(func(), 0.0); }
-[pixel shader todo] +[pixel shader] float3 func() { return float4(0.4, 0.3, 0.2, 0.1); @@ -132,10 +132,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float3 func() { return float4x1(0.4, 0.3, 0.2, 0.1); @@ -147,10 +147,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float3 func() { return float1x4(0.4, 0.3, 0.2, 0.1); @@ -162,8 +162,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail todo] float3x1 func() @@ -176,7 +176,7 @@ float4 main() : sv_target return float4(func(), 0.0); }
-[pixel shader todo] +[pixel shader] float3x1 func() { return float4x1(0.4, 0.3, 0.2, 0.1); @@ -188,8 +188,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail] float3x1 func() @@ -202,7 +202,7 @@ float4 main() : sv_target return float4(func(), 0.0); }
-[pixel shader todo] +[pixel shader] float1x3 func() { return float4(0.4, 0.3, 0.2, 0.1); @@ -214,8 +214,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail] float1x3 func() @@ -228,7 +228,7 @@ float4 main() : sv_target return float4(func(), 0.0); }
-[pixel shader todo] +[pixel shader] float1x3 func() { return float1x4(0.4, 0.3, 0.2, 0.1); @@ -240,5 +240,5 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0) diff --git a/tests/hlsl-static-initializer.shader_test b/tests/hlsl-static-initializer.shader_test index 286145c4..8415d851 100644 --- a/tests/hlsl-static-initializer.shader_test +++ b/tests/hlsl-static-initializer.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] float myfunc() { return 0.6; @@ -12,8 +12,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.8, 0.0, 0.0, 0.0) +draw quad +probe all rgba (0.8, 0.0, 0.0, 0.0)
[pixel shader fail] diff --git a/tests/hlsl-storage-qualifiers.shader_test b/tests/hlsl-storage-qualifiers.shader_test index 965101e8..00e7b836 100644 --- a/tests/hlsl-storage-qualifiers.shader_test +++ b/tests/hlsl-storage-qualifiers.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] void sub2(in uniform float4 i, out float4 o) { o = i; @@ -17,5 +17,5 @@ void main(in uniform float4 a, uniform float4 b, out float4 o : sv_target) [test] uniform 0 float4 0.1 0.0 0.0 0.0 uniform 4 float4 0.2 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +draw quad +probe all rgba (0.1, 0.2, 0.3, 0.4) diff --git a/tests/uav-out-param.shader_test b/tests/uav-out-param.shader_test index d7e29667..f3134474 100644 --- a/tests/uav-out-param.shader_test +++ b/tests/uav-out-param.shader_test @@ -7,7 +7,7 @@ size (1, 1)
0.1 0.2 0.3 0.4
-[compute shader todo] +[compute shader]
RWTexture2D<float4> u;
@@ -26,7 +26,7 @@ void main() }
[test] -todo dispatch 1 1 1 +dispatch 1 1 1 probe uav 0 (0, 0) rgba (0.4, 0.1, 0.2, 0.3)
[uav 0] @@ -35,7 +35,7 @@ size (1, 1)
0.1
-[compute shader todo] +[compute shader]
RWTexture2D<float> u;
@@ -51,5 +51,5 @@ void main() }
[test] -todo dispatch 1 1 1 +dispatch 1 1 1 probe uav 0 (0, 0) r (0.2)
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 65 +++++++++++++++++++------------- 1 file changed, 39 insertions(+), 26 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index b44075a4..06d0a30c 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -520,11 +520,12 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, }
/* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */ -static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, +static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_block *block, bool in_loop) { struct hlsl_ir_node *return_instr = NULL, *cf_instr = NULL; struct hlsl_ir_node *instr, *next; + bool has_early_return = false;
/* SM1 has no function calls. SM4 does, but native d3dcompiler inlines * everything anyway. We are safest following suit. @@ -546,7 +547,7 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun * return into a break, which offers the right semantics—except that it * won't break out of nested loops. * - * - A CF block which might contain a return statement. After calling + * - A CF block which contains a return statement. After calling * lower_return() on the CF block body, we stop, pull out everything after * the CF instruction, shove it into an if block, and then lower that if * block. @@ -559,6 +560,9 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun * already turned any returns into breaks. If the block we just processed * was conditional, then "break" did our work for us. If it was a loop, * we need to propagate that break to the outer loop. + * + * We return true if there was an early return anywhere in the block we just + * processed (including CF contained inside that block). */
LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) @@ -573,32 +577,38 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- lower_return(ctx, func, &iff->then_instrs, in_loop); - lower_return(ctx, func, &iff->else_instrs, in_loop); + has_early_return |= lower_return(ctx, func, &iff->then_instrs, in_loop); + has_early_return |= lower_return(ctx, func, &iff->else_instrs, in_loop);
- /* If we're in a loop, we don't need to do anything here. We - * turned the return into a break, and that will already skip - * anything that comes after this "if" block. */ - if (!in_loop) + if (has_early_return) { - cf_instr = instr; - break; + /* If we're in a loop, we don't need to do anything here. We + * turned the return into a break, and that will already skip + * anything that comes after this "if" block. */ + if (!in_loop) + { + cf_instr = instr; + break; + } } } else if (instr->type == HLSL_IR_LOOP) { - lower_return(ctx, func, &hlsl_ir_loop(instr)->body, true); + has_early_return |= lower_return(ctx, func, &hlsl_ir_loop(instr)->body, true);
- if (in_loop) - { - /* "instr" is a nested loop. "return" breaks out of all - * loops, so break out of this one too now. */ - insert_early_return_break(ctx, func, instr); - } - else + if (has_early_return) { - cf_instr = instr; - break; + if (in_loop) + { + /* "instr" is a nested loop. "return" breaks out of all + * loops, so break out of this one too now. */ + insert_early_return_break(ctx, func, instr); + } + else + { + cf_instr = instr; + break; + } } } else if (instr->type == HLSL_IR_JUMP) @@ -610,13 +620,14 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun if (jump->type == HLSL_IR_JUMP_RETURN) { if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc))) - return; + return false; list_add_before(&jump->node.entry, &constant->node.entry);
if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) - return; + return false; list_add_after(&constant->node.entry, &store->node.entry);
+ has_early_return = true; if (in_loop) { jump->type = HLSL_IR_JUMP_BREAK; @@ -658,24 +669,26 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun assert(!in_loop);
if (tail == &cf_instr->entry) - return; + return has_early_return;
if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) - return; + return false; list_add_tail(&block->instrs, &load->node.entry);
if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, cf_instr->loc))) - return; + return false; list_add_tail(&block->instrs, &not->entry);
if (!(iff = hlsl_new_if(ctx, not, cf_instr->loc))) - return; + return false; list_add_tail(&block->instrs, &iff->node.entry);
list_move_slice_tail(&iff->then_instrs.instrs, list_next(&block->instrs, &cf_instr->entry), tail);
lower_return(ctx, func, &iff->then_instrs, in_loop); } + + return has_early_return; }
/* Remove HLSL_IR_CALL instructions by inlining them. */
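As an illustration of the behavioural change in this patch (hypothetical source, not from the tests): a CF block that contains no return now leaves the code after it untouched, instead of needlessly wrapping it in an "if (!early_return)" block.

float4 main(uniform float f) : sv_target
{
    float4 ret = float4(0.1, 0.2, 0.3, 0.4);

    if (f < 0.5)
        ret.x = 0.9;   /* no return anywhere in this block... */

    ret.y += 0.1;      /* ...so this no longer gets guarded on early_return */
    return ret;
}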
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 14 +++++++++----- tests/return.shader_test | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index d5e2b2a9..3bad2acc 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3647,6 +3647,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl %type <list> declaration_statement %type <list> equality_expr %type <list> expr +%type <list> expr_optional %type <list> expr_statement %type <list> initializer_expr %type <list> jump_statement @@ -4875,24 +4876,27 @@ loop_statement: { $$ = create_loop(ctx, LOOP_DO_WHILE, NULL, $5, NULL, $2, @1); } - | KW_FOR '(' scope_start expr_statement expr_statement expr ')' statement + | KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement { $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); hlsl_pop_scope(ctx); } - | KW_FOR '(' scope_start declaration expr_statement expr ')' statement + | KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement { $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); hlsl_pop_scope(ctx); }
-expr_statement: - ';' +expr_optional: + %empty { if (!($$ = make_empty_list(ctx))) YYABORT; } - | expr ';' + | expr + +expr_statement: + expr_optional ';' { $$ = $1; } diff --git a/tests/return.shader_test b/tests/return.shader_test index a66d8756..2660c6dd 100644 --- a/tests/return.shader_test +++ b/tests/return.shader_test @@ -215,7 +215,7 @@ todo draw quad todo probe all rgba (0.2, 0.2, 0.2, 0.2) uniform 0 float 0.8 todo draw quad -todo probe all rgba (0.5, 0.5, 0.5, 0.5) +probe all rgba (0.5, 0.5, 0.5, 0.5)
[pixel shader todo]
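For reference, a hypothetical shader exercising the grammar change above, with the third clause of the for statement left empty:

float4 main() : sv_target
{
    float x = 0.0;

    for (int i = 0; i < 4;)
    {
        x += 0.1;
        ++i;
    }
    return float4(x, x, x, x);
}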
On Fri Feb 10 10:18:42 2023 +0000, Giovanni Mascellani wrote:
I'm still in favor of creating it in `hlsl_new_func_decl()`. Agreed, it doesn't make a big difference, but it's one less thing to track when reading `lower_return()`. Though I can live with it as it is right now.
I don't have a preference personally, so I went ahead and changed it in v3.
On Fri Feb 10 20:36:01 2023 +0000, Giovanni Mascellani wrote:
If possible, please don't rebase on the new `master` while we're still doing the review, as it muddies the diff between patch set versions that GitLab creates. Once everything is ironed out, you can rebase on the latest `master` (or even let AJ do it directly if there are no conflicts).
Sorry about that, I thought I hadn't rebased it in the meantime but I was wrong. I'll be more careful about that in the future...
On Fri Feb 10 17:52:55 2023 +0000, Francisco Casas wrote:
The variable `dst` should be removed.
Fixed in v3, thanks.
This merge request was approved by Francisco Casas.
This merge request was approved by Henri Verbeet.