This is the last piece that's needed for function calls to basically work.
From: Zebediah Figura <zfigura@codeweavers.com>
It requires caps beyond shader model 5.0. --- tests/uav-out-param.shader_test | 34 ++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-)
diff --git a/tests/uav-out-param.shader_test b/tests/uav-out-param.shader_test index 646d14f6..d7e29667 100644 --- a/tests/uav-out-param.shader_test +++ b/tests/uav-out-param.shader_test @@ -11,9 +11,12 @@ size (1, 1)
RWTexture2D<float4> u;
-void func(inout float4 f) +void func(out float4 f) { - f.xz += 0.1; + f.x = 0.1; + f.y = 0.2; + f.z = 0.3; + f.w = 0.4; }
[numthreads(1, 1, 1)] @@ -24,4 +27,29 @@ void main()
[test] todo dispatch 1 1 1 -todo probe uav 0 (0, 0) rgba (0.1, 0.3, 0.3, 0.5) +probe uav 0 (0, 0) rgba (0.4, 0.1, 0.2, 0.3) + +[uav 0] +format r32 float +size (1, 1) + +0.1 + +[compute shader todo] + +RWTexture2D<float> u; + +void func(inout float f) +{ + f += 0.1; +} + + [numthreads(1, 1, 1)] +void main() +{ + func(u[uint2(0, 0)]); +} + +[test] +todo dispatch 1 1 1 +probe uav 0 (0, 0) r (0.2)
From: Zebediah Figura <zfigura@codeweavers.com>
--- libs/vkd3d-shader/hlsl.c | 24 ++++++++++++++++++++---- libs/vkd3d-shader/hlsl.h | 2 +- tests/hlsl-invalid.shader_test | 4 ++++ 3 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 19e294f9..1f21f043 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -554,9 +554,13 @@ struct hlsl_type *hlsl_get_type(struct hlsl_scope *scope, const char *name, bool return NULL; }
-bool hlsl_get_function(struct hlsl_ctx *ctx, const char *name) +struct hlsl_ir_function *hlsl_get_function(struct hlsl_ctx *ctx, const char *name) { - return rb_get(&ctx->functions, name) != NULL; + struct rb_entry *entry; + + if ((entry = rb_get(&ctx->functions, name))) + return RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry); + return NULL; }
struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const char *name) @@ -2628,8 +2632,9 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; - struct hlsl_ir_function_decl *entry_func; + struct hlsl_ir_function_decl *decl, *entry_func = NULL; const struct hlsl_profile_info *profile; + struct hlsl_ir_function *func; const char *entry_point; struct hlsl_ctx ctx; int ret; @@ -2685,7 +2690,18 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d return VKD3D_ERROR_NOT_IMPLEMENTED; }
- if (!(entry_func = hlsl_get_func_decl(&ctx, entry_point))) + if ((func = hlsl_get_function(&ctx, entry_point))) + { + RB_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) + { + if (!decl->has_body) + continue; + entry_func = decl; + break; + } + } + + if (!entry_func) { const struct vkd3d_shader_location loc = {.source_name = compile_info->source_name};
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 567e3ad3..d615ff8a 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -988,7 +988,7 @@ void hlsl_free_instr_list(struct list *list); void hlsl_free_type(struct hlsl_type *type); void hlsl_free_var(struct hlsl_ir_var *decl);
-bool hlsl_get_function(struct hlsl_ctx *ctx, const char *name); +struct hlsl_ir_function *hlsl_get_function(struct hlsl_ctx *ctx, const char *name); struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const char *name); struct hlsl_type *hlsl_get_type(struct hlsl_scope *scope, const char *name, bool recursive); struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name); diff --git a/tests/hlsl-invalid.shader_test b/tests/hlsl-invalid.shader_test index 1b39d7f0..e56205cb 100644 --- a/tests/hlsl-invalid.shader_test +++ b/tests/hlsl-invalid.shader_test @@ -264,3 +264,7 @@ float4 main() : sv_target { return 1.0; } + +[pixel shader fail] + +float4 main() : sv_target;
From: Zebediah Figura <zfigura@codeweavers.com>
--- include/private/list.h | 24 ++++ libs/vkd3d-shader/hlsl.h | 1 + libs/vkd3d-shader/hlsl_codegen.c | 205 +++++++++++++++++++++++++++++++ libs/vkd3d-shader/hlsl_sm1.c | 2 +- libs/vkd3d-shader/hlsl_sm4.c | 2 +- tests/return.shader_test | 26 ++-- 6 files changed, 245 insertions(+), 15 deletions(-)
diff --git a/include/private/list.h b/include/private/list.h index 5e92cfb2..2e1d95f3 100644 --- a/include/private/list.h +++ b/include/private/list.h @@ -186,6 +186,30 @@ static inline void list_move_tail( struct list *dst, struct list *src ) list_move_before( dst, src ); }
+/* move the slice of elements from begin to end inclusive to the head of dst */ +static inline void list_move_slice_head( struct list *dst, struct list *begin, struct list *end ) +{ + struct list *dst_next = dst->next; + begin->prev->next = end->next; + end->next->prev = begin->prev; + dst->next = begin; + dst_next->prev = end; + begin->prev = dst; + end->next = dst_next; +} + +/* move the slice of elements from begin to end inclusive to the tail of dst */ +static inline void list_move_slice_tail( struct list *dst, struct list *begin, struct list *end ) +{ + struct list *dst_prev = dst->prev; + begin->prev->next = end->next; + end->next->prev = begin->prev; + dst_prev->next = begin; + dst->prev = end; + begin->prev = dst_prev; + end->next = dst; +} + /* iterate through the list */ #define LIST_FOR_EACH(cursor,list) \ for ((cursor) = (list)->next; (cursor) != (list); (cursor) = (cursor)->next) diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index d615ff8a..4c4fc478 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -429,6 +429,7 @@ struct hlsl_ir_function_decl * Not to be confused with the function parameters! */ unsigned int attr_count; const struct hlsl_attribute *const *attrs; + struct hlsl_ir_var *early_return_var; };
struct hlsl_ir_call diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 308e3dae..4651815d 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -499,6 +499,205 @@ static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst return false; }
+static void insert_early_return_break(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, struct hlsl_ir_node *cf_instr) +{ + struct hlsl_ir_jump *jump; + struct hlsl_ir_load *load; + struct hlsl_ir_if *iff; + + if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) + return; + list_add_after(&cf_instr->entry, &load->node.entry); + + if (!(iff = hlsl_new_if(ctx, &load->node, cf_instr->loc))) + return; + list_add_after(&load->node.entry, &iff->node.entry); + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, cf_instr->loc))) + return; + list_add_tail(&iff->then_instrs.instrs, &jump->node.entry); +} + +/* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */ +static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_block *block, bool in_loop) +{ + struct hlsl_ir_node *return_instr = NULL, *cf_instr = NULL; + struct hlsl_ir_node *instr, *next; + + /* SM1 has no function calls. SM4 does, but native d3dcompiler inlines + * everything anyway. We are safest following suit. + * + * The basic idea is to keep track of whether the function has executed an + * early return in a synthesized boolean variable (func->early_return_var) + * and guard all code after the return on that variable being false. In the + * case of loops we also replace the return with a break. + * + * The following algorithm loops over instructions in a block, recursing + * into inferior CF blocks, until it hits one of the following two things: + * + * - A return statement. In this case, we remove everything after the return + * statement in this block. We have to stop and do this in a separate + * loop, because instructions must be deleted in reverse order (due to + * def-use chains.) + * + * If we're inside of a loop CF block, we can instead just turn the + * return into a break, which offers the right semantics—except that it + * won't break out of nested loops. 
+ * + * - A CF block which might contain a return statement. After calling + * lower_return() on the CF block body, we stop, pull out everything after + * the CF instruction, shove it into an if block, and then lower that if + * block. + * + * (We could return a "did we make progress" like transform_ir() and run + * this pass multiple times, but we already know the only block that still + * needs addressing, so there's not much point.) + * + * If we're inside of a loop CF block, we again do things differently. We + * already turned any returns into breaks. If the block we just processed + * was conditional, then "break" did our work for us. If it was a loop, + * we need to propagate that break to the outer loop. + */ + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->type == HLSL_IR_CALL) + { + struct hlsl_ir_call *call = hlsl_ir_call(instr); + + lower_return(ctx, call->decl, &call->decl->body, false); + } + else if (instr->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + lower_return(ctx, func, &iff->then_instrs, in_loop); + lower_return(ctx, func, &iff->else_instrs, in_loop); + + if (func->early_return_var) + { + /* If we're in a loop, we don't need to do anything here. We + * turned the return into a break, and that will already skip + * anything that comes after this "if" block. */ + if (!in_loop) + { + cf_instr = instr; + break; + } + } + } + else if (instr->type == HLSL_IR_LOOP) + { + lower_return(ctx, func, &hlsl_ir_loop(instr)->body, true); + + if (func->early_return_var) + { + if (in_loop) + { + /* "instr" is a nested loop. "return" breaks out of all + * loops, so break out of this one too now. 
*/ + insert_early_return_break(ctx, func, instr); + } + else + { + cf_instr = instr; + break; + } + } + } + else if (instr->type == HLSL_IR_JUMP) + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + struct hlsl_ir_constant *constant; + struct hlsl_ir_store *store; + + if (jump->type == HLSL_IR_JUMP_RETURN) + { + if (!func->early_return_var) + { + if (!(func->early_return_var = hlsl_new_synthetic_var(ctx, "early_return", + hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &jump->node.loc))) + return; + + if (!(constant = hlsl_new_bool_constant(ctx, false, &jump->node.loc))) + return; + list_add_head(&func->body.instrs, &constant->node.entry); + + if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) + return; + list_add_after(&constant->node.entry, &store->node.entry); + } + if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc))) + return; + list_add_before(&jump->node.entry, &constant->node.entry); + + if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) + return; + list_add_after(&constant->node.entry, &store->node.entry); + + if (in_loop) + { + jump->type = HLSL_IR_JUMP_BREAK; + } + else + { + return_instr = instr; + break; + } + } + } + } + + if (return_instr) + { + /* If we're in a loop, we should have used "break" instead. */ + assert(!in_loop); + + /* Iterate in reverse, to avoid use-after-free when unlinking sources from + * the "uses" list. */ + LIST_FOR_EACH_ENTRY_SAFE_REV(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + list_remove(&instr->entry); + hlsl_free_instr(instr); + + /* Yes, we just freed it, but we're comparing pointers. */ + if (instr == return_instr) + break; + } + } + else if (cf_instr) + { + struct list *tail = list_tail(&block->instrs); + struct hlsl_ir_load *load; + struct hlsl_ir_node *not; + struct hlsl_ir_if *iff; + + /* If we're in a loop, we should have used "break" instead. 
*/ + assert(!in_loop); + + if (tail == &cf_instr->entry) + return; + + if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) + return; + list_add_tail(&block->instrs, &load->node.entry); + + if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, cf_instr->loc))) + return; + list_add_tail(&block->instrs, ¬->entry); + + if (!(iff = hlsl_new_if(ctx, not, cf_instr->loc))) + return; + list_add_tail(&block->instrs, &iff->node.entry); + + list_move_slice_tail(&iff->then_instrs.instrs, list_next(&block->instrs, &cf_instr->entry), tail); + + lower_return(ctx, func, &iff->then_instrs, in_loop); + } +} + /* Lower casts from vec1 to vecN to swizzles. */ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -2927,6 +3126,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); vkd3d_free(recursive_call_ctx.backtrace);
+ /* Avoid going into an infinite loop when processing them. */ + if (ctx->result) + return ctx->result; + + lower_return(ctx, entry_func, body, false); + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 60b54a42..266eff58 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -902,7 +902,7 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b break;
default: - FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type)); + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); } } } diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index fb14889d..69f9b9d3 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -2401,7 +2401,7 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * break;
default: - FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type)); + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); } } } diff --git a/tests/return.shader_test b/tests/return.shader_test index e913d15d..a66d8756 100644 --- a/tests/return.shader_test +++ b/tests/return.shader_test @@ -10,7 +10,7 @@ float4 main() : sv_target
[test] draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +probe all rgba (0.1, 0.2, 0.3, 0.4)
[pixel shader]
@@ -23,7 +23,7 @@ void main(out float4 ret : sv_target)
[test] draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +probe all rgba (0.1, 0.2, 0.3, 0.4)
[pixel shader]
@@ -39,7 +39,7 @@ float4 main() : sv_target [test] uniform 0 float 0.2 draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +probe all rgba (0.1, 0.2, 0.3, 0.4) uniform 0 float 0.8 draw quad probe all rgba (0.5, 0.6, 0.7, 0.8) @@ -69,7 +69,7 @@ draw quad probe all rgba (0.3, 0.4, 0.5, 0.6) uniform 0 float 0.8 draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +probe all rgba (0.1, 0.2, 0.3, 0.4)
[pixel shader]
@@ -93,10 +93,10 @@ void main(out float4 ret : sv_target) [test] uniform 0 float 0.1 draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) 1 +probe all rgba (0.1, 0.2, 0.3, 0.4) 1 uniform 0 float 0.5 draw quad -todo probe all rgba (0.2, 0.3, 0.4, 0.5) 1 +probe all rgba (0.2, 0.3, 0.4, 0.5) 1 uniform 0 float 0.9 draw quad probe all rgba (0.5, 0.6, 0.7, 0.8) 1 @@ -120,7 +120,7 @@ void main(out float4 ret : sv_target) [test] uniform 0 float 0.1 draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) 1 +probe all rgba (0.1, 0.2, 0.3, 0.4) 1 uniform 0 float 0.5 draw quad probe all rgba (0.5, 0.6, 0.7, 0.8) 1 @@ -146,7 +146,7 @@ void main(out float4 ret : sv_target) todo draw quad todo probe all rgba (0.2, 0.4, 0.6, 0.8)
-[pixel shader] +[pixel shader todo]
uniform float f;
@@ -166,23 +166,23 @@ void main(out float4 ret : sv_target)
[test] uniform 0 float 0.0 -draw quad +todo draw quad todo probe all rgba (0.1, 0.1, 0.1, 0.1) 1
uniform 0 float 0.1 -draw quad +todo draw quad todo probe all rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 0 float 0.3 -draw quad +todo draw quad todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float 0.7 -draw quad +todo draw quad todo probe all rgba (0.8, 0.8, 0.8, 0.8) 1
uniform 0 float 0.9 -draw quad +todo draw quad todo probe all rgba (0.9, 0.9, 0.9, 0.9) 1
[pixel shader todo]
From: Zebediah Figura <zfigura@codeweavers.com>
--- libs/vkd3d-shader/hlsl.c | 295 ++++++++++++++++++ libs/vkd3d-shader/hlsl.h | 2 + libs/vkd3d-shader/hlsl_codegen.c | 32 +- libs/vkd3d-shader/hlsl_sm1.c | 3 +- libs/vkd3d-shader/hlsl_sm4.c | 3 +- tests/function-return.shader_test | 22 +- tests/hlsl-function.shader_test | 16 +- tests/hlsl-numthreads.shader_test | 2 +- ...lsl-return-implicit-conversion.shader_test | 60 ++-- tests/hlsl-static-initializer.shader_test | 6 +- tests/hlsl-storage-qualifiers.shader_test | 6 +- tests/uav-out-param.shader_test | 8 +- 12 files changed, 389 insertions(+), 66 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 1f21f043..d2d0c682 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1204,6 +1204,301 @@ struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_loc return loop; }
+struct clone_instr_map +{ + struct + { + const struct hlsl_ir_node *src; + struct hlsl_ir_node *dst; + } *instrs; + size_t count, capacity; +}; + +static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + struct clone_instr_map *map, const struct hlsl_ir_node *instr); + +static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, + const struct hlsl_block *src_block, struct clone_instr_map *map) +{ + const struct hlsl_ir_node *src; + struct hlsl_ir_node *dst; + + LIST_FOR_EACH_ENTRY(src, &src_block->instrs, struct hlsl_ir_node, entry) + { + if (!(dst = clone_instr(ctx, map, src))) + { + hlsl_free_instr_list(&dst_block->instrs); + return false; + } + list_add_tail(&dst_block->instrs, &dst->entry); + + if (!list_empty(&src->uses)) + { + if (!vkd3d_array_reserve((void **)&map->instrs, &map->capacity, map->count + 1, sizeof(*map->instrs))) + { + hlsl_free_instr_list(&dst_block->instrs); + return false; + } + + map->instrs[map->count].dst = dst; + map->instrs[map->count].src = src; + ++map->count; + } + } + return true; +} + +static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct hlsl_ir_node *src) +{ + size_t i; + + if (!src) + return NULL; + + for (i = 0; i < map->count; ++i) + { + if (map->instrs[i].src == src) + return map->instrs[i].dst; + } + /* The block passed to hlsl_clone_block() should have been free of external + * references. 
*/ + assert(0); + return NULL; +} + +static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, + struct hlsl_deref *dst, const struct hlsl_deref *src) +{ + unsigned int i; + + assert(!src->offset.node); + + if (!init_deref(ctx, dst, src->var, src->path_len)) + return false; + + for (i = 0; i < src->path_len; ++i) + hlsl_src_from_node(&dst->path[i], map_instr(map, src->path[i].node)); + + return true; +} + +static void clone_src(struct clone_instr_map *map, struct hlsl_src *dst, const struct hlsl_src *src) +{ + hlsl_src_from_node(dst, map_instr(map, src->node)); +} + +static struct hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call *src) +{ + struct hlsl_ir_node *dst; + + if (!(dst = hlsl_new_call(ctx, src->decl, &src->node.loc))) + return NULL; + return dst; +} + +static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_constant *src) +{ + struct hlsl_ir_constant *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_CONSTANT, src->node.data_type, &src->node.loc); + memcpy(dst->value, src->value, sizeof(src->value)); + return &dst->node; +} + +static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_expr *src) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(operands); ++i) + operands[i] = map_instr(map, src->operands[i].node); + + return hlsl_new_expr(ctx, src->op, operands, src->node.data_type, &src->node.loc); +} + +static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_if *src) +{ + struct hlsl_ir_if *dst; + + if (!(dst = hlsl_new_if(ctx, map_instr(map, src->condition.node), src->node.loc))) + return NULL; + + if (!clone_block(ctx, &dst->then_instrs, &src->then_instrs, map) + || !clone_block(ctx, &dst->else_instrs, &src->else_instrs, map)) + { + hlsl_free_instr(&dst->node); + return NULL; + } + return &dst->node; 
+} + +static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) +{ + struct hlsl_ir_jump *dst; + + if (!(dst = hlsl_new_jump(ctx, src->type, src->node.loc))) + return NULL; + return &dst->node; +} + +static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) +{ + struct hlsl_ir_load *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_LOAD, src->node.data_type, &src->node.loc); + + if (!clone_deref(ctx, map, &dst->src, &src->src)) + { + vkd3d_free(dst); + return NULL; + } + return &dst->node; +} + +static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) +{ + struct hlsl_ir_loop *dst; + + if (!(dst = hlsl_new_loop(ctx, src->node.loc))) + return NULL; + if (!clone_block(ctx, &dst->body, &src->body, map)) + { + hlsl_free_instr(&dst->node); + return NULL; + } + return &dst->node; +} + +static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_resource_load *src) +{ + struct hlsl_ir_resource_load *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_RESOURCE_LOAD, src->node.data_type, &src->node.loc); + dst->load_type = src->load_type; + if (!clone_deref(ctx, map, &dst->resource, &src->resource)) + { + vkd3d_free(dst); + return NULL; + } + if (!clone_deref(ctx, map, &dst->sampler, &src->sampler)) + { + hlsl_cleanup_deref(&dst->resource); + vkd3d_free(dst); + return NULL; + } + clone_src(map, &dst->coords, &src->coords); + clone_src(map, &dst->lod, &src->lod); + clone_src(map, &dst->texel_offset, &src->texel_offset); + return &dst->node; +} + +static struct hlsl_ir_node *clone_resource_store(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_resource_store *src) +{ + struct hlsl_ir_resource_store *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return 
NULL; + init_node(&dst->node, HLSL_IR_RESOURCE_STORE, NULL, &src->node.loc); + if (!clone_deref(ctx, map, &dst->resource, &src->resource)) + { + vkd3d_free(dst); + return NULL; + } + clone_src(map, &dst->coords, &src->coords); + clone_src(map, &dst->value, &src->value); + return &dst->node; +} + +static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_store *src) +{ + struct hlsl_ir_store *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_STORE, NULL, &src->node.loc); + + if (!clone_deref(ctx, map, &dst->lhs, &src->lhs)) + { + vkd3d_free(dst); + return NULL; + } + clone_src(map, &dst->rhs, &src->rhs); + dst->writemask = src->writemask; + return &dst->node; +} + +static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_swizzle *src) +{ + struct hlsl_ir_swizzle *dst; + + if (!(dst = hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, + map_instr(map, src->val.node), &src->node.loc))) + return NULL; + return &dst->node; +} + +static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + struct clone_instr_map *map, const struct hlsl_ir_node *instr) +{ + switch (instr->type) + { + case HLSL_IR_CALL: + return clone_call(ctx, hlsl_ir_call(instr)); + + case HLSL_IR_CONSTANT: + return clone_constant(ctx, hlsl_ir_constant(instr)); + + case HLSL_IR_EXPR: + return clone_expr(ctx, map, hlsl_ir_expr(instr)); + + case HLSL_IR_IF: + return clone_if(ctx, map, hlsl_ir_if(instr)); + + case HLSL_IR_JUMP: + return clone_jump(ctx, hlsl_ir_jump(instr)); + + case HLSL_IR_LOAD: + return clone_load(ctx, map, hlsl_ir_load(instr)); + + case HLSL_IR_LOOP: + return clone_loop(ctx, map, hlsl_ir_loop(instr)); + + case HLSL_IR_RESOURCE_LOAD: + return clone_resource_load(ctx, map, hlsl_ir_resource_load(instr)); + + case HLSL_IR_RESOURCE_STORE: + return clone_resource_store(ctx, map, hlsl_ir_resource_store(instr)); + + case 
HLSL_IR_STORE: + return clone_store(ctx, map, hlsl_ir_store(instr)); + + case HLSL_IR_SWIZZLE: + return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); + } + + assert(0); + return NULL; +} + +bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block) +{ + struct clone_instr_map map = {0}; + bool ret; + + ret = clone_block(ctx, dst_block, src_block, &map); + vkd3d_free(map.instrs); + return ret; +} + struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters, const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc) diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 4c4fc478..b5ecb656 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -971,6 +971,8 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type); void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl); bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var);
+bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); + void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func);
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 4651815d..14f7c075 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -698,6 +698,32 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun } }
+/* Remove HLSL_IR_CALL instructions by inlining them. */ +static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + const struct hlsl_ir_function_decl *decl; + struct hlsl_ir_call *call; + struct hlsl_block block; + + if (instr->type != HLSL_IR_CALL) + return false; + call = hlsl_ir_call(instr); + decl = call->decl; + + if (!decl->has_body) + hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, + "Function \"%s\" is not defined.", decl->func->name); + + list_init(&block.instrs); + if (!hlsl_clone_block(ctx, &block, &decl->body)) + return false; + list_move_before(&call->node.entry, &block.instrs); + + list_remove(&call->node.entry); + hlsl_free_instr(&call->node); + return true; +} + /* Lower casts from vec1 to vecN to swizzles. */ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -2193,8 +2219,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop switch (instr->type) { case HLSL_IR_CALL: - FIXME("We should have inlined all calls before computing liveness.\n"); - break; + /* We should have inlined all calls before computing liveness. */ + vkd3d_unreachable();
case HLSL_IR_STORE: { @@ -3132,6 +3158,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
lower_return(ctx, entry_func, body, false);
+ while (transform_ir(ctx, lower_calls, body, NULL)); + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 266eff58..70d7c97c 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -878,8 +878,7 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b switch (instr->type) { case HLSL_IR_CALL: - hlsl_fixme(ctx, &instr->loc, "Inline call instructions.\n"); - break; + vkd3d_unreachable();
case HLSL_IR_CONSTANT: write_sm1_constant(ctx, buffer, instr); diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 69f9b9d3..2e7c101f 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -2361,8 +2361,7 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * switch (instr->type) { case HLSL_IR_CALL: - hlsl_fixme(ctx, &instr->loc, "Inline call instructions.\n"); - break; + vkd3d_unreachable();
case HLSL_IR_CONSTANT: write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); diff --git a/tests/function-return.shader_test b/tests/function-return.shader_test index 32fd01f9..7d411998 100644 --- a/tests/function-return.shader_test +++ b/tests/function-return.shader_test @@ -1,6 +1,6 @@ % Test early return from a user-defined function.
-[pixel shader todo] +[pixel shader]
float func(out float o) { @@ -29,10 +29,10 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.2, 0.1, 0.8, 0.5);
-[pixel shader todo] +[pixel shader]
uniform float f;
@@ -80,19 +80,19 @@ float4 main() : sv_target
[test] uniform 0 float 0.1 -todo draw quad +draw quad probe all rgba (0.3, 0.2, 0.6, 0.3) 1 uniform 0 float 0.4 -todo draw quad +draw quad probe all rgba (0.6, 0.5, 0.6, 0.3) 1 uniform 0 float 0.6 -todo draw quad +draw quad probe all rgba (0.6, 0.5, 0.4, 0.5) 1 uniform 0 float 0.8 -todo draw quad +draw quad probe all rgba (0.8, 0.7, 0.4, 0.5) 1
-[pixel shader todo] +[pixel shader]
uniform float f;
@@ -136,13 +136,13 @@ float4 main() : sv_target
[test] uniform 0 float 0.1 -todo draw quad +draw quad probe all rgba (0.2, 0.1, 0.2, 0.1) 1 uniform 0 float 0.5 -todo draw quad +draw quad probe all rgba (0.5, 0.4, 1.0, 0.9) 1 uniform 0 float 0.9 -todo draw quad +draw quad probe all rgba (1.0, 0.9, 1.0, 0.6) 1
[pixel shader todo] diff --git a/tests/hlsl-function.shader_test b/tests/hlsl-function.shader_test index aef338d8..80412727 100644 --- a/tests/hlsl-function.shader_test +++ b/tests/hlsl-function.shader_test @@ -1,4 +1,4 @@ -[pixel shader fail todo] +[pixel shader fail]
float4 func();
@@ -121,7 +121,7 @@ void func() { }
-[pixel shader todo] +[pixel shader]
void func();
@@ -179,7 +179,7 @@ float4 main() : sv_target todo draw quad todo probe all rgba (0.1, 0.2, 0.3, 0.4)
-[pixel shader todo] +[pixel shader]
float func(in float a, out float b, inout float c) { @@ -200,10 +200,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.5, 0.6, 0.7, 0) +draw quad +probe all rgba (0.5, 0.6, 0.7, 0)
-[pixel shader todo] +[pixel shader]
void func(in float a, inout float2 b) { @@ -221,8 +221,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.6, 0.1, 0.5, 0) +draw quad +probe all rgba (0.6, 0.1, 0.5, 0)
% Recursion is forbidden.
diff --git a/tests/hlsl-numthreads.shader_test b/tests/hlsl-numthreads.shader_test index 0d75a924..404d7d76 100644 --- a/tests/hlsl-numthreads.shader_test +++ b/tests/hlsl-numthreads.shader_test @@ -153,7 +153,7 @@ static int x = 1; [numthreads((x = 2), 1, 1)] void main() {}
-[compute shader todo] +[compute shader]
static int x = 1;
diff --git a/tests/hlsl-return-implicit-conversion.shader_test b/tests/hlsl-return-implicit-conversion.shader_test index a58d015d..1767748b 100644 --- a/tests/hlsl-return-implicit-conversion.shader_test +++ b/tests/hlsl-return-implicit-conversion.shader_test @@ -38,7 +38,7 @@ float4x1 main() : sv_target draw quad probe all rgba (0.4, 0.3, 0.2, 0.1)
-[pixel shader todo] +[pixel shader] float3 func() { return float3x1(0.4, 0.3, 0.2); @@ -50,10 +50,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float3 func() { return float1x3(0.4, 0.3, 0.2); @@ -65,10 +65,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float1x3 func() { return float3(0.4, 0.3, 0.2); @@ -80,10 +80,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float3x1 func() { return float3(0.4, 0.3, 0.2); @@ -95,8 +95,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail] float3x1 func() @@ -120,7 +120,7 @@ float4 main() : sv_target return float4(func(), 0.0); }
-[pixel shader todo] +[pixel shader] float3 func() { return float4(0.4, 0.3, 0.2, 0.1); @@ -132,10 +132,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float3 func() { return float4x1(0.4, 0.3, 0.2, 0.1); @@ -147,10 +147,10 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
-[pixel shader todo] +[pixel shader] float3 func() { return float1x4(0.4, 0.3, 0.2, 0.1); @@ -162,8 +162,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail todo] float3x1 func() @@ -176,7 +176,7 @@ float4 main() : sv_target return float4(func(), 0.0); }
-[pixel shader todo] +[pixel shader] float3x1 func() { return float4x1(0.4, 0.3, 0.2, 0.1); @@ -188,8 +188,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail] float3x1 func() @@ -202,7 +202,7 @@ float4 main() : sv_target return float4(func(), 0.0); }
-[pixel shader todo] +[pixel shader] float1x3 func() { return float4(0.4, 0.3, 0.2, 0.1); @@ -214,8 +214,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail] float1x3 func() @@ -228,7 +228,7 @@ float4 main() : sv_target return float4(func(), 0.0); }
-[pixel shader todo] +[pixel shader] float1x3 func() { return float1x4(0.4, 0.3, 0.2, 0.1); @@ -240,5 +240,5 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.4, 0.3, 0.2, 0.0) +draw quad +probe all rgba (0.4, 0.3, 0.2, 0.0) diff --git a/tests/hlsl-static-initializer.shader_test b/tests/hlsl-static-initializer.shader_test index 286145c4..8415d851 100644 --- a/tests/hlsl-static-initializer.shader_test +++ b/tests/hlsl-static-initializer.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] float myfunc() { return 0.6; @@ -12,8 +12,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0.8, 0.0, 0.0, 0.0) +draw quad +probe all rgba (0.8, 0.0, 0.0, 0.0)
[pixel shader fail] diff --git a/tests/hlsl-storage-qualifiers.shader_test b/tests/hlsl-storage-qualifiers.shader_test index 965101e8..00e7b836 100644 --- a/tests/hlsl-storage-qualifiers.shader_test +++ b/tests/hlsl-storage-qualifiers.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] void sub2(in uniform float4 i, out float4 o) { o = i; @@ -17,5 +17,5 @@ void main(in uniform float4 a, uniform float4 b, out float4 o : sv_target) [test] uniform 0 float4 0.1 0.0 0.0 0.0 uniform 4 float4 0.2 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.1, 0.2, 0.3, 0.4) +draw quad +probe all rgba (0.1, 0.2, 0.3, 0.4) diff --git a/tests/uav-out-param.shader_test b/tests/uav-out-param.shader_test index d7e29667..f3134474 100644 --- a/tests/uav-out-param.shader_test +++ b/tests/uav-out-param.shader_test @@ -7,7 +7,7 @@ size (1, 1)
0.1 0.2 0.3 0.4
-[compute shader todo] +[compute shader]
RWTexture2D<float4> u;
@@ -26,7 +26,7 @@ void main() }
[test] -todo dispatch 1 1 1 +dispatch 1 1 1 probe uav 0 (0, 0) rgba (0.4, 0.1, 0.2, 0.3)
[uav 0] @@ -35,7 +35,7 @@ size (1, 1)
0.1
-[compute shader todo] +[compute shader]
RWTexture2D<float> u;
@@ -51,5 +51,5 @@ void main() }
[test] -todo dispatch 1 1 1 +dispatch 1 1 1 probe uav 0 (0, 0) r (0.2)
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 37 +++++++++++++++++--------------- 1 file changed, 20 insertions(+), 17 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 14f7c075..8f8bfcd1 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -520,11 +520,12 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, }
/* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */ -static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, +static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_block *block, bool in_loop) { struct hlsl_ir_node *return_instr = NULL, *cf_instr = NULL; struct hlsl_ir_node *instr, *next; + bool has_early_return = false;
/* SM1 has no function calls. SM4 does, but native d3dcompiler inlines * everything anyway. We are safest following suit. @@ -546,7 +547,7 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun * return into a break, which offers the right semantics—except that it * won't break out of nested loops. * - * - A CF block which might contain a return statement. After calling + * - A CF block which contains a return statement. After calling * lower_return() on the CF block body, we stop, pull out everything after * the CF instruction, shove it into an if block, and then lower that if * block. @@ -573,10 +574,10 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun { struct hlsl_ir_if *iff = hlsl_ir_if(instr);
- lower_return(ctx, func, &iff->then_instrs, in_loop); - lower_return(ctx, func, &iff->else_instrs, in_loop); + has_early_return |= lower_return(ctx, func, &iff->then_instrs, in_loop); + has_early_return |= lower_return(ctx, func, &iff->else_instrs, in_loop);
- if (func->early_return_var) + if (has_early_return) { /* If we're in a loop, we don't need to do anything here. We * turned the return into a break, and that will already skip @@ -590,10 +591,9 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun } else if (instr->type == HLSL_IR_LOOP) { - lower_return(ctx, func, &hlsl_ir_loop(instr)->body, true); - - if (func->early_return_var) + if (lower_return(ctx, func, &hlsl_ir_loop(instr)->body, true)) { + has_early_return = true; if (in_loop) { /* "instr" is a nested loop. "return" breaks out of all @@ -619,24 +619,25 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun { if (!(func->early_return_var = hlsl_new_synthetic_var(ctx, "early_return", hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &jump->node.loc))) - return; + return false;
if (!(constant = hlsl_new_bool_constant(ctx, false, &jump->node.loc))) - return; + return false; list_add_head(&func->body.instrs, &constant->node.entry);
if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) - return; + return false; list_add_after(&constant->node.entry, &store->node.entry); } if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc))) - return; + return false; list_add_before(&jump->node.entry, &constant->node.entry);
if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, &constant->node))) - return; + return false; list_add_after(&constant->node.entry, &store->node.entry);
+ has_early_return = true; if (in_loop) { jump->type = HLSL_IR_JUMP_BREAK; @@ -678,24 +679,26 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun assert(!in_loop);
if (tail == &cf_instr->entry) - return; + return has_early_return;
if (!(load = hlsl_new_var_load(ctx, func->early_return_var, cf_instr->loc))) - return; + return false; list_add_tail(&block->instrs, &load->node.entry);
if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, cf_instr->loc))) - return; + return false; list_add_tail(&block->instrs, ¬->entry);
if (!(iff = hlsl_new_if(ctx, not, cf_instr->loc))) - return; + return false; list_add_tail(&block->instrs, &iff->node.entry);
list_move_slice_tail(&iff->then_instrs.instrs, list_next(&block->instrs, &cf_instr->entry), tail);
lower_return(ctx, func, &iff->then_instrs, in_loop); } + + return has_early_return; }
/* Remove HLSL_IR_CALL instructions by inlining them. */
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 14 +++++++++----- tests/return.shader_test | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index d5e2b2a9..3bad2acc 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3647,6 +3647,7 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct list *instrs, struct hl %type <list> declaration_statement %type <list> equality_expr %type <list> expr +%type <list> expr_optional %type <list> expr_statement %type <list> initializer_expr %type <list> jump_statement @@ -4875,24 +4876,27 @@ loop_statement: { $$ = create_loop(ctx, LOOP_DO_WHILE, NULL, $5, NULL, $2, @1); } - | KW_FOR '(' scope_start expr_statement expr_statement expr ')' statement + | KW_FOR '(' scope_start expr_statement expr_statement expr_optional ')' statement { $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); hlsl_pop_scope(ctx); } - | KW_FOR '(' scope_start declaration expr_statement expr ')' statement + | KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement { $$ = create_loop(ctx, LOOP_FOR, $4, $5, $6, $8, @1); hlsl_pop_scope(ctx); }
-expr_statement: - ';' +expr_optional: + %empty { if (!($$ = make_empty_list(ctx))) YYABORT; } - | expr ';' + | expr + +expr_statement: + expr_optional ';' { $$ = $1; } diff --git a/tests/return.shader_test b/tests/return.shader_test index a66d8756..2660c6dd 100644 --- a/tests/return.shader_test +++ b/tests/return.shader_test @@ -215,7 +215,7 @@ todo draw quad todo probe all rgba (0.2, 0.2, 0.2, 0.2) uniform 0 float 0.8 todo draw quad -todo probe all rgba (0.5, 0.5, 0.5, 0.5) +probe all rgba (0.5, 0.5, 0.5, 0.5)
[pixel shader todo]
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
* the CF instruction, shove it into an if block, and then lower that if
* block.
*
* (We could return a "did we make progress" like transform_ir() and run
* this pass multiple times, but we already know the only block that still
* needs addressing, so there's not much point.)
*
* If we're inside of a loop CF block, we again do things differently. We
* already turned any returns into breaks. If the block we just processed
* was conditional, then "break" did our work for us. If it was a loop,
* we need to propagate that break to the outer loop.
*/
- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
- {
if (instr->type == HLSL_IR_CALL)
No strong opinions, but maybe this would be more readable as a `switch` block?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
- {
if (instr->type == HLSL_IR_CALL)
{
struct hlsl_ir_call *call = hlsl_ir_call(instr);
lower_return(ctx, call->decl, &call->decl->body, false);
}
else if (instr->type == HLSL_IR_IF)
{
struct hlsl_ir_if *iff = hlsl_ir_if(instr);
lower_return(ctx, func, &iff->then_instrs, in_loop);
lower_return(ctx, func, &iff->else_instrs, in_loop);
if (func->early_return_var)
I think I would find it more legible if we stipulated that `early_return_var` always exists, so you can skip a few branches in this function, which is already relatively complicated. This would leave in some more dead code, but hopefully there will eventually be a dead if elimination pass that gets rid of it.
That said, nice function! Before reading that I thought it would have been much more of a pain.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl.c:
+static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct hlsl_ir_node *src) +{
- size_t i;
- if (!src)
return NULL;
- for (i = 0; i < map->count; ++i)
- {
if (map->instrs[i].src == src)
return map->instrs[i].dst;
- }
- /* The block passed to hlsl_clone_block() should have been free of external
* references. */
- assert(0);
`vkd3d_unreachable()`
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl.c:
return clone_loop(ctx, map, hlsl_ir_loop(instr));
case HLSL_IR_RESOURCE_LOAD:
return clone_resource_load(ctx, map, hlsl_ir_resource_load(instr));
case HLSL_IR_RESOURCE_STORE:
return clone_resource_store(ctx, map, hlsl_ir_resource_store(instr));
case HLSL_IR_STORE:
return clone_store(ctx, map, hlsl_ir_store(instr));
case HLSL_IR_SWIZZLE:
return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr));
- }
- assert(0);
`vkd3d_unreachable()`
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl.c:
+static struct hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call *src) +{
- struct hlsl_ir_node *dst;
- if (!(dst = hlsl_new_call(ctx, src->decl, &src->node.loc)))
return NULL;
- return dst;
+}
+static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_constant *src) +{
- struct hlsl_ir_constant *dst;
- if (!(dst = hlsl_alloc(ctx, sizeof(*dst))))
return NULL;
- init_node(&dst->node, HLSL_IR_CONSTANT, src->node.data_type, &src->node.loc);
We have `hlsl_new_constant()` now.
Francisco Casas (@fcasas) commented about tests/uav-out-param.shader_test:
RWTexture2D<float4> u;
-void func(inout float4 f) +void func(out float4 f) {
- f.xz += 0.1;
- f.x = 0.1;
- f.y = 0.2;
- f.z = 0.3;
- f.w = 0.4;
}
[numthreads(1, 1, 1)]
void main() { func(u[uint2(0, 0)].yzwx);
Seems that this is another case of discrepancy between the behavior of fxc 9.29 and fxc 10.1. The test being deleted in this patch worked for me using the latter (profile `cs_5_0`) so, at least in principle, I don't think it should be deleted.
Francisco Casas (@fcasas) commented about libs/vkd3d-shader/hlsl.c:
return VKD3D_ERROR_NOT_IMPLEMENTED; }
- if (!(entry_func = hlsl_get_func_decl(&ctx, entry_point)))
- if ((func = hlsl_get_function(&ctx, entry_point)))
- {
RB_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry)
{
if (!decl->has_body)
continue;
entry_func = decl;
break;
}
In case of multiple overloads with definitions, we are still selecting the one that comes first in the order given by `compare_function_decl_rb()` -- which depends on the parameter types -- but in !65 it was mentioned that in native it depends on the .dll version and the order in which the declarations appear, so I would suggest displaying a `FIXME`:
```c RB_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) { if (!decl->has_body) continue; if (entry_func) { FIXME("Multiple valid entry point definitions.\n"); break; } entry_func = decl; } ```
Francisco Casas (@fcasas) commented about libs/vkd3d-shader/hlsl.h:
* Not to be confused with the function parameters! */ unsigned int attr_count; const struct hlsl_attribute *const *attrs;
- struct hlsl_ir_var *early_return_var;
nitpick: may be good to add an empty line before this one so that it is known that the comment doesn't apply to it.
Or even better, a comment along the lines of:
Synthetic variable to keep track of whether a return statement has been executed.
Francisco Casas (@fcasas) commented about libs/vkd3d-shader/hlsl_sm4.c:
break; default:
FIXME("Unhandled instruction type %s.\n", hlsl_node_type_to_string(instr->type));
hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type));
Shouldn't this be a separate patch?
Same for the change in `hlsl_sm1.c`.
Francisco Casas (@fcasas) commented about libs/vkd3d-shader/hlsl_codegen.c:
transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); vkd3d_free(recursive_call_ctx.backtrace);
- /* Avoid going into an infinite loop when processing them. */
I think that this comment should be more specific, because the "them" may not be immediately clear. Perhaps something like: ```c /* Avoid going into an infinite loop in case a recursive call was detected. */ ```
Francisco Casas (@fcasas) commented about libs/vkd3d-shader/hlsl_codegen.c:
* statement in this block. We have to stop and do this in a separate
* loop, because instructions must be deleted in reverse order (due to
* def-use chains.)
*
* If we're inside of a loop CF block, we can instead just turn the
* return into a break, which offers the right semantics—except that it
* won't break out of nested loops.
*
* - A CF block which might contain a return statement. After calling
* lower_return() on the CF block body, we stop, pull out everything after
* the CF instruction, shove it into an if block, and then lower that if
* block.
*
* (We could return a "did we make progress" like transform_ir() and run
* this pass multiple times, but we already know the only block that still
* needs addressing, so there's not much point.)
The previous part of the comment is great at explaining what this does!
This paragraph got me a little confused at first though, because I didn't make the association with the `progress` flag immediately. Perhaps a rephrasing would help:
We could return a "did we make progress" **boolean** like **in** transform_ir() and run this pass multiple times, but we already know the only block that **remains** to be **addressed**, so there's not much point.
Also, I have the feeling that this approach wouldn't be easy either, because we would have to check if we already inserted the `early_return_var` conditionals during the last iteration.
Excellent MR!
We have to remember to keep the clone functions in mind from now on if we add new fields to instruction types, but they seem necessary.
On Thu Feb 9 16:01:37 2023 +0000, Giovanni Mascellani wrote:
I think I would find it more legible if we stipulated that `early_return_var` always exists, so you can skip a few branches in this function, which is already relatively complicated. This would leave in some more dead code, but hopefully there will eventually be a dead if elimination pass that gets rid of it. That said, nice function! Before reading that I thought it would have been much more of a pain.
From what I understood, `early_return_var` is only created for the current function once a `return` is found inside it, so it doesn't always exist.
On Thu Feb 9 16:15:59 2023 +0000, Francisco Casas wrote:
From what I understood, `early_return_var` is only created for the current function once a `return` is found inside it, so it doesn't always exist.
Yeah, my proposal is to have it exist always. It could be created with the function itself, I guess. This way you can simplify a little bit a function which is already quite complex.
On Thu Feb 9 16:13:25 2023 +0000, Francisco Casas wrote:
Seems that this is another case of discrepancy between the behavior of fxc 9.29 and fxc 10.1. The test being deleted in this patch worked for me using the latter (profile `cs_5_0`) so, at least in principle, I don't think it should be deleted.
The problem isn't that it doesn't compile, it's that it requires an optional d3d11/d3d12 feature to run. (It probably works on 47 and not on earlier versions because earlier versions predate that d3d11 version.) In practice it also fails in the Vulkan backend for basically that reason. We could add support in the shader runner [require] block to check for that feature, but it seemed easier to just change the test; it should still check the same things I was trying to test in the first place (i.e. swizzles work, in/out parameters work...)
On Thu Feb 9 16:01:36 2023 +0000, Giovanni Mascellani wrote:
No strong opinions, but maybe this would be more readable as a `switch` block?
I don't think it makes much of a difference in terms of readability, but it means we can't just use the "break" instructions to break out of the instruction loop :-/ And restructuring with helpers would probably be hard.
On Thu Feb 9 19:24:06 2023 +0000, Giovanni Mascellani wrote:
Yeah, my proposal is to have it exist always. It could be created with the function itself, I guess. This way you can simplify a little bit a function which is already quite complex.
Hmm. After 5/6 it's only actually checked in one place. I suppose we could still create it in hlsl_func_decl() though? Not sure whether it makes a big difference.