On 9/21/22 21:05, Francisco Casas wrote:
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index f919a30e..72f7b498 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -454,23 +454,26 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx return progress; }
-/* Lower casts from vec1 to vecN to swizzles. */ +/* Lower broadcasts casts from scalars or vec1 values to other types. */ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
Hmm, do we need to have this function in hlsl_codegen.c in the first place? Could we generate these already lowered? It probably doesn't need to block this patch, but it's at least something we should consider doing.
{ - const struct hlsl_type *src_type, *dst_type; - struct hlsl_type *dst_scalar_type; + struct hlsl_type *src_type, *dst_type; struct hlsl_ir_expr *cast;
if (instr->type != HLSL_IR_EXPR) return false; cast = hlsl_ir_expr(instr); + if (cast->op != HLSL_OP1_CAST) + return false; src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type;
- if (cast->op == HLSL_OP1_CAST - && src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR - && src_type->dimx == 1) + if (src_type->type > HLSL_CLASS_VECTOR || src_type->dimx != 1) + return false; + + if (dst_type->type <= HLSL_CLASS_VECTOR) { + struct hlsl_type *dst_scalar_type; struct hlsl_ir_node *replacement; struct hlsl_ir_swizzle *swizzle; struct hlsl_ir_expr *new_cast; @@ -494,6 +497,37 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v hlsl_replace_node(&cast->node, replacement); return true; } + else if (HLSL_CLASS_VECTOR < dst_type->type)
This can just be "else", right?
+ { + unsigned int size = hlsl_type_component_count(dst_type); + struct vkd3d_string_buffer *string; + static unsigned int counter = 0; + struct list *broadcast_instrs; + unsigned int store_index = 0; + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + + if (!(string = hlsl_get_string_buffer(ctx))) + return false; + vkd3d_string_buffer_printf(string, "<broadcast-%x>", counter++);
In general I'd prefer either %u, or %#x, unless we have a good reason to use %x.
+ if (!(var = hlsl_new_synthetic_var(ctx, string->buffer, dst_type, instr->loc))) + return false; + hlsl_release_string_buffer(ctx, string);
Just a thought (can be relegated to later), but we might consider making a helper for this pattern; we have it in several places. (Some use a pointer value, and some use a counter; we could probably standardize on the latter.)
+ + if (!(broadcast_instrs = hlsl_alloc(ctx, sizeof(*broadcast_instrs)))) + return false; + list_init(broadcast_instrs);
This can be on stack, right?
+ + while (store_index < size) + hlsl_initialize_var_components(ctx, broadcast_instrs, var, &store_index, cast->operands[0].node);
You're only initializing from a single component, so you should just be able to use hlsl_new_cast() plus hlsl_new_store_component(). [Note we probably want to use hlsl_new_cast() rather than add_implicit_conversion(), mostly because the difference is type checking, and we should already have done that.]
+ + list_move_before(&cast->node.entry, broadcast_instrs); + vkd3d_free(broadcast_instrs); + + load = hlsl_new_var_load(ctx, var, var->loc); + list_add_before(&cast->node.entry, &load->node.entry); + hlsl_replace_node(&cast->node, &load->node); + }
return false; }