diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index f919a30e..72f7b498 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -454,23 +454,26 @@ static bool transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx return progress; }
-/* Lower casts from vec1 to vecN to swizzles. */ +/* Lower broadcasts casts from scalars or vec1 values to other types. */ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
Hmm, do we need to have this function in hlsl_codegen.c in the first place? Could we generate these already lowered?
It probably doesn't need to block this patch, but it's at least something we should consider doing.
I don't see a particular reason for having this as a compilation pass. I am moving the code to add_cast() then.
{
- const struct hlsl_type *src_type, *dst_type;
- struct hlsl_type *dst_scalar_type;
struct hlsl_type *src_type, *dst_type; struct hlsl_ir_expr *cast;
if (instr->type != HLSL_IR_EXPR) return false; cast = hlsl_ir_expr(instr);
if (cast->op != HLSL_OP1_CAST)
return false; src_type = cast->operands[0].node->data_type; dst_type = cast->node.data_type;
- if (cast->op == HLSL_OP1_CAST
&& src_type->type <= HLSL_CLASS_VECTOR && dst_type->type <= HLSL_CLASS_VECTOR
&& src_type->dimx == 1)
- if (src_type->type > HLSL_CLASS_VECTOR || src_type->dimx != 1)
return false;
- if (dst_type->type <= HLSL_CLASS_VECTOR) {
struct hlsl_type *dst_scalar_type; struct hlsl_ir_node *replacement; struct hlsl_ir_swizzle *swizzle; struct hlsl_ir_expr *new_cast;
@@ -494,6 +497,37 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v hlsl_replace_node(&cast->node, replacement); return true; }
- else if (HLSL_CLASS_VECTOR < dst_type->type)
This can just be "else", right?
Indeed, I don't know why I added that check.
- {
unsigned int size = hlsl_type_component_count(dst_type);
struct vkd3d_string_buffer *string;
static unsigned int counter = 0;
struct list *broadcast_instrs;
unsigned int store_index = 0;
struct hlsl_ir_load *load;
struct hlsl_ir_var *var;
if (!(string = hlsl_get_string_buffer(ctx)))
return false;
vkd3d_string_buffer_printf(string, "<broadcast-%x>", counter++);
In general I'd prefer either %u, or %#x, unless we have a good reason to use %x.
Okay.
if (!(var = hlsl_new_synthetic_var(ctx, string->buffer, dst_type, instr->loc)))
return false;
hlsl_release_string_buffer(ctx, string);
Just a thought (can be relegated to later), but we might consider making a helper for this pattern; we have it in several places. (Some use a pointer value, and some use a counter; we could probably standardize on the latter.)
if (!(broadcast_instrs = hlsl_alloc(ctx, sizeof(*broadcast_instrs))))
return false;
list_init(broadcast_instrs);
This can be on stack, right?
Now that you mention it, yes! Probably I didn't understand lists as well when I first wrote this.
while (store_index < size)
hlsl_initialize_var_components(ctx, broadcast_instrs, var, &store_index, cast->operands[0].node);
You're only initializing from a single component, so you should just be able to use hlsl_new_cast() plus hlsl_new_store_component(). [Note we probably want to use hlsl_new_cast() rather than add_implicit_conversion(), mostly because the difference is type checking, and we should already have done that.]
A reason for using initialize_var_components() (and thus, add_implicit_conversion()), is that if dst_type is a struct that contains an object type, the broadcast should fail.
The alternative could be adding checks for these cases in the functions for checking compatible data types, but this may require more heavy thought.
P.S. I realized that, in native, it is a allowed to casts from a complex type to another "component-wise-compatible" complex type.
For instance, the following shader compiles in native: