Mainly comprises support for allocating arrays of resources, and loading from them, for both SM1 and SM4.
The `vkd3d-shader/hlsl: Skip object components when creating input/output copies.` patch can be dropped if !148 is accepted first.
From: Francisco Casas fcasas@codeweavers.com
Variables that contain more than one object (arrays or structs) require the allocation of contiguous registers in the respective object register spaces. --- libs/vkd3d-shader/hlsl.c | 8 ++- libs/vkd3d-shader/hlsl.h | 3 +- libs/vkd3d-shader/hlsl_codegen.c | 69 +++++++++++++++++-------- libs/vkd3d-shader/hlsl_sm1.c | 4 +- libs/vkd3d-shader/hlsl_sm4.c | 20 +++++-- tests/register-reservations.shader_test | 2 +- 6 files changed, 75 insertions(+), 31 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 869638a6..35b83f8b 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -166,6 +166,9 @@ static unsigned int get_array_size(const struct hlsl_type *type)
bool hlsl_type_is_resource(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_is_resource(type->e.array.type); + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) @@ -186,6 +189,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) if (type->class <= HLSL_CLASS_LAST_NUMERIC) return HLSL_REGSET_NUMERIC;
+ if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_get_regset(type->e.array.type); + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) @@ -203,8 +209,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) vkd3d_unreachable(); } } - else if (type->class == HLSL_CLASS_ARRAY) - return hlsl_type_get_regset(type->e.array.type);
vkd3d_unreachable(); } diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index b38fba1c..88ef08d5 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -228,7 +228,7 @@ struct hlsl_struct_field size_t name_bytecode_offset; };
-/* Information of the register allocated for an instruction node or variable. +/* Information of the register(s) allocated for an instruction node or variable. * These values are initialized at the end of hlsl_emit_bytecode(), after the compilation passes, * just before writing the bytecode. * For numeric registers, a writemask can be provided to indicate the reservation of only some of the @@ -238,6 +238,7 @@ struct hlsl_struct_field struct hlsl_reg { uint32_t id; + uint32_t count; unsigned int writemask; /* Whether the register has been allocated. */ bool allocated; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index c8130a3f..40565273 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2399,6 +2399,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) { var->regs[regset].allocated = true; var->regs[regset].id = var->reg_reservation.reg_index; + var->regs[regset].count = var->data_type->reg_size[regset]; TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.reg_type, var->reg_reservation.reg_index); } @@ -3165,13 +3166,17 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum uint32_t index) { const struct hlsl_ir_var *var; + unsigned int start, count;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { if (!var->regs[regset].allocated) continue;
- if (index == var->regs[regset].id) + start = var->regs[regset].id; + count = var->regs[regset].count; + + if (start <= index && index < start + count) return var; } return NULL; @@ -3182,7 +3187,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) char regset_name = get_regset_name(regset); struct hlsl_ir_var *var; uint32_t min_index = 0; - uint32_t index;
if (regset == HLSL_REGSET_UAVS) { @@ -3194,19 +3198,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } }
- index = min_index; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->last_read || !var->data_type->reg_size[regset]) + unsigned int count = var->regs[regset].count; + + if (count == 0) continue;
if (var->regs[regset].allocated) { const struct hlsl_ir_var *reserved_object; - unsigned int index = var->regs[regset].id; - - reserved_object = get_allocated_object(ctx, regset, index); + unsigned int index, i;
if (var->regs[regset].id < min_index) { @@ -3214,28 +3216,43 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", var->regs[regset].id, min_index - 1); + continue; } - else if (reserved_object && reserved_object != var) + + for (i = 0; i < count; ++i) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple objects bound to %c%u.", regset_name, index); - hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Object '%s' is already bound to %c%u.", reserved_object->name, - regset_name, index); - } + index = var->regs[regset].id + i;
- var->regs[regset].id = var->reg_reservation.reg_index; - var->regs[regset].allocated = true; - TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); + reserved_object = get_allocated_object(ctx, regset, index); + if (reserved_object && reserved_object != var) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "Multiple variables bound to %c%u.", regset_name, index); + hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, + "Variable '%s' is already bound to %c%u.", reserved_object->name, + regset_name, index); + } + } } else { - while (get_allocated_object(ctx, regset, index)) + unsigned int index = min_index; + unsigned int available = 0; + + while (available < count) + { + if (get_allocated_object(ctx, regset, index)) + available = 0; + else + ++available; ++index; + } + index -= count;
var->regs[regset].id = index; var->regs[regset].allocated = true; - TRACE("Allocated object to %c%u.\n", regset_name, index); + TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, + index + count); ++index; } } @@ -3462,7 +3479,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry { var = entry_func->parameters.vars[i];
- if (var->data_type->class == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { prepend_uniform_copy(ctx, &body->instrs, var); } @@ -3557,6 +3574,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry rb_for_each_entry(&ctx->functions, dump_function, ctx);
allocate_register_reservations(ctx); + + /* For now, request all the registers for each variable, even if they are not used. */ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int k; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + var->regs[k].count = var->data_type->reg_size[k]; + } + allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) { diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index be665981..137bb058 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -360,9 +360,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe if (!var->semantic.name && var->regs[regset].allocated) { put_u32(buffer, 0); /* name */ - if (var->data_type->class == HLSL_CLASS_OBJECT - && (var->data_type->base_type == HLSL_TYPE_SAMPLER - || var->data_type->base_type == HLSL_TYPE_TEXTURE)) + if (regset != HLSL_REGSET_NUMERIC) { assert(regset == HLSL_REGSET_SAMPLERS); put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 8be848c5..452a2f83 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -427,6 +427,9 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_type(type->e.array.type); + switch (type->base_type) { case HLSL_TYPE_SAMPLER: @@ -442,6 +445,9 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type)
static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_format(type->e.array.type); + switch (type->e.resource_format->base_type) { case HLSL_TYPE_DOUBLE: @@ -466,6 +472,9 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type
static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return sm4_rdef_resource_dimension(type->e.array.type); + switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: @@ -869,7 +878,12 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (var->is_uniform) { - if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE) + enum hlsl_regset regset = HLSL_REGSET_NUMERIC; + + if (hlsl_type_is_resource(data_type)) + regset = hlsl_type_get_regset(data_type); + + if (regset == HLSL_REGSET_TEXTURES) { reg->type = VKD3D_SM4_RT_RESOURCE; reg->dim = VKD3D_SM4_DIMENSION_VEC4; @@ -879,7 +893,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } - else if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV) + else if (regset == HLSL_REGSET_UAVS) { reg->type = VKD3D_SM5_RT_UAV; reg->dim = VKD3D_SM4_DIMENSION_VEC4; @@ -889,7 +903,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } - else if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER) + else if (regset == HLSL_REGSET_SAMPLERS) { reg->type = VKD3D_SM4_RT_SAMPLER; reg->dim = VKD3D_SM4_DIMENSION_NONE; diff --git a/tests/register-reservations.shader_test b/tests/register-reservations.shader_test index f0287d00..5aeb7623 100644 --- a/tests/register-reservations.shader_test +++ b/tests/register-reservations.shader_test @@ -48,4 +48,4 @@ float4 main() : sv_target
[test] draw quad -todo probe all rgba (4.0, 4.0, 4.0, 99.0) +probe all rgba (4.0, 4.0, 4.0, 99.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_sm4.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 452a2f83..94bb4ed9 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -737,10 +737,13 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
if (profile->major_version >= 5) { - put_u32(&buffer, 0); /* texture start */ - put_u32(&buffer, 0); /* texture count */ - put_u32(&buffer, 0); /* sampler start */ - put_u32(&buffer, 0); /* sampler count */ + unsigned int tex_alloc = !!var->regs[HLSL_REGSET_TEXTURES].allocated; + unsigned int sam_alloc = !!var->regs[HLSL_REGSET_SAMPLERS].allocated; + + put_u32(&buffer, tex_alloc * var->regs[HLSL_REGSET_TEXTURES].id); + put_u32(&buffer, tex_alloc * var->regs[HLSL_REGSET_TEXTURES].count); + put_u32(&buffer, sam_alloc * var->regs[HLSL_REGSET_SAMPLERS].id); + put_u32(&buffer, sam_alloc * var->regs[HLSL_REGSET_SAMPLERS].count); } } }
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 16 ++++++++++++++++ libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 2 files changed, 17 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 40565273..8173355c 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -263,6 +263,12 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct struct hlsl_ir_var *var = lhs->src.var; unsigned int i;
+ if (hlsl_type_is_resource(type)) + { + hlsl_fixme(ctx, &lhs->node.loc, "Objects as input parameters."); + return; + } + vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type));
for (i = 0; i < hlsl_type_major_size(type); ++i) @@ -315,6 +321,9 @@ static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *field_load; struct hlsl_ir_constant *c;
+ if (hlsl_type_is_resource(field->type)) + continue; + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; list_add_after(&lhs->node.entry, &c->node.entry); @@ -358,6 +367,13 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct struct hlsl_ir_var *var = rhs->src.var; unsigned int i;
+ if (hlsl_type_is_resource(type)) + { + hlsl_error(ctx, &rhs->node.loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PARAMETER, + "Objects cannot be output parameters."); + return; + } + vector_type = hlsl_get_vector_type(ctx, type->base_type, hlsl_type_minor_size(type));
for (i = 0; i < hlsl_type_major_size(type); ++i) diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index e635a70b..18105342 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -125,6 +125,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT = 5023, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, + VKD3D_SHADER_ERROR_HLSL_INVALID_PARAMETER = 5026,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
From: Francisco Casas fcasas@codeweavers.com
It could be argued that the new hlsl_error() calls should be hlsl_fixme() because our costant folding is not perfect, but ideally these should be hlsl_error(). --- libs/vkd3d-shader/hlsl.c | 22 ++++ libs/vkd3d-shader/hlsl.h | 6 + libs/vkd3d-shader/hlsl_codegen.c | 140 +++++++++++++++++++++-- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 4 files changed, 161 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 35b83f8b..4852ad7e 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -112,8 +112,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name)
void hlsl_free_var(struct hlsl_ir_var *decl) { + unsigned int k; + vkd3d_free((void *)decl->name); hlsl_cleanup_semantic(&decl->semantic); + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); vkd3d_free(decl); }
@@ -942,6 +946,7 @@ struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct const struct hlsl_reg_reservation *reg_reservation) { struct hlsl_ir_var *var; + unsigned int k;
if (!(var = hlsl_alloc(ctx, sizeof(*var)))) return NULL; @@ -954,6 +959,23 @@ struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct var->storage_modifiers = modifiers; if (reg_reservation) var->reg_reservation = *reg_reservation; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + unsigned int i, obj_count = type->reg_size[k]; + + if (obj_count == 0) + continue; + + if (!(var->objects_usage[k] = hlsl_alloc(ctx, sizeof(*var->objects_usage[0]) * obj_count))) + { + for (i = 0; i < k; ++i) + vkd3d_free(var->objects_usage[i]); + vkd3d_free(var); + return NULL; + } + } + return var; }
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 88ef08d5..a1937002 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -390,6 +390,10 @@ struct hlsl_ir_var * and the buffer_offset instead. */ struct hlsl_reg regs[HLSL_REGSET_LAST + 1];
+ struct { + bool used; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; @@ -1150,6 +1154,8 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *start, unsigned int *count); +bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index); bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset); unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 8173355c..6fb37d91 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2712,6 +2712,84 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); }
+static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + + if (regset == HLSL_REGSET_SAMPLERS) + { + assert(!load->sampler.var); + + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX, + "Non-constant index in sampler resource load."); + return false; + } + var->objects_usage[regset][index].used = true; + return true; + } + else + { + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX, + "Non-constant index in resource load's resource."); + return false; + } + + var->objects_usage[regset][index].used = true; + + if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX, + "Non-constant index in resource load's sampler."); + return false; + } + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; + } + } + + return false; +} + +static bool request_object_registers_for_allocation(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var; + struct hlsl_type *type; + unsigned int i, k; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + type = var->data_type; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + for (i = 0; i < type->reg_size[k]; ++i) + { + /* Samplers are only allocated until the last used one. */ + if (var->objects_usage[k][i].used) + var->regs[k].count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; + } + } + } + + return false; +} + static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) { if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) @@ -3349,6 +3427,58 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return true; }
+bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index) +{ + struct hlsl_type *type = deref->var->data_type; + unsigned int i; + + assert(regset <= HLSL_REGSET_LAST_OBJECT); + + *index = 0; + + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + + assert(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return false; + + /* We should always have generated a cast to UINT. */ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->base_type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value[0].u; + + switch (type->class) + { + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); + return false; + } + *index += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_STRUCT: + *index += type->e.record.fields[idx].reg_offset[regset]; + break; + + default: + break; + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + + assert(type->reg_size[regset] == 1); + return true; +} + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; @@ -3575,6 +3705,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry }
transform_ir(ctx, validate_static_object_references, body, NULL); + transform_ir(ctx, track_object_components_usage, body, NULL);
/* TODO: move forward, remove when no longer needed */ transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); @@ -3591,14 +3722,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
allocate_register_reservations(ctx);
- /* For now, request all the registers for each variable, even if they are not used. */ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int k; - - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - var->regs[k].count = var->data_type->reg_size[k]; - } + request_object_registers_for_allocation(ctx);
allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 18105342..ce276f48 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -126,6 +126,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, VKD3D_SHADER_ERROR_HLSL_INVALID_PARAMETER = 5026, + VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX = 5027,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 3 +++ libs/vkd3d-shader/hlsl.h | 4 ++++ libs/vkd3d-shader/hlsl.y | 1 + libs/vkd3d-shader/hlsl_codegen.c | 21 ++++++++++++++++++++- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 5 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 4852ad7e..7432f65d 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1343,6 +1343,9 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, hlsl_src_from_node(&load->coords, params->coords); hlsl_src_from_node(&load->texel_offset, params->texel_offset); hlsl_src_from_node(&load->lod, params->lod); + load->sampling_dim = params->sampling_dim; + if (load->sampling_dim == HLSL_SAMPLER_DIM_GENERIC) + load->sampling_dim = hlsl_deref_get_type(ctx, &load->resource)->sampler_dim; return load; }
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index a1937002..883291d0 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -392,6 +392,8 @@ struct hlsl_ir_var
struct { bool used; + enum hlsl_sampler_dim usage_dim; + struct vkd3d_shader_location first_dim_usage_loc; } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1];
uint32_t is_input_semantic : 1; @@ -603,6 +605,7 @@ struct hlsl_ir_resource_load enum hlsl_resource_load_type load_type; struct hlsl_deref resource, sampler; struct hlsl_src coords, lod, texel_offset; + enum hlsl_sampler_dim sampling_dim; };
struct hlsl_ir_resource_store @@ -801,6 +804,7 @@ struct hlsl_resource_load_params enum hlsl_resource_load_type type; struct hlsl_ir_node *resource, *sampler; struct hlsl_ir_node *coords, *lod, *texel_offset; + enum hlsl_sampler_dim sampling_dim; };
static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 0ddae6ee..4816ad9b 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3200,6 +3200,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
load_params.coords = coords; load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + load_params.sampling_dim = dim;
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 6fb37d91..fdc16fed 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2728,15 +2728,33 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n
if (regset == HLSL_REGSET_SAMPLERS) { - assert(!load->sampler.var); + enum hlsl_sampler_dim dim;
+ assert(!load->sampler.var); if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX, "Non-constant index in sampler resource load."); return false; } + dim = var->objects_usage[regset][index].usage_dim; + if (dim != load->sampling_dim) + { + if (dim == HLSL_SAMPLER_DIM_GENERIC) + { + var->objects_usage[regset][index].first_dim_usage_loc = instr->loc; + } + else + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, + "Inconsistent generic sampler usage dimension."); + hlsl_note(ctx, &var->objects_usage[regset][index].first_dim_usage_loc, + VKD3D_SHADER_LOG_ERROR, "First use is here."); + return false; + } + } var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].usage_dim = load->sampling_dim; return true; } else @@ -2749,6 +2767,7 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n }
var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].usage_dim = load->sampling_dim;
if (load->sampler.var) { diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index ce276f48..f04c60a8 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -127,6 +127,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, VKD3D_SHADER_ERROR_HLSL_INVALID_PARAMETER = 5026, VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX = 5027, + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5028,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
From: Francisco Casas fcasas@codeweavers.com
--- include/vkd3d_d3d9types.h | 7 ++-- libs/vkd3d-shader/hlsl_sm1.c | 73 ++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/include/vkd3d_d3d9types.h b/include/vkd3d_d3d9types.h index 75d04614..c85c3bea 100644 --- a/include/vkd3d_d3d9types.h +++ b/include/vkd3d_d3d9types.h @@ -29,9 +29,10 @@
#define D3DSI_INSTLENGTH_SHIFT 24
-#define D3DSP_DCL_USAGE_SHIFT 0 -#define D3DSP_DCL_USAGEINDEX_SHIFT 16 -#define D3DSP_DSTMOD_SHIFT 20 +#define D3DSP_DCL_USAGE_SHIFT 0 +#define D3DSP_DCL_USAGEINDEX_SHIFT 16 +#define D3DSP_DSTMOD_SHIFT 20 +#define D3DSP_DCL_RESOURCETYPE_SHIFT 27
#define D3DSP_SRCMOD_SHIFT 24
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 137bb058..21913f31 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -653,6 +653,78 @@ static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ } }
+static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, res_type = 0; + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + switch (sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + res_type = 1; + break; + + case HLSL_SAMPLER_DIM_2D: + res_type = 2; + break; + + case HLSL_SAMPLER_DIM_CUBE: + res_type = 3; + break; + + case HLSL_SAMPLER_DIM_3D: + res_type = 4; + break; + + default: + vkd3d_unreachable(); + break; + } + + token = (1u << 31); + token |= res_type << D3DSP_DCL_RESOURCETYPE_SHIFT; + put_u32(buffer, token); + + reg.type = D3DSPR_SAMPLER; + reg.writemask = VKD3DSP_WRITEMASK_ALL; + reg.reg = reg_id; + + write_sm1_dst_register(buffer, ®); +} + +static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + enum hlsl_sampler_dim usage_dim; + unsigned int i, count, reg_id; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + + count = var->regs[HLSL_REGSET_SAMPLERS].count; + + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + { + usage_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].usage_dim; + assert(usage_dim != HLSL_SAMPLER_DIM_GENERIC); + + reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm1_sampler_dcl(ctx, buffer, reg_id, usage_dim); + } + } + } +} + static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { @@ -969,6 +1041,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
write_sm1_constant_defs(ctx, &buffer); write_sm1_semantic_dcls(ctx, &buffer); + write_sm1_sampler_dcls(ctx, &buffer); write_sm1_instructions(ctx, &buffer, entry_func);
put_u32(&buffer, D3DSIO_END);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_sm1.c | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 21913f31..6632dc99 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -902,6 +902,42 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b write_sm1_instruction(ctx, buffer, &sm1_instr); }
+static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + struct hlsl_ir_node *coords = load->coords.node; + unsigned int sampler_offset, reg_id; + struct sm1_instruction sm1_instr; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); + reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; + + sm1_instr = (struct sm1_instruction) + { + .opcode = D3DSIO_TEX, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = coords->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .srcs[1].type = D3DSPR_SAMPLER, + .srcs[1].reg = reg_id, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .src_count = 2, + }; + + assert(instr->reg.allocated); + + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { @@ -1016,6 +1052,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b write_sm1_load(ctx, buffer, instr); break;
+ case HLSL_IR_RESOURCE_LOAD: + write_sm1_resource_load(ctx, buffer, instr); + break; + case HLSL_IR_STORE: write_sm1_store(ctx, buffer, instr); break;
From: Francisco Casas fcasas@codeweavers.com
The new fixmes can be triggered in presence of object components within structs.
In shaders such as this one:
struct apple { Texture2D tex : TEX; float4 color : COLOR; };
float4 main(struct apple input) : sv_target { return input.tex.Load(int3(1, 2, 3)); }
Or this one:
struct { Texture2D tex; float4 color; } s;
float4 main() : sv_target { return s.tex.Load(int3(1, 2, 3)); } --- libs/vkd3d-shader/hlsl_sm4.c | 127 +++++++++++++++++----------- tests/object-references.shader_test | 6 +- tests/uav.shader_test | 4 +- 3 files changed, 84 insertions(+), 53 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 94bb4ed9..1fa419c9 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -627,13 +627,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) } else { + unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + put_u32(&buffer, sm4_resource_format(var->data_type)); put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, 1); /* bind count */ + put_u32(&buffer, var->regs[regset].count); /* bind count */ put_u32(&buffer, flags); }
@@ -893,6 +895,8 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_TEXTURES); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -903,6 +907,8 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_UAVS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -913,6 +919,8 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -1175,44 +1183,67 @@ static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, write_sm4_instruction(buffer, &instr); }
-static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) { - const struct sm4_instruction instr = + unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; + struct sm4_instruction instr; + + for (i = 0; i < count; ++i) { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue;
- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, - .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - write_sm4_instruction(buffer, &instr); + instr = (struct sm4_instruction) + { + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + + .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, + .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id + i}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + + write_sm4_instruction(buffer, &instr); + } }
-static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool uav) { - bool uav = (var->data_type->base_type == HLSL_TYPE_UAV); - struct sm4_instruction instr = + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + unsigned int i, count = var->data_type->reg_size[regset]; + struct hlsl_type *component_type; + struct sm4_instruction instr; + + component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + + for (i = 0; i < count; ++i) { - .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) - | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT), + if (!var->objects_usage[regset][i].used) + continue;
- .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {uav ? var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, + instr = (struct sm4_instruction) + { + .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) + | (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT),
- .idx[0] = sm4_resource_format(var->data_type) * 0x1111, - .idx_count = 1, - }; + .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, + .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1,
- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) - { - instr.opcode |= var->data_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } + .idx[0] = sm4_resource_format(component_type) * 0x1111, + .idx_count = 1, + };
- write_sm4_instruction(buffer, &instr); + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + write_sm4_instruction(buffer, &instr); + } }
static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) @@ -1492,9 +1523,9 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *texel_offset) + const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) { - bool uav = (resource_type->base_type == HLSL_TYPE_UAV); + bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); struct sm4_instruction instr; unsigned int dim_count;
@@ -1520,7 +1551,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf { /* Mipmap level is in the last component in the IR, but needs to be in the W * component in the instruction. */ - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); + dim_count = hlsl_sampler_dim_count(dim); if (dim_count == 1) instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); if (dim_count == 2) @@ -2256,10 +2287,9 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node;
- if (resource_type->class != HLSL_CLASS_OBJECT) + if (!hlsl_type_is_resource(resource_type)) { - assert(resource_type->class == HLSL_CLASS_ARRAY || resource_type->class == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable."); + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); return; }
@@ -2267,14 +2297,11 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, { const struct hlsl_type *sampler_type = load->sampler.var->data_type;
- if (sampler_type->class != HLSL_CLASS_OBJECT) + if (!hlsl_type_is_resource(sampler_type)) { - assert(sampler_type->class == HLSL_CLASS_ARRAY || sampler_type->class == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable."); + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); return; } - assert(sampler_type->base_type == HLSL_TYPE_SAMPLER); - assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC);
if (!load->sampler.var->is_uniform) { @@ -2293,7 +2320,7 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, { case HLSL_RESOURCE_LOAD: write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, - coords, texel_offset); + coords, texel_offset, load->sampling_dim); break;
case HLSL_RESOURCE_SAMPLE: @@ -2337,10 +2364,9 @@ static void write_sm4_resource_store(struct hlsl_ctx *ctx, { const struct hlsl_type *resource_type = store->resource.var->data_type;
- if (resource_type->class != HLSL_CLASS_OBJECT) + if (!hlsl_type_is_resource(resource_type)) { - assert(resource_type->class == HLSL_CLASS_ARRAY || resource_type->class == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &store->node.loc, "Resource being a component of another variable."); + hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); return; }
@@ -2504,12 +2530,17 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
for (i = 0; i < extern_resources_count; ++i) { + enum hlsl_regset regset; + var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type);
- if (var->data_type->base_type == HLSL_TYPE_SAMPLER) - write_sm4_dcl_sampler(&buffer, var); - else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV) - write_sm4_dcl_texture(&buffer, var); + if (regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&buffer, var); + else if (regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(ctx, &buffer, var, false); + else if (regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(ctx, &buffer, var, true); }
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) diff --git a/tests/object-references.shader_test b/tests/object-references.shader_test index bc74ccd4..5ed4dcd6 100644 --- a/tests/object-references.shader_test +++ b/tests/object-references.shader_test @@ -92,7 +92,7 @@ size (1, 1) size (1, 1) 3.0 3.0 3.0 1.0
-[pixel shader todo] +[pixel shader] Texture2D tex[3];
struct foo { @@ -111,8 +111,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (312, 312, 312, 111) +draw quad +probe all rgba (312, 312, 312, 111)
[pixel shader] diff --git a/tests/uav.shader_test b/tests/uav.shader_test index 0c690f8e..e7fdb8f4 100644 --- a/tests/uav.shader_test +++ b/tests/uav.shader_test @@ -133,7 +133,7 @@ size (1, 1)
0.5 0.6 0.7 0.8
-[pixel shader todo] +[pixel shader] RWTexture2D<float4> u[2] : register(u2);
float4 main() : sv_target1 @@ -144,6 +144,6 @@ float4 main() : sv_target1 }
[test] -todo draw quad +draw quad probe uav 2 (0, 0) rgba (1.1, 1.2, 1.3, 1.4) probe uav 3 (0, 0) rgba (2.1, 2.2, 2.3, 2.4)
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl.h:
struct hlsl_reg { uint32_t id;
- uint32_t count;
I was thinking "isn't this redundant with var->data_type->reg_size[...]?" and then discovered that this is the number of actually used registers, or at least it's supposed to be. That could use documentation :-)
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
regset_name, index);
}
index = var->regs[regset].id + i;
var->regs[regset].id = var->reg_reservation.reg_index;
var->regs[regset].allocated = true;
TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id);
reserved_object = get_allocated_object(ctx, regset, index);
if (reserved_object && reserved_object != var)
{
hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS,
"Multiple variables bound to %c%u.", regset_name, index);
hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR,
"Variable '%s' is already bound to %c%u.", reserved_object->name,
regset_name, index);
}
We probably want to `break` here too, lest we report multiple errors for the same overlap.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
{ var = entry_func->parameters.vars[i];
if (var->data_type->class == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM))
if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM))
I've spent far too much time noticing other things that also aren't quite right, but here's another :-/
hlsl_type_is_resource() is always suspicious because the answer to "what about structs?" is "mu". In this case, if we have a struct that contains both resources and non-resources, we actually need to do both prepend_uniform_copy() and prepend_input_var_copy() on it.
I'm not going to block this patch or even this hunk based on that, since it's not making anything worse (rather the opposite—I'd rather this just get in), but I would appreciate a fixme comment.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_sm1.c:
assert(regset == HLSL_REGSET_NUMERIC); put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); put_u32(buffer, var->data_type->reg_size[regset] / 4); }
These assert() calls are a little redundant now.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
uint32_t index)
{ const struct hlsl_ir_var *var;
unsigned int start, count;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { if (!var->regs[regset].allocated) continue;
if (index == var->regs[regset].id)
start = var->regs[regset].id;
count = var->regs[regset].count;
if (start <= index && index < start + count)
I can't remember offhand if we have a test for this, but is this supposed to be the used count or the register size?
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
rb_for_each_entry(&ctx->functions, dump_function, ctx); allocate_register_reservations(ctx);
- /* For now, request all the registers for each variable, even if they are not used. */
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
- {
unsigned int k;
for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k)
var->regs[k].count = var->data_type->reg_size[k];
- }
I think this causes a regression for simple variables at least—we used to check last_read. Granted, this is fixed later in the series, but it makes the patch look wrong by itself.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_sm4.c:
if (profile->major_version >= 5) {
put_u32(&buffer, 0); /* texture start */
put_u32(&buffer, 0); /* texture count */
put_u32(&buffer, 0); /* sampler start */
put_u32(&buffer, 0); /* sampler count */
unsigned int tex_alloc = !!var->regs[HLSL_REGSET_TEXTURES].allocated;
unsigned int sam_alloc = !!var->regs[HLSL_REGSET_SAMPLERS].allocated;
put_u32(&buffer, tex_alloc * var->regs[HLSL_REGSET_TEXTURES].id);
put_u32(&buffer, tex_alloc * var->regs[HLSL_REGSET_TEXTURES].count);
put_u32(&buffer, sam_alloc * var->regs[HLSL_REGSET_SAMPLERS].id);
put_u32(&buffer, sam_alloc * var->regs[HLSL_REGSET_SAMPLERS].count);
Why do we need to check "allocated" here? Even with the hacks in 1/8 I don't think it should be necessary, should it?
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
struct hlsl_ir_var *var = lhs->src.var; unsigned int i;
- if (hlsl_type_is_resource(type))
- {
hlsl_fixme(ctx, &lhs->node.loc, "Objects as input parameters.");
return;
- }
We skip these below for struct fields, so when can this happen?
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
- load = hlsl_ir_resource_load(instr);
- var = load->resource.var;
- regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource));
- if (regset == HLSL_REGSET_SAMPLERS)
- {
assert(!load->sampler.var);
if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index))
{
hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX,
"Non-constant index in sampler resource load.");
return false;
}
var->objects_usage[regset][index].used = true;
return true;
I'm somewhat inclined to say we should always return false if we aren't actually using the return value.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl.h:
* and the buffer_offset instead. */ struct hlsl_reg regs[HLSL_REGSET_LAST + 1];
- struct {
bool used;
- } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1];
Thought: what if we tracked this per component instead?
The current usage is close enough to RA / backend logic that I don't think it matters much from a design perspective, but I do suspect that in general this would make things a bit simpler, if only because it's a single array instead of several.
In fact, we could potentially expand this in the future to store other per-component logic. One particularly salient thing that comes to mind is tracking whether we need to use a bool / int variable for sm1 uniforms—which itself determines the size of each register set.
I think the downside is that request_object_registers_for_allocation() has to do a bit more work, but at least it's very clear and straightforward work.
(Also, style nitpick: imbalanced braces.)
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
"Non-constant index in sampler resource load.");
return false;
}
var->objects_usage[regset][index].used = true;
return true;
- }
- else
- {
if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index))
{
hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX,
"Non-constant index in resource load's resource.");
return false;
}
var->objects_usage[regset][index].used = true;
This is the same except for the error message. I'm not honestly sure if the difference between an sm1 sampler and and sm4 sampler is enough to justify the different error messages anyway.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
{
var = load->sampler.var;
if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index))
{
hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX,
"Non-constant index in resource load's sampler.");
return false;
}
var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true;
}
- }
- return false;
+}
+static bool request_object_registers_for_allocation(struct hlsl_ctx *ctx)
This name isn't my favourite, I can't immediately tell what it means. How about something like "calculate_var_register_sizes"?
Do we need to return non-void here?
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
if (path_node->type != HLSL_IR_CONSTANT)
return false;
/* We should always have generated a cast to UINT. */
assert(path_node->data_type->class == HLSL_CLASS_SCALAR
&& path_node->data_type->base_type == HLSL_TYPE_UINT);
idx = hlsl_ir_constant(path_node)->value[0].u;
switch (type->class)
{
case HLSL_CLASS_ARRAY:
if (idx >= type->e.array.elements_count)
{
hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS,
"Array index is out of bounds. %u/%u", idx, type->e.array.elements_count);
"Array index %u is higher than array size %u" is a little more descriptive, although I'm sure this is understandable in practice. I'm more concerned that this isn't quite the place for this check, though, if only because it seems like this function could get called more than once.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl.h:
struct { bool used;
enum hlsl_sampler_dim usage_dim;
struct vkd3d_shader_location first_dim_usage_loc;
"sampler_dim"? (and "first_sampler_dim_loc", for consistency)
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_sm1.c:
- {
case HLSL_SAMPLER_DIM_1D:
res_type = 1;
break;
case HLSL_SAMPLER_DIM_2D:
res_type = 2;
break;
case HLSL_SAMPLER_DIM_CUBE:
res_type = 3;
break;
case HLSL_SAMPLER_DIM_3D:
res_type = 4;
break;
I believe these are D3DSAMPLER_TEXTURE_TYPE.
Or enum vkd3d_sm1_resource_type. I don't really know why we invented our own sm1 definitions, though...
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_sm1.c:
case HLSL_SAMPLER_DIM_CUBE:
res_type = 3;
break;
case HLSL_SAMPLER_DIM_3D:
res_type = 4;
break;
default:
vkd3d_unreachable();
break;
- }
- token = (1u << 31);
- token |= res_type << D3DSP_DCL_RESOURCETYPE_SHIFT;
- put_u32(buffer, token);
I believe 1.x doesn't write the type.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_sm1.c:
write_sm1_instruction(ctx, buffer, &sm1_instr);
}
+static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
const struct hlsl_ir_node *instr)
+{
- const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
- struct hlsl_ir_node *coords = load->coords.node;
- unsigned int sampler_offset, reg_id;
- struct sm1_instruction sm1_instr;
- sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource);
- reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset;
- sm1_instr = (struct sm1_instruction)
- {
Nice compound literal :-)
It is c99, so I think there's no reason not to.
I just noticed, the change to hlsl_ir_resource_load() needs the clone function to be updated.
On Fri Apr 14 00:45:45 2023 +0000, Zebediah Figura wrote:
I think this causes a regression for simple variables at least—we used to check last_read. Granted, this is fixed later in the series, but it makes the patch look wrong by itself.
There would be a regression because uniforms would be bound at different offsets?
On Mon Apr 17 21:20:24 2023 +0000, Zebediah Figura wrote:
These assert() calls are a little redundant now.
That's very minor, but maybe the two `if` branches could be reordered. In general I find it easier to mentally parse an equality than an inequality.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_sm4.c:
if (var->is_uniform) {
if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE)
enum hlsl_regset regset = HLSL_REGSET_NUMERIC;
if (hlsl_type_is_resource(data_type))
regset = hlsl_type_get_regset(data_type);
Why not directly `regset = hlsl_type_get_regset(data_type)`? Structures should never reach this point, should they?
On Mon Apr 17 19:39:08 2023 +0000, Zebediah Figura wrote:
I was thinking "isn't this redundant with var->data_type->reg_size[...]?" and then discovered that this is the number of actually used registers, or at least it's supposed to be. That could use documentation :-)
I find a bit puzzling that the `count` field is not always initialized. I think we should either always initialize it, or document in which cases it is expected to be initialized.
On Fri Apr 14 00:45:47 2023 +0000, Zebediah Figura wrote:
We skip these below for struct fields, so when can this happen?
Also, could you please add tests for object input and output parameters?
On Tue Apr 18 11:43:10 2023 +0000, Giovanni Mascellani wrote:
There would be a regression because uniforms would be bound at different offsets?
I believe so, yes.
That's very minor, but maybe the two `if` branches could be reordered. In general I find it easier to mentally parse an equality than an inequality.
Yes, agreed in general.
We skip these below for struct fields, so when can this happen?
Uh, right. First, I am skipping resource struct fields in prepend_input_struct_copy() but not in append_output_struct_copy(), so I added that, which was missing.
Second, yes. The errors in append_input_copy() and append_output_copy() aren't needed if we skip the resource fields and the the variables that are resources themselves, as we do directly in hlsl_emit_bytecode().
Also, could you please add tests for object input and output parameters?
This is related to this thread https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/159#note_29993 .
I think it would be more correct to say "objects within parameters", since resources are not considered input or output parameters, but are handled as uniforms instead, even if they are contained within input/output parameters that are structs. I removed the error for the same reason.
I had tests for objects as parameters in following patches. I brought them to this MR. But here, they are mostly useful to check that there aren't failed assertions or segfaults.
On Fri Apr 14 00:45:43 2023 +0000, Zebediah Figura wrote:
We probably want to `break` here too, lest we report multiple errors for the same overlap.
If be `break` we would not be reporting all the overlaps:
Consider 3 variables: ``` Texture2D a[2] : register(t0); Texture2D b[2] : register(t2); Texture2D c[4] : register(t0);
float4 main() : sv_target { return a[1].Load(int3(0, 0, 0)) + b[1].Load(int3(0, 0, 0)) + c[3].Load(int3(0, 0, 0)); } ``` `c` would be in charge of detecting overlaps, and if it breaks after reporting the overlap with `a`, the overlap with `c` wouldn't be reported.
But I modified the implementation a little to keep track of the last `reserved_object` reported so that the same collision is not reported more than once.
On Fri Apr 14 00:45:44 2023 +0000, Zebediah Figura wrote:
I've spent far too much time noticing other things that also aren't quite right, but here's another :-/ hlsl_type_is_resource() is always suspicious because the answer to "what about structs?" is "mu". In this case, if we have a struct that contains both resources and non-resources, we actually need to do both prepend_uniform_copy() and prepend_input_var_copy() on it. I'm not going to block this patch or even this hunk based on that, since it's not making anything worse (rather the opposite—I'd rather this just get in), but I would appreciate a fixme comment.
Yes, I think that the proper solution would be to implement a recursive version of prepend_uniform_copy() with similar structure as in commit b7885fe5b4709e5b7f2a258a418567124ae3c61a from !148, and always calling it.
For now, I added a patch for emitting a fixme for the missing uniform copies for objects within structs.
On Fri Apr 14 00:45:45 2023 +0000, Zebediah Figura wrote:
I can't remember offhand if we have a test for this, but is this supposed to be the used count or the register size?
Yep, it is supposed to be `count`.
The purpose of `count` is indeed to keep track of how many registers the variable actually needs to allocate, which may be less than the register size.
If the resource has a register reservation, `count` should be the register size. If the resource is not used at all, `count` should be zero. If only some elements are used, sampler arrays only allocate until the last used one, while texture arrays allocate all.
I have some tests along the line for patches after these ones, but perhaps some are missing at this stage. I added a couple of simple tests for texture arrays in object-parameters.shader_test .
On Tue Apr 18 23:32:14 2023 +0000, Zebediah Figura wrote:
I believe so, yes.
I see. I improved this provisional code to avoid the temporal regression.
I was thinking "isn't this redundant with var->data_type->reg_size[...]?" and then discovered that this is the number of actually used registers, or at least it's supposed to be. That could use documentation :-)
Okay, I added the documentation.
I find a bit puzzling that the `count` field is not always initialized. I think we should either always initialize it, or document in which cases it is expected to be initialized.
I agree, `count` is now being initialized for buffers, semantics, and temps now.
On Fri Apr 14 00:45:46 2023 +0000, Zebediah Figura wrote:
Why do we need to check "allocated" here? Even with the hacks in 1/8 I don't think it should be necessary, should it?
Now that you mention it... I really don't remember at this point ;_;
This code path only matters for variables within constant buffers, that have object components, but that also have their numeric components used (otherwise they don't show). We don't support these yet.
For instance, in this test:
``` cbuffer buff { struct { Texture2D tex[4]; float4 a; } st; }
float4 main() : sv_target { return st.a + st.tex[1].Load(int3(0, 0, 0)); } ``` the native compiler gives the following bits for the definition of st (look in particular to the last 4 words of the Variable definition): ``` ... 164 > Variable └─ 00000000000000000000000011001100[offset_name:204] └─ 00000000000000000000000000000000[offset_from_start_of_cbuffer:0] └─ 00000000000000000000000000010000[size:16] └─ 00000000000000000000000000000010[flags:2] └─ 00000000000000000000000100010100[var_type_offset:276] └─ 00000000000000000000000000000000[default_val_offset:0] └─ 00000000000000000000000000000000[tex_start:0] └─ 00000000000000000000000000000010[tex_count:2] └─ 11111111111111111111111111111111[sam_start:4294967295] └─ 00000000000000000000000000000000[sam_count:0] 204 > Name └─ 01110011 01110100 00000000 "st" ... ```
I also noticed that the here called `tex_start` and `sam_start` words should be -1 (rather than 0) when the registers are not allocated, as `sam_start` in this example.
Anyways, I think it is better if I move this patch to another MR, once I have inspected and tested this further.
This name isn't my favourite, I can't immediately tell what it means. How about something like "calculate_var_register_sizes"?
I am not sure about that name. The "register_sizes" part may confuse `count` with `regsize` and the "var" part may give the impression that it is just for one var.
I changed it to "calculate_resource_registers_counts". Is it good?
Do we need to return non-void here?
In fact, we do not. Changed it to void.
On Fri Apr 14 00:45:50 2023 +0000, Zebediah Figura wrote:
I believe these are D3DSAMPLER_TEXTURE_TYPE. Or enum vkd3d_sm1_resource_type. I don't really know why we invented our own sm1 definitions, though...
That makes sense.
`enum vkd3d_sm1_resource_type` is defined in d3dbc.c though. Should I copy the definition or move it to vkd3d_shader_private.h, or some other header?
On Fri Apr 14 00:45:51 2023 +0000, Zebediah Figura wrote:
I believe 1.x doesn't write the type.
From my tests, it doesn't write `dcl_xx` instructions at all.
Should I skip writing the `dcl_xx` sampler declaration instructions for `ps_1_x`, then?
On Tue Apr 18 13:42:27 2023 +0000, Giovanni Mascellani wrote:
Why not directly `regset = hlsl_type_get_regset(data_type)`? Structures should never reach this point, should they?
Yep, they shouldn't. I changed it.
On Wed Apr 19 15:46:30 2023 +0000, Zebediah Figura wrote:
This is the same except for the error message. I'm not honestly sure if the difference between an sm1 sampler and and sm4 sampler is enough to justify the different error messages anyway.
In the next patch, "vkd3d-shader/hlsl: Track objects sampling dimension.", the resource loads where the resource is a sampler have to be handled differently (to handle the sampler_dim), so the separation makes sense.
Regarding the error messages, refer to https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/159#note_30003 .
On Fri Apr 14 00:45:49 2023 +0000, Zebediah Figura wrote:
"Array index %u is higher than array size %u" is a little more descriptive, although I'm sure this is understandable in practice. I'm more concerned that this isn't quite the place for this check, though, if only because it seems like this function could get called more than once.
I realized that all these errors (out of bounds and non-constant indexes for resources) are being reported by validate_static_object_references() now. So I'll better just remove these, now redundant, error messages.
On Fri Apr 14 00:45:48 2023 +0000, Zebediah Figura wrote:
Thought: what if we tracked this per component instead? The current usage is close enough to RA / backend logic that I don't think it matters much from a design perspective, but I do suspect that in general this would make things a bit simpler, if only because it's a single array instead of several. In fact, we could potentially expand this in the future to store other per-component logic. One particularly salient thing that comes to mind is tracking whether we need to use a bool / int variable for sm1 uniforms—which itself determines the size of each register set. I think the downside is that request_object_registers_for_allocation() has to do a bit more work, but at least it's very clear and straightforward work. (Also, style nitpick: imbalanced braces.)
I don't like the idea very much. We often would want (in this MR patches and later) to iterate over the elements in a specific regset. These cases are easier now:
```c unsigned int i, count = var->data_type->reg_size[regset];
for (i = 0; i < count; ++i) { /* Directly use var->objects_usage[regset][i] */ } ```
than they would be if objects_usage is arranged per-component:
```c unsigned int k, i, count = hlsl_type_component_count(var->data_type);
i = 0; for (k = 0; k < count; ++k) { hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k);
if (regset != hlsl_type_get_regset(ctx, component_type)) continue
/* Use var->objects_usage[k], knowing that its offset in the regset is i. */
++i; } ```
In fact, we could potentially expand this in the future to store other per-component logic. One particularly salient thing that comes to mind is tracking whether we need to use a bool / int variable for sm1 uniforms—which itself determines the size of each register set.
I think that in this case we would rather store this information variable-wise. Since we only need the maximum register index used, and no information for every component, as we do for samplers (sampler_dim), and textures in SM 5 (we need to write a resource entry for every used component).