First part of v2 of !27, which aims to:
* Allow allocation of variables of complex types that contain both numerics and objects across multiple register sets (regsets). * Support the tex2D and tex3D intrinsics, inferring generic samplers dimension from usage, writing sampler declarations, and writing sample instructions. * Support for arrays of resources for both SM1 and SM4 (not to be confused with the resource-arrays of SM 5.1, which can have non-constant indexes). * Support for resources declared within structs. * Support for synthetic combined samplers for SM1 and synthetic separated samplers for SM4, considering that they can be arrays or members of structs. * Imitate the way the native compiler assigns the register indexes of the resources on allocation, which proved to be the most difficult thing. * Support for object components within complex input parameters. * Small fixes to corner cases.
This part consists on parsing the `tex2D()` and `tex3D()` intrinsics and beginning to support the allocation of variables across multiple regsets.
The whole series, is on my [master6](https://gitlab.winehq.org/fcasas/vkd3d/-/commits/master6) branch.
-- v13: vkd3d-shader/hlsl: Allocate register reservations in a separate pass. vkd3d-shader/hlsl: Respect object reservations even if the object is unused. tests: Test allocation of unused objects. vkd3d-shader/hlsl: Allocate objects according to register set. vkd3d-shader/hlsl: Keep an hlsl_reg for each register set in hlsl_ir_var. vkd3d-shader/hlsl: Obtain extern resources as a separate array in SM4. vkd3d-shader/hlsl: Store the type's register size for each register set. vkd3d-shader/hlsl: Leave offset empty for array and struct derefs. vkd3d-shader/hlsl: Parse the tex3D() intrinsic. vkd3d-shader/hlsl: Parse the tex2D() intrinsic. vkd3d-shader/hlsl: Avoid segfault on missing sampler.
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_sm4.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 10bb1e3d..d69b8fea 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -2265,7 +2265,10 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx,
case HLSL_RESOURCE_SAMPLE: if (!load->sampler.var) + { hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); + return; + } write_sm4_sample(ctx, buffer, resource_type, &load->node, &load->resource, &load->sampler, coords, texel_offset); break;
From: Zebediah Figura zfigura@codeweavers.com
--- Modifications: * Using new hlsl_resource_load_params struct. * Removed `HLSL_OP2_SAMPLE` from enum hlsl_ir_expr_op, since it is not used. --- libs/vkd3d-shader/hlsl.y | 61 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 697b895d..1166673c 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -2973,6 +2973,66 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); }
+static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) +{ + struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; + const struct hlsl_type *sampler_type; + struct hlsl_ir_resource_load *load; + struct hlsl_ir_load *sampler_load; + struct hlsl_ir_node *coords; + + if (params->args_count != 2 && params->args_count != 4) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Wrong number of arguments to function '%s': expected 2 or 4, but got %u.", name, params->args_count); + return false; + } + + if (params->args_count == 4) + { + hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); + } + + sampler_type = params->args[0]->data_type; + if (sampler_type->type != HLSL_CLASS_OBJECT || sampler_type->base_type != HLSL_TYPE_SAMPLER + || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, sampler_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 1 of '%s': expected 'sampler' or '%s', but got '%s'.", + name, ctx->builtin_types.sampler[dim]->name, string->buffer); + hlsl_release_string_buffer(ctx, string); + } + else + { + /* Only HLSL_IR_LOAD can return an object. */ + sampler_load = hlsl_ir_load(params->args[0]); + + load_params.resource = sampler_load->src; + } + + if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) + coords = params->args[1]; + + load_params.coords = coords; + load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + + if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) + return false; + list_add_tail(params->instrs, &load->node.entry); + return true; +} + +static bool intrinsic_tex2D(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); +} + static bool intrinsic_transpose(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3068,6 +3128,7 @@ intrinsic_functions[] = {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, {"step", 2, true, intrinsic_step}, + {"tex2D", -1, false, intrinsic_tex2D}, {"transpose", 1, true, intrinsic_transpose}, };
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 1166673c..3fdfe20e 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3033,6 +3033,12 @@ static bool intrinsic_tex2D(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); }
+static bool intrinsic_tex3D(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); +} + static bool intrinsic_transpose(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3129,6 +3135,7 @@ intrinsic_functions[] = {"sqrt", 1, true, intrinsic_sqrt}, {"step", 2, true, intrinsic_step}, {"tex2D", -1, false, intrinsic_tex2D}, + {"tex3D", -1, false, intrinsic_tex3D}, {"transpose", 1, true, intrinsic_transpose}, };
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 87559502..243d713c 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -125,6 +125,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { + const struct hlsl_type *type; struct hlsl_ir_node *offset; struct hlsl_block block;
@@ -134,6 +135,16 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der /* register offsets shouldn't be used before this point is reached. */ assert(!deref->offset.node);
+ type = hlsl_deref_get_type(ctx, deref); + + /* Instructions that directly refer to structs or arrays (instead of single-register components) + * are removed later by dce. So it is not a problem to just cleanup their derefs. */ + if (type->type == HLSL_CLASS_STRUCT || type->type == HLSL_CLASS_ARRAY) + { + hlsl_cleanup_deref(deref); + return; + } + if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) return; list_move_before(&instr->entry, &block.instrs);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 106 ++++++++++++++++++++++++------- libs/vkd3d-shader/hlsl.h | 34 +++++++--- libs/vkd3d-shader/hlsl_codegen.c | 43 ++++++++----- libs/vkd3d-shader/hlsl_sm1.c | 2 +- libs/vkd3d-shader/hlsl_sm4.c | 4 +- 5 files changed, 137 insertions(+), 52 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index d6a87b3f..42a1d52f 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -164,6 +164,49 @@ static unsigned int get_array_size(const struct hlsl_type *type) return 1; }
+bool hlsl_type_is_resource(const struct hlsl_type *type) +{ + if (type->type == HLSL_CLASS_OBJECT) + { + switch (type->base_type) + { + case HLSL_TYPE_TEXTURE: + case HLSL_TYPE_SAMPLER: + case HLSL_TYPE_UAV: + return true; + default: + return false; + } + } + return false; +} + +enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) +{ + if (type->type <= HLSL_CLASS_LAST_NUMERIC) + return HLSL_REGSET_NUMERIC; + + if (type->type == HLSL_CLASS_OBJECT) + { + switch (type->base_type) + { + case HLSL_TYPE_TEXTURE: + return HLSL_REGSET_TEXTURES; + + case HLSL_TYPE_SAMPLER: + return HLSL_REGSET_SAMPLERS; + + case HLSL_TYPE_UAV: + return HLSL_REGSET_UAVS; + + default: + vkd3d_unreachable(); + } + } + + vkd3d_unreachable(); +} + unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset) { /* Align to the next vec4 boundary if: @@ -171,7 +214,7 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int * (b) the type would cross a vec4 boundary; i.e. a vec3 and a * vec1 can be packed together, but not a vec3 and a vec2. */ - if (type->type > HLSL_CLASS_LAST_NUMERIC || (offset & 3) + type->reg_size > 4) + if (type->type > HLSL_CLASS_LAST_NUMERIC || (offset & 3) + type->reg_size[HLSL_REGSET_NUMERIC] > 4) return align(offset, 4); return offset; } @@ -179,31 +222,40 @@ unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type *type) { bool is_sm4 = (ctx->profile->major_version >= 4); + unsigned int k; + + for (k = 0; k <= HLSL_REGSET_LAST; ++k) + type->reg_size[k] = 0;
switch (type->type) { case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: - type->reg_size = is_sm4 ? type->dimx : 4; + type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? type->dimx : 4; break;
case HLSL_CLASS_MATRIX: if (hlsl_type_is_row_major(type)) - type->reg_size = is_sm4 ? (4 * (type->dimy - 1) + type->dimx) : (4 * type->dimy); + type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? (4 * (type->dimy - 1) + type->dimx) : (4 * type->dimy); else - type->reg_size = is_sm4 ? (4 * (type->dimx - 1) + type->dimy) : (4 * type->dimx); + type->reg_size[HLSL_REGSET_NUMERIC] = is_sm4 ? (4 * (type->dimx - 1) + type->dimy) : (4 * type->dimx); break;
case HLSL_CLASS_ARRAY: { - unsigned int element_size = type->e.array.type->reg_size; - if (type->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - type->reg_size = 0; - else if (is_sm4) - type->reg_size = (type->e.array.elements_count - 1) * align(element_size, 4) + element_size; - else - type->reg_size = type->e.array.elements_count * element_size; + break; + + for (k = 0; k <= HLSL_REGSET_LAST; ++k) + { + unsigned int element_size = type->e.array.type->reg_size[k]; + + if (is_sm4 && k == HLSL_REGSET_NUMERIC) + type->reg_size[k] = (type->e.array.elements_count - 1) * align(element_size, 4) + element_size; + else + type->reg_size[k] = type->e.array.elements_count * element_size; + } + break; }
@@ -212,16 +264,17 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type unsigned int i;
type->dimx = 0; - type->reg_size = 0; - for (i = 0; i < type->e.record.field_count; ++i) { struct hlsl_struct_field *field = &type->e.record.fields[i]; - unsigned int field_size = field->type->reg_size;
- type->reg_size = hlsl_type_get_sm4_offset(field->type, type->reg_size); - field->reg_offset = type->reg_size; - type->reg_size += field_size; + for (k = 0; k <= HLSL_REGSET_LAST; ++k) + { + if (k == HLSL_REGSET_NUMERIC) + type->reg_size[k] = hlsl_type_get_sm4_offset(field->type, type->reg_size[k]); + field->reg_offset[k] = type->reg_size[k]; + type->reg_size[k] += field->type->reg_size[k]; + }
type->dimx += field->type->dimx * field->type->dimy * get_array_size(field->type); } @@ -229,16 +282,25 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type }
case HLSL_CLASS_OBJECT: - type->reg_size = 0; + { + if (hlsl_type_is_resource(type)) + { + enum hlsl_regset regset = hlsl_type_get_regset(type); + + type->reg_size[regset] = 1; + } break; + } } }
-/* Returns the size of a type, considered as part of an array of that type. - * As such it includes padding after the type. */ -unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type) +/* Returns the size of a type, considered as part of an array of that type, within a specific + * register set. As such it includes padding after the type, when applicable. */ +unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset) { - return align(type->reg_size, 4); + if (regset == HLSL_REGSET_NUMERIC) + return align(type->reg_size[regset], 4); + return type->reg_size[regset]; }
static struct hlsl_type *hlsl_new_type(struct hlsl_ctx *ctx, const char *name, enum hlsl_type_class type_class, diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 81b1a61d..3cbd3158 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -122,6 +122,16 @@ enum hlsl_matrix_majority HLSL_ROW_MAJOR };
+enum hlsl_regset +{ + HLSL_REGSET_SAMPLERS, + HLSL_REGSET_TEXTURES, + HLSL_REGSET_UAVS, + HLSL_REGSET_LAST_OBJECT = HLSL_REGSET_UAVS, + HLSL_REGSET_NUMERIC, + HLSL_REGSET_LAST = HLSL_REGSET_NUMERIC, +}; + /* An HLSL source-level data type, including anonymous structs and typedefs. */ struct hlsl_type { @@ -183,12 +193,12 @@ struct hlsl_type struct hlsl_type *resource_format; } e;
- /* Number of numeric register components used by one value of this type (4 components make 1 - * register). - * If type is HLSL_CLASS_STRUCT or HLSL_CLASS_ARRAY, this value includes the reg_size of - * their elements and padding (which varies according to the backend). - * This value is 0 for types without numeric components, like objects. */ - unsigned int reg_size; + /* Number of numeric register components used by one value of this type, for each regset. + * For HLSL_REGSET_NUMERIC, 4 components make 1 register, while for other regsets 1 component makes + * 1 register. + * If type is HLSL_CLASS_STRUCT or HLSL_CLASS_ARRAY, the reg_size of their elements and padding + * (which varies according to the backend) is also included. */ + unsigned int reg_size[HLSL_REGSET_LAST + 1]; /* Offset where the type's description starts in the output bytecode, in bytes. */ size_t bytecode_offset;
@@ -215,8 +225,8 @@ struct hlsl_struct_field * type->modifiers instead) and that also are specific to the field and not the whole variable. * In particular, interpolation modifiers. */ unsigned int storage_modifiers; - /* Offset of the field within the type it belongs to, in numeric register components. */ - unsigned int reg_offset; + /* Offset of the field within the type it belongs to, in register components, for each regset. */ + unsigned int reg_offset[HLSL_REGSET_LAST + 1];
/* Offset where the fields's name starts in the output bytecode, in bytes. */ size_t name_bytecode_offset; @@ -556,10 +566,12 @@ struct hlsl_deref struct hlsl_src *path;
/* Single instruction node of data type uint used to represent the register offset (in register - * components), from the start of the variable, of the part referenced. + * components, within the pertaining regset), from the start of the variable, of the part + * referenced. * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- * before writing the bytecode. */ struct hlsl_src offset; + enum hlsl_regset offset_regset; };
struct hlsl_ir_load @@ -1084,13 +1096,15 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type); struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old, unsigned int default_majority, unsigned int modifiers); unsigned int hlsl_type_component_count(const struct hlsl_type *type); -unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type); +unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset); struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, unsigned int index); bool hlsl_type_is_row_major(const struct hlsl_type *type); unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); unsigned int hlsl_type_element_count(const struct hlsl_type *type); +bool hlsl_type_is_resource(const struct hlsl_type *type); +enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 243d713c..9160fd57 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -24,7 +24,7 @@ /* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_type *type, struct hlsl_ir_node *offset, struct hlsl_ir_node *idx, - const struct vkd3d_shader_location *loc) + enum hlsl_regset regset, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *idx_offset = NULL; struct hlsl_ir_constant *c; @@ -52,7 +52,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str
case HLSL_CLASS_ARRAY: { - unsigned int size = hlsl_type_get_array_element_reg_size(type->e.array.type); + unsigned int size = hlsl_type_get_array_element_reg_size(type->e.array.type, regset);
if (!(c = hlsl_new_uint_constant(ctx, size, loc))) return NULL; @@ -70,7 +70,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str unsigned int field_idx = hlsl_ir_constant(idx)->value[0].u; struct hlsl_struct_field *field = &type->e.record.fields[field_idx];
- if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset, loc))) + if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset[regset], loc))) return NULL; list_add_tail(&block->instrs, &c->node.entry);
@@ -110,7 +110,8 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st { struct hlsl_block idx_block;
- if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, loc))) + if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, + deref->offset_regset, loc))) return NULL;
list_move_tail(&block->instrs, &idx_block.instrs); @@ -145,6 +146,8 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der return; }
+ deref->offset_regset = hlsl_type_get_regset(type); + if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) return; list_move_before(&instr->entry, &block.instrs); @@ -2488,24 +2491,26 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liv static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct liveness *liveness, unsigned int first_write, unsigned int last_read, const struct hlsl_type *type) { + unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + if (type->type <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, liveness, first_write, last_read, type->reg_size, type->dimx); + return allocate_register(ctx, liveness, first_write, last_read, reg_size, type->dimx); else - return allocate_range(ctx, liveness, first_write, last_read, type->reg_size); + return allocate_range(ctx, liveness, first_write, last_read, reg_size); }
static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) { static const char writemask_offset[] = {'w','x','y','z'}; + unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
- if (type->reg_size > 4) + if (reg_size > 4) { - if (type->reg_size & 3) - return vkd3d_dbg_sprintf("%c%u-%c%u.%c", class, reg.id, class, - reg.id + (type->reg_size / 4), writemask_offset[type->reg_size & 3]); + if (reg_size & 3) + return vkd3d_dbg_sprintf("%c%u-%c%u.%c", class, reg.id, class, reg.id + (reg_size / 4), + writemask_offset[reg_size & 3]);
- return vkd3d_dbg_sprintf("%c%u-%c%u", class, reg.id, class, - reg.id + (type->reg_size / 4) - 1); + return vkd3d_dbg_sprintf("%c%u-%c%u", class, reg.id, class, reg.id + (reg_size / 4) - 1); } return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); } @@ -2592,7 +2597,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_b struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); const struct hlsl_type *type = instr->data_type; unsigned int x, y, i, writemask, end_reg; - unsigned int reg_size = type->reg_size; + unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
constant->reg = allocate_numeric_registers_for_type(ctx, liveness, 1, UINT_MAX, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); @@ -2688,7 +2693,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi { if (var->is_uniform && var->last_read) { - if (var->data_type->reg_size == 0) + if (var->data_type->reg_size[HLSL_REGSET_NUMERIC] == 0) continue;
var->reg = allocate_numeric_registers_for_type(ctx, &liveness, 1, UINT_MAX, var->data_type); @@ -2807,7 +2812,7 @@ static void calculate_buffer_offset(struct hlsl_ir_var *var)
var->buffer_offset = buffer->size; TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); - buffer->size += var->data_type->reg_size; + buffer->size += var->data_type->reg_size[HLSL_REGSET_NUMERIC]; if (var->last_read) buffer->used_size = buffer->size; } @@ -3062,6 +3067,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; + unsigned int size;
if (!offset_node) { @@ -3078,10 +3084,11 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref
*offset = hlsl_ir_constant(offset_node)->value[0].u;
- if (*offset >= deref->var->data_type->reg_size) + size = deref->var->data_type->reg_size[deref->offset_regset]; + if (*offset >= size) { hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Dereference is out of bounds. %u/%u", *offset, deref->var->data_type->reg_size); + "Dereference is out of bounds. %u/%u", *offset, size); return false; }
@@ -3107,6 +3114,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere struct hlsl_reg ret = var->reg; unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
+ assert(deref->offset_regset == HLSL_REGSET_NUMERIC); + ret.id += offset / 4;
ret.writemask = 0xf & (0xf << (offset % 4)); diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 6215b93d..10f32766 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -366,7 +366,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe else { put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->reg.id)); - put_u32(buffer, var->data_type->reg_size / 4); + put_u32(buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] / 4); } put_u32(buffer, 0); /* type */ put_u32(buffer, 0); /* FIXME: default value */ diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index d69b8fea..87882fc4 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -395,7 +395,7 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
put_u32(buffer, field->name_bytecode_offset); put_u32(buffer, field->type->bytecode_offset); - put_u32(buffer, field->reg_offset); + put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); } }
@@ -711,7 +711,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
put_u32(&buffer, 0); /* name */ put_u32(&buffer, var->buffer_offset * sizeof(float)); - put_u32(&buffer, var->data_type->reg_size * sizeof(float)); + put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); put_u32(&buffer, flags); put_u32(&buffer, 0); /* type */ put_u32(&buffer, 0); /* FIXME: default value */
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_sm4.c | 92 +++++++++++++++++------------------- 1 file changed, 43 insertions(+), 49 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 87882fc4..b7eb5ac9 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -480,52 +480,50 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ } }
-static int sm4_compare_externs(const struct hlsl_ir_var *a, const struct hlsl_ir_var *b) +static int sm4_compare_extern_resources(const void *a, const void *b) { - if (a->data_type->base_type != b->data_type->base_type) - return a->data_type->base_type - b->data_type->base_type; - if (a->reg.allocated && b->reg.allocated) - return a->reg.id - b->reg.id; - return strcmp(a->name, b->name); + const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; + const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; + + if (aa->data_type->base_type != bb->data_type->base_type) + return aa->data_type->base_type - bb->data_type->base_type; + return aa->reg.id - bb->reg.id; }
-static void sm4_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { - struct hlsl_ir_var *var; - - list_remove(&to_sort->extern_entry); + const struct hlsl_ir_var **extern_resources = NULL; + const struct hlsl_ir_var *var; + size_t capacity = 0; + *count = 0;
- LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (sm4_compare_externs(to_sort, var) < 0) + if (var->reg.allocated && var->data_type->type == HLSL_CLASS_OBJECT) { - list_add_before(&var->extern_entry, &to_sort->extern_entry); - return; + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + *count = 0; + return NULL; + } + + extern_resources[*count] = var; + ++*count; } }
- list_add_tail(sorted, &to_sort->extern_entry); -} - -static void sm4_sort_externs(struct hlsl_ctx *ctx) -{ - struct list sorted = LIST_INIT(sorted); - struct hlsl_ir_var *var, *next; - - LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->data_type->type == HLSL_CLASS_OBJECT) - sm4_sort_extern(&sorted, var); - } - list_move_tail(&ctx->extern_vars, &sorted); + qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); + return extern_resources; }
static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { + unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; size_t cbuffers_offset, resources_offset, creator_offset, string_offset; size_t cbuffer_position, resource_position, creator_position; - unsigned int cbuffer_count = 0, resource_count = 0, i, j; const struct hlsl_profile_info *profile = ctx->profile; + const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var; @@ -540,14 +538,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) 0x4353, /* COMPUTE */ };
- sm4_sort_externs(ctx); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->reg.allocated && var->data_type->type == HLSL_CLASS_OBJECT) - ++resource_count; - } + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
+ resource_count += extern_resources_count; LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) @@ -583,12 +576,11 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) resources_offset = bytecode_get_size(&buffer); set_u32(&buffer, resource_position, resources_offset);
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + for (i = 0; i < extern_resources_count; ++i) { uint32_t flags = 0;
- if (!var->reg.allocated || var->data_type->type != HLSL_CLASS_OBJECT) - continue; + var = extern_resources[i];
if (var->reg_reservation.type) flags |= D3D_SIF_USERPACKED; @@ -633,12 +625,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) put_u32(&buffer, flags); /* flags */ }
- i = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + for (i = 0; i < extern_resources_count; ++i) { - if (!var->reg.allocated || var->data_type->type != HLSL_CLASS_OBJECT) - continue; + var = extern_resources[i];
string_offset = put_string(&buffer, var->name); set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); @@ -653,8 +642,6 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); }
- assert(i == resource_count); - /* Buffers. */
cbuffers_offset = bytecode_get_size(&buffer); @@ -747,6 +734,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) set_u32(&buffer, creator_position, creator_offset);
dxbc_writer_add_section(dxbc, TAG_RDEF, buffer.data, buffer.size); + + vkd3d_free(extern_resources); }
static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) @@ -2438,7 +2427,9 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) { const struct hlsl_profile_info *profile = ctx->profile; + const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; + unsigned int extern_resources_count, i; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var; size_t token_count_position; @@ -2456,6 +2447,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, VKD3D_SM4_LIB, };
+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); token_count_position = put_u32(&buffer, 0);
@@ -2465,10 +2458,9 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, write_sm4_dcl_constant_buffer(&buffer, cbuffer); }
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) + for (i = 0; i < extern_resources_count; ++i) { - if (!var->reg.allocated || var->data_type->type != HLSL_CLASS_OBJECT) - continue; + var = extern_resources[i];
if (var->data_type->base_type == HLSL_TYPE_SAMPLER) write_sm4_dcl_sampler(&buffer, var); @@ -2495,6 +2487,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t));
dxbc_writer_add_section(dxbc, TAG_SHDR, buffer.data, buffer.size); + + vkd3d_free(extern_resources); }
int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.h | 16 +++------ libs/vkd3d-shader/hlsl_codegen.c | 47 +++++++++++++++---------- libs/vkd3d-shader/hlsl_sm1.c | 22 ++++++++---- libs/vkd3d-shader/hlsl_sm4.c | 60 +++++++++++++++++++------------- 4 files changed, 84 insertions(+), 61 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 3cbd3158..92bb549c 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -380,19 +380,13 @@ struct hlsl_ir_var /* Offset where the variable's value is stored within its buffer in numeric register components. * This in case the variable is uniform. */ unsigned int buffer_offset; - /* Register to which the variable is allocated during its lifetime. - * In case that the variable spans multiple registers, this is set to the start of the register - * range. - * The register type is inferred from the data type and the storage of the variable. + /* Register to which the variable is allocated during its lifetime, for each register set. + * In case that the variable spans multiple registers in one regset, this is set to the + * start of the register range. * Builtin semantics don't use the field. * In SM4, uniforms don't use the field because they are located using the buffer's hlsl_reg - * and the buffer_offset instead. - * If the variable is an input semantic copy, the register is 'v'. - * If the variable is an output semantic copy, the register is 'o'. - * Textures are stored on 's' registers in SM1, and 't' registers in SM4. - * Samplers are stored on 's' registers. - * UAVs are stored on 'u' registers. */ - struct hlsl_reg reg; + * and the buffer_offset instead. */ + struct hlsl_reg regs[HLSL_REGSET_LAST + 1];
uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 9160fd57..5a0ec2bf 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2520,13 +2520,13 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) return;
- if (!var->reg.allocated && var->last_read) + if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read) { - var->reg = allocate_numeric_registers_for_type(ctx, liveness, var->first_write, var->last_read, - var->data_type); + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, liveness, + var->first_write, var->last_read, var->data_type);
- TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, - debug_register('r', var->reg, var->data_type), var->first_write, var->last_read); + TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', + var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read); } }
@@ -2693,11 +2693,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi { if (var->is_uniform && var->last_read) { - if (var->data_type->reg_size[HLSL_REGSET_NUMERIC] == 0) + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + + if (reg_size == 0) continue;
- var->reg = allocate_numeric_registers_for_type(ctx, &liveness, 1, UINT_MAX, var->data_type); - TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->reg, var->data_type)); + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &liveness, + 1, UINT_MAX, var->data_type); + TRACE("Allocated %s to %s.\n", var->name, + debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } } } @@ -2771,10 +2775,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var } else { - var->reg.allocated = true; - var->reg.id = (*counter)++; - var->reg.writemask = (1 << var->data_type->dimx) - 1; - TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', var->reg, var->data_type)); + var->regs[HLSL_REGSET_NUMERIC].allocated = true; + var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; + var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; + TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', + var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } }
@@ -2937,10 +2942,14 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { + enum hlsl_regset regset; + if (!var->last_read || var->data_type->type != HLSL_CLASS_OBJECT || var->data_type->base_type != type) continue;
+ regset = hlsl_type_get_regset(var->data_type); + if (var->reg_reservation.type == type_info->reg_name) { const struct hlsl_ir_var *reserved_object = get_reserved_object(ctx, type_info->reg_name, @@ -2962,8 +2971,8 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type) type_info->reg_name, var->reg_reservation.index); }
- var->reg.id = var->reg_reservation.index; - var->reg.allocated = true; + var->regs[regset].id = var->reg_reservation.index; + var->regs[regset].allocated = true; TRACE("Allocated reserved %s to %c%u.\n", var->name, type_info->reg_name, var->reg_reservation.index); } else if (!var->reg_reservation.type) @@ -2971,8 +2980,8 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type) while (get_reserved_object(ctx, type_info->reg_name, index)) ++index;
- var->reg.id = index; - var->reg.allocated = true; + var->regs[regset].id = index; + var->regs[regset].allocated = true; TRACE("Allocated object to %c%u.\n", type_info->reg_name, index); ++index; } @@ -3111,7 +3120,7 @@ unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) { const struct hlsl_ir_var *var = deref->var; - struct hlsl_reg ret = var->reg; + struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
assert(deref->offset_regset == HLSL_REGSET_NUMERIC); @@ -3119,8 +3128,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere ret.id += offset / 4;
ret.writemask = 0xf & (0xf << (offset % 4)); - if (var->reg.writemask) - ret.writemask = hlsl_combine_writemasks(var->reg.writemask, ret.writemask); + if (var->regs[HLSL_REGSET_NUMERIC].writemask) + ret.writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, ret.writemask);
return ret; } diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 10f32766..07f9ceb7 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -315,7 +315,9 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->semantic.name && var->reg.allocated) + enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); + + if (!var->semantic.name && var->regs[regset].allocated) { ++uniform_count;
@@ -353,20 +355,24 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->semantic.name && var->reg.allocated) + enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); + + if (!var->semantic.name && var->regs[regset].allocated) { put_u32(buffer, 0); /* name */ if (var->data_type->type == HLSL_CLASS_OBJECT && (var->data_type->base_type == HLSL_TYPE_SAMPLER || var->data_type->base_type == HLSL_TYPE_TEXTURE)) { - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->reg.id)); + assert(regset == HLSL_REGSET_SAMPLERS); + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); put_u32(buffer, 1); } else { - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->reg.id)); - put_u32(buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] / 4); + assert(regset == HLSL_REGSET_NUMERIC); + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); + put_u32(buffer, var->data_type->reg_size[regset] / 4); } put_u32(buffer, 0); /* type */ put_u32(buffer, 0); /* FIXME: default value */ @@ -377,7 +383,9 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->semantic.name && var->reg.allocated) + enum hlsl_regset regset = hlsl_type_get_regset(var->data_type); + + if (!var->semantic.name && var->regs[regset].allocated) { size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); size_t name_offset; @@ -584,7 +592,7 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); assert(ret); reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; - reg.reg = var->reg.id; + reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; }
token = D3DSIO_DCL; diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index b7eb5ac9..b95aab4e 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -182,9 +182,9 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, } else { - assert(var->reg.allocated); + assert(var->regs[HLSL_REGSET_NUMERIC].allocated); type = VKD3D_SM4_RT_INPUT; - reg_idx = var->reg.id; + reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; }
use_mask = width; /* FIXME: accurately report use mask */ @@ -484,33 +484,43 @@ static int sm4_compare_extern_resources(const void *a, const void *b) { const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; + enum hlsl_regset aa_regset, bb_regset;
- if (aa->data_type->base_type != bb->data_type->base_type) - return aa->data_type->base_type - bb->data_type->base_type; - return aa->reg.id - bb->reg.id; + aa_regset = hlsl_type_get_regset(aa->data_type); + bb_regset = hlsl_type_get_regset(bb->data_type); + + if (aa_regset != bb_regset) + return aa_regset - bb_regset; + + return aa->regs[aa_regset].id - bb->regs[bb_regset].id; }
static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { const struct hlsl_ir_var **extern_resources = NULL; const struct hlsl_ir_var *var; + enum hlsl_regset regset; size_t capacity = 0; + *count = 0;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->reg.allocated && var->data_type->type == HLSL_CLASS_OBJECT) - { - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) - { - *count = 0; - return NULL; - } + if (!hlsl_type_is_resource(var->data_type)) + continue; + regset = hlsl_type_get_regset(var->data_type); + if (!var->regs[regset].allocated) + continue;
- extern_resources[*count] = var; - ++*count; + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + *count = 0; + return NULL; } + + extern_resources[*count] = var; + ++*count; }
qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); @@ -578,16 +588,18 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
for (i = 0; i < extern_resources_count; ++i) { + enum hlsl_regset regset; uint32_t flags = 0;
var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type);
if (var->reg_reservation.type) flags |= D3D_SIF_USERPACKED;
put_u32(&buffer, 0); /* name */ put_u32(&buffer, sm4_resource_type(var->data_type)); - if (var->data_type->base_type == HLSL_TYPE_SAMPLER) + if (regset == HLSL_REGSET_SAMPLERS) { put_u32(&buffer, 0); put_u32(&buffer, 0); @@ -600,7 +612,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } - put_u32(&buffer, var->reg.id); + put_u32(&buffer, var->regs[regset].id); put_u32(&buffer, 1); /* bind count */ put_u32(&buffer, flags); } @@ -852,7 +864,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->reg.id; + reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -862,7 +874,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->dim = VKD3D_SM4_DIMENSION_VEC4; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; - reg->idx[0] = var->reg.id; + reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -872,7 +884,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->dim = VKD3D_SM4_DIMENSION_NONE; if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; - reg->idx[0] = var->reg.id; + reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -1142,7 +1154,7 @@ static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const st .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
.dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, - .dsts[0].reg.idx = {var->reg.id}, + .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id}, .dsts[0].reg.idx_count = 1, .dst_count = 1, }; @@ -1158,7 +1170,7 @@ static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const st | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT),
.dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {var->reg.id}, + .dsts[0].reg.idx = {uav ? var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id}, .dsts[0].reg.idx_count = 1, .dst_count = 1,
@@ -1197,9 +1209,9 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b else { instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; - instr.dsts[0].reg.idx[0] = var->reg.id; + instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; instr.dsts[0].reg.idx_count = 1; - instr.dsts[0].writemask = var->reg.writemask; + instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; }
if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 85 ++++++++++++++------------------ 1 file changed, 37 insertions(+), 48 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5a0ec2bf..07e58750 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2234,6 +2234,22 @@ static void dump_function(struct rb_entry *entry, void *context) rb_for_each_entry(&func->overloads, dump_function_decl, ctx); }
+static char get_regset_name(enum hlsl_regset regset) +{ + switch (regset) + { + case HLSL_REGSET_SAMPLERS: + return 's'; + case HLSL_REGSET_TEXTURES: + return 't'; + case HLSL_REGSET_UAVS: + return 'u'; + case HLSL_REGSET_NUMERIC: + vkd3d_unreachable(); + } + vkd3d_unreachable(); +} + /* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend * to at least the range of the entire loop. Note that we don't need to do this @@ -2885,50 +2901,28 @@ static void allocate_buffers(struct hlsl_ctx *ctx) } }
-static const struct hlsl_ir_var *get_reserved_object(struct hlsl_ctx *ctx, char type, uint32_t index) +static const struct hlsl_ir_var *get_reserved_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, + uint32_t index) { const struct hlsl_ir_var *var;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { - if (var->last_read && var->reg_reservation.type == type && var->reg_reservation.index == index) + if (var->last_read && var->reg_reservation.type == get_regset_name(regset) + && var->reg_reservation.index == index) return var; } return NULL; }
-static const struct object_type_info -{ - enum hlsl_base_type type; - char reg_name; -} -object_types[] = -{ - { HLSL_TYPE_SAMPLER, 's' }, - { HLSL_TYPE_TEXTURE, 't' }, - { HLSL_TYPE_UAV, 'u' }, -}; - -static const struct object_type_info *get_object_type_info(enum hlsl_base_type type) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(object_types); ++i) - if (type == object_types[i].type) - return &object_types[i]; - - WARN("No type info for object type %u.\n", type); - return NULL; -} - -static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type) +static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) { - const struct object_type_info *type_info = get_object_type_info(type); + char regset_name = get_regset_name(regset); struct hlsl_ir_var *var; uint32_t min_index = 0; uint32_t index;
- if (type == HLSL_TYPE_UAV) + if (regset == HLSL_REGSET_UAVS) { LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -2942,21 +2936,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - enum hlsl_regset regset; - - if (!var->last_read || var->data_type->type != HLSL_CLASS_OBJECT - || var->data_type->base_type != type) + if (!var->last_read || var->data_type->reg_size[regset] == 0) continue;
- regset = hlsl_type_get_regset(var->data_type); - - if (var->reg_reservation.type == type_info->reg_name) + if (var->reg_reservation.type == regset_name) { - const struct hlsl_ir_var *reserved_object = get_reserved_object(ctx, type_info->reg_name, + const struct hlsl_ir_var *reserved_object = get_reserved_object(ctx, regset, var->reg_reservation.index);
if (var->reg_reservation.index < min_index) { + assert(regset == HLSL_REGSET_UAVS); hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", var->reg_reservation.index, min_index - 1); @@ -2964,25 +2954,24 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type) else if (reserved_object && reserved_object != var) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple objects bound to %c%u.", type_info->reg_name, - var->reg_reservation.index); + "Multiple objects bound to %c%u.", regset_name, var->reg_reservation.index); hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Object '%s' is already bound to %c%u.", reserved_object->name, - type_info->reg_name, var->reg_reservation.index); + "Object '%s' is already bound to %c%u.", reserved_object->name, regset_name, + var->reg_reservation.index); }
var->regs[regset].id = var->reg_reservation.index; var->regs[regset].allocated = true; - TRACE("Allocated reserved %s to %c%u.\n", var->name, type_info->reg_name, var->reg_reservation.index); + TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); } else if (!var->reg_reservation.type) { - while (get_reserved_object(ctx, type_info->reg_name, index)) + while (get_reserved_object(ctx, regset, index)) ++index;
var->regs[regset].id = index; var->regs[regset].allocated = true; - TRACE("Allocated object to %c%u.\n", type_info->reg_name, index); + TRACE("Allocated object to %c%u.\n", regset_name, index); ++index; } else @@ -2992,7 +2981,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type) type_string = hlsl_type_to_string(ctx, var->data_type); hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Object of type '%s' must be bound to register type '%c'.", - type_string->buffer, type_info->reg_name); + type_string->buffer, regset_name); hlsl_release_string_buffer(ctx, type_string); } } @@ -3318,11 +3307,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry else { allocate_buffers(ctx); - allocate_objects(ctx, HLSL_TYPE_TEXTURE); - allocate_objects(ctx, HLSL_TYPE_UAV); + allocate_objects(ctx, HLSL_REGSET_TEXTURES); + allocate_objects(ctx, HLSL_REGSET_UAVS); } allocate_semantic_registers(ctx); - allocate_objects(ctx, HLSL_TYPE_SAMPLER); + allocate_objects(ctx, HLSL_REGSET_SAMPLERS);
if (ctx->result) return ctx->result;
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/register-reservations.shader_test | 52 +++++++++++++++++++++++++ tests/shader_runner_d3d12.c | 2 +- 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 tests/register-reservations.shader_test
diff --git a/Makefile.am b/Makefile.am index 5eadfc29..1158a52b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -134,6 +134,7 @@ vkd3d_shader_tests = \ tests/preproc-macro.shader_test \ tests/preproc-misc.shader_test \ tests/reflect.shader_test \ + tests/register-reservations.shader_test \ tests/return.shader_test \ tests/round.shader_test \ tests/sampler.shader_test \ diff --git a/tests/register-reservations.shader_test b/tests/register-reservations.shader_test new file mode 100644 index 00000000..cf324717 --- /dev/null +++ b/tests/register-reservations.shader_test @@ -0,0 +1,52 @@ +[require] +shader model >= 4.0 + + +[texture 0] +size (1, 1) +0.0 0.0 0.0 99.0 + +[texture 1] +size (1, 1) +1.0 1.0 1.0 99.0 + +[texture 2] +size (1, 1) +2.0 2.0 2.0 99.0 + +[texture 3] +size (1, 1) +3.0 3.0 3.0 99.0 + +[texture 4] +size (1, 1) +4.0 4.0 4.0 99.0 + + +[pixel shader] +Texture2D unused : register(t0); +Texture2D tex; + +float4 main() : sv_target +{ + return tex.Load(int3(0, 0, 0)); +} + +[test] +draw quad +todo probe all rgba (1.0, 1.0, 1.0, 99.0) + + +[pixel shader] +Texture2D unused[2][2] : register(t0); +Texture2D tex; + +float4 main() : sv_target +{ + return tex.Load(int3(0, 0, 0)); +} + +[test] +draw quad +todo probe all rgba (4.0, 4.0, 4.0, 99.0) + diff --git a/tests/shader_runner_d3d12.c b/tests/shader_runner_d3d12.c index bd94b4c9..3e661151 100644 --- a/tests/shader_runner_d3d12.c +++ b/tests/shader_runner_d3d12.c @@ -167,7 +167,7 @@ static ID3D12RootSignature *d3d12_runner_create_root_signature(struct d3d12_shad ID3D12GraphicsCommandList *command_list, unsigned int *uniform_index) { D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {0}; - D3D12_ROOT_PARAMETER root_params[4], *root_param; + D3D12_ROOT_PARAMETER root_params[5], *root_param; D3D12_STATIC_SAMPLER_DESC static_samplers[1]; ID3D12RootSignature *root_signature; HRESULT hr;
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 2 +- tests/register-reservations.shader_test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 07e58750..8fcac65b 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2908,7 +2908,7 @@ static const struct hlsl_ir_var *get_reserved_object(struct hlsl_ctx *ctx, enum
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { - if (var->last_read && var->reg_reservation.type == get_regset_name(regset) + if (var->reg_reservation.type == get_regset_name(regset) && var->reg_reservation.index == index) return var; } diff --git a/tests/register-reservations.shader_test b/tests/register-reservations.shader_test index cf324717..9a6b09ff 100644 --- a/tests/register-reservations.shader_test +++ b/tests/register-reservations.shader_test @@ -34,7 +34,7 @@ float4 main() : sv_target
[test] draw quad -todo probe all rgba (1.0, 1.0, 1.0, 99.0) +probe all rgba (1.0, 1.0, 1.0, 99.0)
[pixel shader]
From: Francisco Casas fcasas@codeweavers.com
This refactoring is required for improving the allocation strategy so it works with multiple-register variables. --- libs/vkd3d-shader/hlsl_codegen.c | 77 ++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 24 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 8fcac65b..ef372eb8 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2250,6 +2250,40 @@ static char get_regset_name(enum hlsl_regset regset) vkd3d_unreachable(); }
+static void allocate_register_reservations(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + enum hlsl_regset regset; + + if (!hlsl_type_is_resource(var->data_type)) + continue; + regset = hlsl_type_get_regset(var->data_type); + + if (var->reg_reservation.type) + { + if (var->reg_reservation.type != get_regset_name(regset)) + { + struct vkd3d_string_buffer *type_string; + + type_string = hlsl_type_to_string(ctx, var->data_type); + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Object of type '%s' must be bound to register type '%c'.", + type_string->buffer, get_regset_name(regset)); + hlsl_release_string_buffer(ctx, type_string); + } + else + { + var->regs[regset].allocated = true; + var->regs[regset].id = var->reg_reservation.index; + TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.type, var->reg_reservation.index); + } + } + } +} + /* Compute the earliest and latest liveness for each variable. In the case that * a variable is accessed inside of a loop, we promote its liveness to extend * to at least the range of the entire loop. Note that we don't need to do this @@ -2901,15 +2935,17 @@ static void allocate_buffers(struct hlsl_ctx *ctx) } }
-static const struct hlsl_ir_var *get_reserved_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, +static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, uint32_t index) { const struct hlsl_ir_var *var;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { - if (var->reg_reservation.type == get_regset_name(regset) - && var->reg_reservation.index == index) + if (!var->regs[regset].allocated) + continue; + + if (index == var->regs[regset].id) return var; } return NULL; @@ -2936,37 +2972,39 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->last_read || var->data_type->reg_size[regset] == 0) + if (!var->last_read || !var->data_type->reg_size[regset]) continue;
- if (var->reg_reservation.type == regset_name) + if (var->regs[regset].allocated) { - const struct hlsl_ir_var *reserved_object = get_reserved_object(ctx, regset, - var->reg_reservation.index); + const struct hlsl_ir_var *reserved_object; + unsigned int index = var->regs[regset].id;
- if (var->reg_reservation.index < min_index) + reserved_object = get_allocated_object(ctx, regset, index); + + if (var->regs[regset].id < min_index) { assert(regset == HLSL_REGSET_UAVS); hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", - var->reg_reservation.index, min_index - 1); + var->regs[regset].id, min_index - 1); } else if (reserved_object && reserved_object != var) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple objects bound to %c%u.", regset_name, var->reg_reservation.index); + "Multiple objects bound to %c%u.", regset_name, index); hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Object '%s' is already bound to %c%u.", reserved_object->name, regset_name, - var->reg_reservation.index); + "Object '%s' is already bound to %c%u.", reserved_object->name, + regset_name, index); }
var->regs[regset].id = var->reg_reservation.index; var->regs[regset].allocated = true; TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); } - else if (!var->reg_reservation.type) + else { - while (get_reserved_object(ctx, regset, index)) + while (get_allocated_object(ctx, regset, index)) ++index;
var->regs[regset].id = index; @@ -2974,16 +3012,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) TRACE("Allocated object to %c%u.\n", regset_name, index); ++index; } - else - { - struct vkd3d_string_buffer *type_string; - - type_string = hlsl_type_to_string(ctx, var->data_type); - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Object of type '%s' must be bound to register type '%c'.", - type_string->buffer, regset_name); - hlsl_release_string_buffer(ctx, type_string); - } } }
@@ -3299,6 +3327,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx);
+ allocate_register_reservations(ctx); allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) {
This merge request was approved by Henri Verbeet.