First part of the continuation of the implementation of non-constant offset dereferences (a.k.a. relative addressing) for SM4, now that we use vsir registers in tpf.c.
As a quick recap: while parsing HLSL we are expressing derefs as paths, and then we are lowering these paths into a single offset node (which is closer to the bytecode) using the replace_deref_path_with_offset() pass, right before register allocation.
This first part of the series splits this offset node into 2 parts: - A constant uint, which will be called hlsl_deref.const_offset. - A non-hlsl_ir_constant offset node that will only be present when we need relative addressing, which we will end up calling hlsl_deref.rel_offset.
Both of these fields will be analogous to the ones used in vsir register indexes, vkd3d_shader_register_index.offset and vkd3d_shader_register_index.rel_addr respectively, which is something we need for the second part of this series.
The following patches are in my [nonconst-offsets-8](https://gitlab.winehq.org/fcasas/vkd3d/-/commits/nonconst-offsets-8) branch; if something is not clear in this series, it may be worth skimming through them.
Supersedes !229.
-- v4: vkd3d-shader/tpf: Declare indexable temps. vkd3d-shader/hlsl: Mark vars that require non-constant dereferences. vkd3d-shader/hlsl: Rename hlsl_deref.offset to hlsl_deref.rel_offset. vkd3d-shader/hlsl: Absorb hlsl_ir_constant deref offsets into const_offset. vkd3d-shader/hlsl: Express deref->offset in whole registers. vkd3d-shader/hlsl: Split deref-offset into a node and a constant uint.
From: Francisco Casas fcasas@codeweavers.com
--- tests/hlsl/array-index-expr.shader_test | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+)
diff --git a/tests/hlsl/array-index-expr.shader_test b/tests/hlsl/array-index-expr.shader_test index 0a83080cc..f8d9f17b4 100644 --- a/tests/hlsl/array-index-expr.shader_test +++ b/tests/hlsl/array-index-expr.shader_test @@ -97,3 +97,22 @@ todo probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 0 float4 1 1 0 0 todo draw quad todo probe all rgba (9.0, 10.0, 11.0, 12.0) + + +[pixel shader todo] +int4 a; + +float4 main() : sv_target +{ + float4 arr[] = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120}; + + float4 tmp = float4(1, 2, 3, 4); + tmp.yz = arr[a.z].wx; + + return tmp; +} + +[test] +uniform 0 int4 0 0 2 0 +todo draw quad +probe all rgba (1.0, 120.0, 90.0, 4.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index c0d18a3ef..3c80e8fc7 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -29,8 +29,6 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str struct hlsl_ir_node *idx_offset = NULL; struct hlsl_ir_node *c;
- hlsl_block_init(block); - switch (type->class) { case HLSL_CLASS_VECTOR: @@ -111,9 +109,14 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st { struct hlsl_block idx_block;
+ hlsl_block_init(&idx_block); + if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, regset, loc))) + { + hlsl_block_cleanup(&idx_block); return NULL; + }
hlsl_block_add_block(block, &idx_block);
From: Francisco Casas fcasas@codeweavers.com
Some functions work with dereferences and need to know if they are lowered yet.
This can be determined by checking whether deref->offset.node is NULL or whether deref->data_type is NULL. I am using the latter since it keeps working even after the following patches that split deref->offset into constant and variable parts. --- libs/vkd3d-shader/hlsl.c | 20 +++++++------------- libs/vkd3d-shader/hlsl.h | 8 +++++++- libs/vkd3d-shader/hlsl_codegen.c | 4 +--- 3 files changed, 15 insertions(+), 17 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 0bfba35f4..ae5a388cd 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -249,14 +249,7 @@ static enum hlsl_regset type_get_regset(const struct hlsl_type *type)
enum hlsl_regset hlsl_deref_get_regset(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) { - struct hlsl_type *type; - - if (deref->data_type) - type = deref->data_type; - else - type = hlsl_deref_get_type(ctx, deref); - - return type_get_regset(type); + return type_get_regset(hlsl_deref_get_type(ctx, deref)); }
unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset) @@ -520,6 +513,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl deref->var = var; deref->path_len = path_len; deref->offset.node = NULL; + deref->data_type = NULL;
if (path_len == 0) { @@ -609,7 +603,7 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de
assert(deref);
- if (deref->offset.node) + if (hlsl_deref_is_lowered(deref)) return deref->data_type;
type = deref->var->data_type; @@ -1120,7 +1114,7 @@ bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struc if (!other) return true;
- assert(!other->offset.node); + assert(!hlsl_deref_is_lowered(other));
if (!init_deref(ctx, deref, other->var, other->path_len)) return false; @@ -1177,7 +1171,7 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls unsigned int i;
assert(lhs); - assert(!lhs->offset.node); + assert(!hlsl_deref_is_lowered(lhs));
if (!(store = hlsl_alloc(ctx, sizeof(*store)))) return NULL; @@ -1350,7 +1344,7 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl struct hlsl_type *type; unsigned int i;
- assert(!deref->offset.node); + assert(!hlsl_deref_is_lowered(deref));
type = hlsl_deref_get_type(ctx, deref); if (idx) @@ -1623,7 +1617,7 @@ static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, { unsigned int i;
- assert(!src->offset.node); + assert(!hlsl_deref_is_lowered(src));
if (!init_deref(ctx, dst, src->var, src->path_len)) return false; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index e45256bce..9c3e16750 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -621,11 +621,17 @@ struct hlsl_deref * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- * before writing the bytecode. * Since the type information cannot longer be retrieved from the offset alone, the type is - * stored in the data_type field. */ + * stored in the data_type field, which remains NULL if the deref hasn't been lowered yet. */ struct hlsl_src offset; struct hlsl_type *data_type; };
+/* Whether the path has been lowered to an offset or not. */ +static inline bool hlsl_deref_is_lowered(const struct hlsl_deref *deref) +{ + return !!deref->data_type; +} + struct hlsl_ir_load { struct hlsl_ir_node node; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 3c80e8fc7..4787833a9 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -135,9 +135,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der struct hlsl_block block;
assert(deref->var); - - /* register offsets shouldn't be used before this point is reached. */ - assert(!deref->offset.node); + assert(!hlsl_deref_is_lowered(deref));
type = hlsl_deref_get_type(ctx, deref);
From: Francisco Casas fcasas@codeweavers.com
This uint will be used for the following:
- Since SM4's relative addressing (the capability of passing a register as an index to another register) only has whole-register granularity, we will need to make the offset node express the offset in whole-registers and specify the register component in this uint, otherwise we would have to add additional / and % operations in the output binary.
- If, after we apply constant folding and copy propagation, we determine that the offset is a single constant node, we can store the whole offset in this uint constant, and remove the offset src.
This allows DCE to remove a good bunch of the nodes previously required only for the offset constants, which makes the output more lightweight and readable, and simplifies the implementation of relative addressing when writing tpf in the following patches.
In dump_deref(), we use "c" to indicate components instead of whole registers. Since both the offset node and the offset uint are now in components, a lowered deref would look like:
var[@42c + 2c]
But, once we express the offset node in whole registers we will remove the "c" from the node part:
var[@22 + 3c] --- libs/vkd3d-shader/hlsl.c | 33 ++++++++++++---- libs/vkd3d-shader/hlsl.h | 12 +++--- libs/vkd3d-shader/hlsl_codegen.c | 68 +++++++++++++++++++------------- 3 files changed, 74 insertions(+), 39 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index ae5a388cd..c3ad46160 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -513,6 +513,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl deref->var = var; deref->path_len = path_len; deref->offset.node = NULL; + deref->const_offset = 0; deref->data_type = NULL;
if (path_len == 0) @@ -541,6 +542,7 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d deref->path = NULL; deref->path_len = 0; deref->offset.node = NULL; + deref->const_offset = 0;
assert(chain); if (chain->type == HLSL_IR_INDEX) @@ -1137,6 +1139,7 @@ void hlsl_cleanup_deref(struct hlsl_deref *deref) deref->path_len = 0;
hlsl_src_remove(&deref->offset); + deref->const_offset = 0; }
/* Initializes a simple variable dereference, so that it can be passed to load/store functions. */ @@ -2317,21 +2320,37 @@ static void dump_deref(struct vkd3d_string_buffer *buffer, const struct hlsl_der if (deref->var) { vkd3d_string_buffer_printf(buffer, "%s", deref->var->name); - if (deref->path_len) + if (!hlsl_deref_is_lowered(deref)) { - vkd3d_string_buffer_printf(buffer, "["); - for (i = 0; i < deref->path_len; ++i) + if (deref->path_len) { vkd3d_string_buffer_printf(buffer, "["); - dump_src(buffer, &deref->path[i]); + for (i = 0; i < deref->path_len; ++i) + { + vkd3d_string_buffer_printf(buffer, "["); + dump_src(buffer, &deref->path[i]); + vkd3d_string_buffer_printf(buffer, "]"); + } vkd3d_string_buffer_printf(buffer, "]"); } - vkd3d_string_buffer_printf(buffer, "]"); } - else if (deref->offset.node) + else { + bool show_rel, show_const; + + show_rel = deref->offset.node; + show_const = deref->const_offset != 0 || !show_rel; + vkd3d_string_buffer_printf(buffer, "["); - dump_src(buffer, &deref->offset); + if (show_rel) + { + dump_src(buffer, &deref->offset); + vkd3d_string_buffer_printf(buffer, "c"); + } + if (show_rel && show_const) + vkd3d_string_buffer_printf(buffer, " + "); + if (show_const) + vkd3d_string_buffer_printf(buffer, "%uc", deref->const_offset); vkd3d_string_buffer_printf(buffer, "]"); } } diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 9c3e16750..ecfe6f22c 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -615,14 +615,16 @@ struct hlsl_deref unsigned int path_len; struct hlsl_src *path;
- /* Single instruction node of data type uint used to represent the register offset (in register - * components, within the pertaining regset), from the start of the variable, of the part - * referenced. - * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- - * before writing the bytecode. + /* Before writing the bytecode, deref paths are lowered into an offset (within the pertaining + * regset) from the start of the variable, to the part of the variable that is referenced. + * This offset is stored using two fields, one for a variable part and other for a constant + * part, which are added together: + * - offset: An offset given by an instruction node, in number of register components. + * - const_offset: A constant number of register components. * Since the type information cannot longer be retrieved from the offset alone, the type is * stored in the data_type field, which remains NULL if the deref hasn't been lowered yet. */ struct hlsl_src offset; + unsigned int const_offset; struct hlsl_type *data_type; };
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 4787833a9..ede2f9d4f 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -23,8 +23,8 @@
/* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_type *type, struct hlsl_ir_node *offset, struct hlsl_ir_node *idx, - enum hlsl_regset regset, const struct vkd3d_shader_location *loc) + struct hlsl_type *type, struct hlsl_ir_node *base_offset, struct hlsl_ir_node *idx, + enum hlsl_regset regset, unsigned int *offset_component, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *idx_offset = NULL; struct hlsl_ir_node *c; @@ -32,7 +32,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str switch (type->class) { case HLSL_CLASS_VECTOR: - idx_offset = idx; + *offset_component += hlsl_ir_constant(idx)->value.u[0].u; break;
case HLSL_CLASS_MATRIX: @@ -67,8 +67,16 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str { unsigned int field_idx = hlsl_ir_constant(idx)->value.u[0].u; struct hlsl_struct_field *field = &type->e.record.fields[field_idx]; + unsigned int field_offset = field->reg_offset[regset];
- if (!(c = hlsl_new_uint_constant(ctx, field->reg_offset[regset], loc))) + if (regset == HLSL_REGSET_NUMERIC) + { + assert(*offset_component == 0); + *offset_component = field_offset % 4; + field_offset -= *offset_component; + } + + if (!(c = hlsl_new_uint_constant(ctx, field_offset, loc))) return NULL; hlsl_block_add_instr(block, c);
@@ -81,27 +89,33 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str vkd3d_unreachable(); }
- if (offset) + if (idx_offset) { - if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, offset, idx_offset))) + if (!(base_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, base_offset, idx_offset))) return NULL; - hlsl_block_add_instr(block, idx_offset); + hlsl_block_add_instr(block, base_offset); }
- return idx_offset; + return base_offset; }
/* TODO: remove when no longer needed, only used for replace_deref_path_with_offset() */ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct hlsl_deref *deref, const struct vkd3d_shader_location *loc) + const struct hlsl_deref *deref, unsigned int *offset_component, const struct vkd3d_shader_location *loc) { enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); - struct hlsl_ir_node *offset = NULL; + struct hlsl_ir_node *offset; struct hlsl_type *type; unsigned int i;
+ *offset_component = 0; + hlsl_block_init(block);
+ if (!(offset = hlsl_new_uint_constant(ctx, 0, loc))) + return NULL; + hlsl_block_add_instr(block, offset); + assert(deref->var); type = deref->var->data_type;
@@ -112,7 +126,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st hlsl_block_init(&idx_block);
if (!(offset = new_offset_from_path_index(ctx, &idx_block, type, offset, deref->path[i].node, - regset, loc))) + regset, offset_component, loc))) { hlsl_block_cleanup(&idx_block); return NULL; @@ -130,9 +144,10 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { - struct hlsl_type *type; + unsigned int offset_component; struct hlsl_ir_node *offset; struct hlsl_block block; + struct hlsl_type *type;
assert(deref->var); assert(!hlsl_deref_is_lowered(deref)); @@ -149,12 +164,13 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der
deref->data_type = type;
- if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) + if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &offset_component, &instr->loc))) return false; list_move_before(&instr->entry, &block.instrs);
hlsl_cleanup_deref(deref); hlsl_src_from_node(&deref->offset, offset); + deref->const_offset = offset_component;
return true; } @@ -4194,30 +4210,28 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref
bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { + enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); struct hlsl_ir_node *offset_node = deref->offset.node; - enum hlsl_regset regset; unsigned int size;
- if (!offset_node) - { - *offset = 0; - return true; - } + *offset = deref->const_offset;
- /* We should always have generated a cast to UINT. */ - assert(offset_node->data_type->class == HLSL_CLASS_SCALAR - && offset_node->data_type->base_type == HLSL_TYPE_UINT); + if (offset_node) + { + /* We should always have generated a cast to UINT. */ + assert(offset_node->data_type->class == HLSL_CLASS_SCALAR + && offset_node->data_type->base_type == HLSL_TYPE_UINT);
- if (offset_node->type != HLSL_IR_CONSTANT) - return false; + if (offset_node->type != HLSL_IR_CONSTANT) + return false;
- *offset = hlsl_ir_constant(offset_node)->value.u[0].u; - regset = hlsl_deref_get_regset(ctx, deref); + *offset += hlsl_ir_constant(offset_node)->value.u[0].u; + }
size = deref->var->data_type->reg_size[regset]; if (*offset >= size) { - hlsl_error(ctx, &deref->offset.node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + hlsl_error(ctx, &offset_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, "Dereference is out of bounds. %u/%u", *offset, size); return false; }
From: Francisco Casas fcasas@codeweavers.com
This is required to use SM4 relative addressing, because it is limited to whole-register granularity. --- libs/vkd3d-shader/hlsl.c | 3 --- libs/vkd3d-shader/hlsl.h | 2 +- libs/vkd3d-shader/hlsl_codegen.c | 22 ++++++++++++---------- 3 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index c3ad46160..7a505a025 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2343,10 +2343,7 @@ static void dump_deref(struct vkd3d_string_buffer *buffer, const struct hlsl_der
vkd3d_string_buffer_printf(buffer, "["); if (show_rel) - { dump_src(buffer, &deref->offset); - vkd3d_string_buffer_printf(buffer, "c"); - } if (show_rel && show_const) vkd3d_string_buffer_printf(buffer, " + "); if (show_const) diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index ecfe6f22c..e49e94e23 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -619,7 +619,7 @@ struct hlsl_deref * regset) from the start of the variable, to the part of the variable that is referenced. * This offset is stored using two fields, one for a variable part and other for a constant * part, which are added together: - * - offset: An offset given by an instruction node, in number of register components. + * - offset: An offset given by an instruction node, in whole registers. * - const_offset: A constant number of register components. * Since the type information cannot longer be retrieved from the offset alone, the type is * stored in the data_type field, which remains NULL if the deref hasn't been lowered yet. */ diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index ede2f9d4f..b21b94b9c 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -37,14 +37,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str
case HLSL_CLASS_MATRIX: { - if (!(c = hlsl_new_uint_constant(ctx, 4, loc))) - return NULL; - hlsl_block_add_instr(block, c); - - if (!(idx_offset = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, c, idx))) - return NULL; - hlsl_block_add_instr(block, idx_offset); - + idx_offset = idx; break; }
@@ -52,6 +45,12 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str { unsigned int size = hlsl_type_get_array_element_reg_size(type->e.array.type, regset);
+ if (regset == HLSL_REGSET_NUMERIC) + { + assert(size % 4 == 0); + size /= 4; + } + if (!(c = hlsl_new_uint_constant(ctx, size, loc))) return NULL; hlsl_block_add_instr(block, c); @@ -73,7 +72,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str { assert(*offset_component == 0); *offset_component = field_offset % 4; - field_offset -= *offset_component; + field_offset /= 4; }
if (!(c = hlsl_new_uint_constant(ctx, field_offset, loc))) @@ -4225,7 +4224,10 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref if (offset_node->type != HLSL_IR_CONSTANT) return false;
- *offset += hlsl_ir_constant(offset_node)->value.u[0].u; + if (regset == HLSL_REGSET_NUMERIC) + *offset += 4 * hlsl_ir_constant(offset_node)->value.u[0].u; + else + *offset += hlsl_ir_constant(offset_node)->value.u[0].u; }
size = deref->var->data_type->reg_size[regset];
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index b21b94b9c..44f8cf814 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -174,6 +174,24 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der return true; }
+static bool clean_constant_deref_offset_srcs(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + struct hlsl_ir_node *instr) +{ + if (deref->offset.node && deref->offset.node->type == HLSL_IR_CONSTANT) + { + enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); + + if (regset == HLSL_REGSET_NUMERIC) + deref->const_offset += 4 * hlsl_ir_constant(deref->offset.node)->value.u[0].u; + else + deref->const_offset += hlsl_ir_constant(deref->offset.node)->value.u[0].u; + hlsl_src_remove(&deref->offset); + return true; + } + return false; +} + + /* Split uniforms into two variables representing the constant and temp * registers, and copy the former to the latter, so that writes to uniforms * work. */ @@ -4220,14 +4238,8 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref /* We should always have generated a cast to UINT. */ assert(offset_node->data_type->class == HLSL_CLASS_SCALAR && offset_node->data_type->base_type == HLSL_TYPE_UINT); - - if (offset_node->type != HLSL_IR_CONSTANT) - return false; - - if (regset == HLSL_REGSET_NUMERIC) - *offset += 4 * hlsl_ir_constant(offset_node)->value.u[0].u; - else - *offset += hlsl_ir_constant(offset_node)->value.u[0].u; + assert(offset_node->type != HLSL_IR_CONSTANT); + return false; }
size = deref->var->data_type->reg_size[regset]; @@ -4486,6 +4498,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry /* TODO: move forward, remove when no longer needed */ transform_derefs(ctx, replace_deref_path_with_offset, body); while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); + transform_derefs(ctx, clean_constant_deref_offset_srcs, body);
do compute_liveness(ctx, entry_func);
From: Francisco Casas fcasas@codeweavers.com
This field is now analogous to vkd3d_shader_register_index.rel_addr.
Also, it makes sense to rename it now because all the constant part of the offset is now handled by hlsl_deref.const_offset. Consequently, it may also be NULL now. --- libs/vkd3d-shader/hlsl.c | 12 +++++------ libs/vkd3d-shader/hlsl.h | 4 ++-- libs/vkd3d-shader/hlsl_codegen.c | 36 ++++++++++++++++---------------- 3 files changed, 26 insertions(+), 26 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 7a505a025..5df798ae9 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -512,7 +512,7 @@ static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hl { deref->var = var; deref->path_len = path_len; - deref->offset.node = NULL; + deref->rel_offset.node = NULL; deref->const_offset = 0; deref->data_type = NULL;
@@ -541,7 +541,7 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d
deref->path = NULL; deref->path_len = 0; - deref->offset.node = NULL; + deref->rel_offset.node = NULL; deref->const_offset = 0;
assert(chain); @@ -1138,7 +1138,7 @@ void hlsl_cleanup_deref(struct hlsl_deref *deref) deref->path = NULL; deref->path_len = 0;
- hlsl_src_remove(&deref->offset); + hlsl_src_remove(&deref->rel_offset); deref->const_offset = 0; }
@@ -2338,12 +2338,12 @@ static void dump_deref(struct vkd3d_string_buffer *buffer, const struct hlsl_der { bool show_rel, show_const;
- show_rel = deref->offset.node; + show_rel = deref->rel_offset.node; show_const = deref->const_offset != 0 || !show_rel;
vkd3d_string_buffer_printf(buffer, "["); if (show_rel) - dump_src(buffer, &deref->offset); + dump_src(buffer, &deref->rel_offset); if (show_rel && show_const) vkd3d_string_buffer_printf(buffer, " + "); if (show_const) @@ -2883,7 +2883,7 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load)
static void free_ir_resource_store(struct hlsl_ir_resource_store *store) { - hlsl_src_remove(&store->resource.offset); + hlsl_src_remove(&store->resource.rel_offset); hlsl_src_remove(&store->coords); hlsl_src_remove(&store->value); vkd3d_free(store); diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index e49e94e23..9aa81f32c 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -619,11 +619,11 @@ struct hlsl_deref * regset) from the start of the variable, to the part of the variable that is referenced. * This offset is stored using two fields, one for a variable part and other for a constant * part, which are added together: - * - offset: An offset given by an instruction node, in whole registers. + * - rel_offset: An offset given by an instruction node, in whole registers. * - const_offset: A constant number of register components. * Since the type information cannot longer be retrieved from the offset alone, the type is * stored in the data_type field, which remains NULL if the deref hasn't been lowered yet. */ - struct hlsl_src offset; + struct hlsl_src rel_offset; unsigned int const_offset; struct hlsl_type *data_type; }; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 44f8cf814..07967cf37 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -168,7 +168,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der list_move_before(&instr->entry, &block.instrs);
hlsl_cleanup_deref(deref); - hlsl_src_from_node(&deref->offset, offset); + hlsl_src_from_node(&deref->rel_offset, offset); deref->const_offset = offset_component;
return true; @@ -177,15 +177,15 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der static bool clean_constant_deref_offset_srcs(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { - if (deref->offset.node && deref->offset.node->type == HLSL_IR_CONSTANT) + if (deref->rel_offset.node && deref->rel_offset.node->type == HLSL_IR_CONSTANT) { enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
if (regset == HLSL_REGSET_NUMERIC) - deref->const_offset += 4 * hlsl_ir_constant(deref->offset.node)->value.u[0].u; + deref->const_offset += 4 * hlsl_ir_constant(deref->rel_offset.node)->value.u[0].u; else - deref->const_offset += hlsl_ir_constant(deref->offset.node)->value.u[0].u; - hlsl_src_remove(&deref->offset); + deref->const_offset += hlsl_ir_constant(deref->rel_offset.node)->value.u[0].u; + hlsl_src_remove(&deref->rel_offset); return true; } return false; @@ -3087,8 +3087,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop if (!var->first_write) var->first_write = loop_first ? min(instr->index, loop_first) : instr->index; store->rhs.node->last_read = last_read; - if (store->lhs.offset.node) - store->lhs.offset.node->last_read = last_read; + if (store->lhs.rel_offset.node) + store->lhs.rel_offset.node->last_read = last_read; break; } case HLSL_IR_EXPR: @@ -3115,8 +3115,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
var = load->src.var; var->last_read = max(var->last_read, last_read); - if (load->src.offset.node) - load->src.offset.node->last_read = last_read; + if (load->src.rel_offset.node) + load->src.rel_offset.node->last_read = last_read; break; } case HLSL_IR_LOOP: @@ -3133,14 +3133,14 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
var = load->resource.var; var->last_read = max(var->last_read, last_read); - if (load->resource.offset.node) - load->resource.offset.node->last_read = last_read; + if (load->resource.rel_offset.node) + load->resource.rel_offset.node->last_read = last_read;
if ((var = load->sampler.var)) { var->last_read = max(var->last_read, last_read); - if (load->sampler.offset.node) - load->sampler.offset.node->last_read = last_read; + if (load->sampler.rel_offset.node) + load->sampler.rel_offset.node->last_read = last_read; }
if (load->coords.node) @@ -3165,8 +3165,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
var = store->resource.var; var->last_read = max(var->last_read, last_read); - if (store->resource.offset.node) - store->resource.offset.node->last_read = last_read; + if (store->resource.rel_offset.node) + store->resource.rel_offset.node->last_read = last_read; store->coords.node->last_read = last_read; store->value.node->last_read = last_read; break; @@ -4228,7 +4228,7 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); - struct hlsl_ir_node *offset_node = deref->offset.node; + struct hlsl_ir_node *offset_node = deref->rel_offset.node; unsigned int size;
*offset = deref->const_offset; @@ -4260,8 +4260,8 @@ unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl if (hlsl_offset_from_deref(ctx, deref, &offset)) return offset;
- hlsl_fixme(ctx, &deref->offset.node->loc, "Dereference with non-constant offset of type %s.", - hlsl_node_type_to_string(deref->offset.node->type)); + hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.", + hlsl_node_type_to_string(deref->rel_offset.node->type));
return 0; }
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.h | 3 +++ libs/vkd3d-shader/hlsl_codegen.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 9aa81f32c..7777d7f0f 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -422,6 +422,9 @@ struct hlsl_ir_var * It may be less than the allocation size, e.g. for texture arrays. */ unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1];
+ /* Whether the shader performs dereferences with non-constant offsets in the variable. */ + bool indexable; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 07967cf37..7c1da155a 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2999,6 +2999,19 @@ static void dump_function(struct rb_entry *entry, void *context) rb_for_each_entry(&func->overloads, dump_function_decl, ctx); }
+static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + struct hlsl_ir_node *instr) +{ + if (!deref->rel_offset.node) + return false; + + assert(deref->var); + assert(deref->rel_offset.node->type != HLSL_IR_CONSTANT); + deref->var->indexable = true; + + return true; +} + static char get_regset_name(enum hlsl_regset regset) { switch (regset) @@ -4509,6 +4522,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry if (TRACE_ON()) rb_for_each_entry(&ctx->functions, dump_function, ctx);
+ transform_derefs(ctx, mark_indexable_vars, body); + calculate_resource_register_counts(ctx);
allocate_register_reservations(ctx);
From: Francisco Casas fcasas@codeweavers.com
If var->indexable, then the variable is given a unique register number, regardless of its lifetime. --- libs/vkd3d-shader/hlsl_codegen.c | 24 +++++++++++++++++---- libs/vkd3d-shader/tpf.c | 36 +++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 7c1da155a..4d47e46e5 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3252,6 +3252,10 @@ struct register_allocator unsigned int writemask; unsigned int first_write, last_read; } *allocations; + + /* Indexable temps are allocated separately and always keep their index regardless of their + * lifetime. */ + size_t indexable_count; };
static unsigned int get_available_writemask(const struct register_allocator *allocator, @@ -3498,11 +3502,23 @@ static void allocate_variable_temp_register(struct hlsl_ctx *ctx,
if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read) { - var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, - var->first_write, var->last_read, var->data_type); + if (var->indexable) + { + var->regs[HLSL_REGSET_NUMERIC].id = allocator->indexable_count++; + var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; + var->regs[HLSL_REGSET_NUMERIC].writemask = 0; + var->regs[HLSL_REGSET_NUMERIC].allocated = true;
- TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', - var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read); + TRACE("Allocated %s to x%u[].\n", var->name, var->regs[HLSL_REGSET_NUMERIC].id); + } + else + { + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator, + var->first_write, var->last_read, var->data_type); + + TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r', + var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read); + } } }
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 0fc16b4c7..90b176003 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3773,7 +3773,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
assert(hlsl_reg.allocated); - reg->type = VKD3DSPR_TEMP; + reg->type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = hlsl_reg.id; reg->idx_count = 1; @@ -4266,6 +4266,20 @@ static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_coun write_sm4_instruction(tpf, &instr); }
+static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t idx, + uint32_t size, uint32_t comp_count) +{ + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, + + .idx = {idx, size, comp_count}, + .idx_count = 3, + }; + + write_sm4_instruction(tpf, &instr); +} + static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) { struct sm4_instruction instr = @@ -5583,6 +5597,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, struct extern_resource *extern_resources; unsigned int extern_resources_count, i; const struct hlsl_buffer *cbuffer; + const struct hlsl_scope *scope; const struct hlsl_ir_var *var; size_t token_count_position; struct tpf_writer tpf; @@ -5637,6 +5652,25 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, if (ctx->temp_count) write_sm4_dcl_temps(&tpf, ctx->temp_count);
+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) + continue; + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + continue; + + if (var->indexable) + { + unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; + unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; + + write_sm4_dcl_indexable_temp(&tpf, id, size, 4); + } + } + } + write_sm4_block(&tpf, &entry_func->body);
write_sm4_ret(&tpf);
This one still bothers me. Neither [0] nor [[]] is actually wrong in that case, but more importantly, if we're using hlsl_deref_is_lowered() then I'd expect it to look something like
if (hlsl_deref_is_lowered()) { // dump the offset } else { // dump the path }
rather than this else-if logic we have.
For the record, we currently do not write `[[]]` for unlowered paths that have length 0, which I think is good because it avoids cluttering the HLSL dump, so I see no need to change that behavior.
I agree that separating the lowered/unlowered cases first makes it more readable, although I put the unlowered case first because it is what happens first, if that makes sense.
This is fine now, but the d3d9 test is broken due to its use of uniform ints.