[PATCH v6 0/7] MR435: vkd3d-shader/tpf: Support non-constant offset dereferences, v2, part 2.

List overview All Threads

newer

older

[PATCH v2 0/6] MR444:...

[PATCH v2 0/1] MR4327: user32: Use...

Francisco Casas (＠fcasas)

7 Nov 2023 7 Nov '23

4:10 p.m.

This is the last thing needed to support non-constant offset dereferences in SM4. It allows relative addressing to be performed on temps.

Besides this, I have additional patches for relative addressing on uniforms, and input and output semantics, but these may not be useful for now, since we copy all these variables into temps instead of using them directly.

-- v6: vkd3d-shader/tpf: Support relative addressing for indexable temps in SM4. vkd3d-shader/tpf: Move sm4_register_from_node() up. vkd3d-shader/tpf: Support writing relative addressing indexes. vkd3d-shader/tpf: Write register index addressing. vkd3d-shader/tpf: Encode dst and src registers using the same function. tests: Add aditional relative addressing tests. tests: Rename array-index-expr.shader_test as non-const-indexing.shader_test.

https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

Show replies by date

Francisco Casas

7 Nov 7 Nov

4:10 p.m.

New subject: [PATCH v6 1/7] tests: Rename array-index-expr.shader_test as non-const-indexing.shader_test.

From: Francisco Casas fcasas@codeweavers.com

--- Makefile.am | 2 +- ...ay-index-expr.shader_test => non-const-indexing.shader_test} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/hlsl/{array-index-expr.shader_test => non-const-indexing.shader_test} (100%)

diff --git a/Makefile.am b/Makefile.am index 531a2705f..14b5cf096 100644 --- a/Makefile.am +++ b/Makefile.am @@ -53,7 +53,6 @@ vkd3d_shader_tests = \ tests/hlsl/arithmetic-int.shader_test \ tests/hlsl/arithmetic-uint.shader_test \ tests/hlsl/array-dimension.shader_test \ - tests/hlsl/array-index-expr.shader_test \ tests/hlsl/array-parameters.shader_test \ tests/hlsl/array-size-expr.shader_test \ tests/hlsl/asfloat.shader_test \ @@ -134,6 +133,7 @@ vkd3d_shader_tests = \ tests/hlsl/multiple-rt.shader_test \ tests/hlsl/nested-arrays.shader_test \ tests/hlsl/nointerpolation.shader_test \ + tests/hlsl/non-const-indexing.shader_test \ tests/hlsl/normalize.shader_test \ tests/hlsl/numeric-constructor-truncation.shader_test \ tests/hlsl/numeric-types.shader_test \ diff --git a/tests/hlsl/array-index-expr.shader_test b/tests/hlsl/non-const-indexing.shader_test similarity index 100% rename from tests/hlsl/array-index-expr.shader_test rename to tests/hlsl/non-const-indexing.shader_test

-- GitLab https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

Francisco Casas

4:10 p.m.

New subject: [PATCH v6 2/7] tests: Add aditional relative addressing tests.

From: Francisco Casas fcasas@codeweavers.com

--- tests/hlsl/non-const-indexing.shader_test | 78 +++++++++++++++++++++++ 1 file changed, 78 insertions(+)

diff --git a/tests/hlsl/non-const-indexing.shader_test b/tests/hlsl/non-const-indexing.shader_test index b7d91ea09..9f79f6103 100644 --- a/tests/hlsl/non-const-indexing.shader_test +++ b/tests/hlsl/non-const-indexing.shader_test @@ -49,6 +49,22 @@ todo(sm>=6) draw quad probe all rgba (14.0, 14.0, 14.0, 14.0)

+[pixel shader todo] +float i; + +float4 main() : sv_target +{ + float a[4] = {1, 2, 3, 4}; + + return a[i]; +} + +[test] +uniform 0 float 2.3 +todo draw quad +todo probe all rgba (3, 3, 3, 3) + + [pixel shader] uniform float i;

@@ -116,3 +132,65 @@ float4 main() : sv_target uniform 0 float4 0 0 2.4 0 todo draw quad probe all rgba (1.0, 120.0, 90.0, 4.0) + + +% SM1 doesn't support relative addressing if it is used in a l-value. +[require] +shader model >= 4.0 + +[pixel shader todo] +int i, j; + +float4 main() : sv_target +{ + float mut1[4] = {1, 2, 3, 4}; + float mut2[4] = {5, 6, 7, 8}; + + mut1[i] = 100; + mut2[j] = mut1[j]; + + return float4(mut2[0], mut2[1], mut2[2], mut2[3]); +} + +[test] +uniform 0 int 0 +uniform 1 int 0 +todo draw quad +todo probe all rgba (100, 6, 7, 8) +uniform 0 int 2 +uniform 1 int 2 +todo draw quad +todo probe all rgba (5, 6, 100, 8) +uniform 0 int 1 +uniform 1 int 3 +todo draw quad +todo probe all rgba (5, 6, 7, 4) + + +[pixel shader todo] +float a, b, c, d; +float e, f, g, h; +int i, j; + +float4 main() : sv_target +{ + + float arr1[8] = {a, a, b, b, c, c, d, d}; + float arr2[8] = {e, e, f, f, g, g, h, h}; + + arr1[i] = arr2[i]; + arr2[j] = arr1[j]; + + return 1000 * float4(arr1[0], arr1[4], arr2[0], arr2[4]) + + 100 * float4(arr1[1], arr1[5], arr2[1], arr2[5]) + + 10 * float4(arr1[2], arr1[6], arr2[2], arr2[6]) + + 1 * float4(arr1[3], arr1[7], arr2[3], arr2[7]); +} + +[test] +uniform 0 float4 1 2 3 4 +uniform 4 float4 5 6 7 8 +uniform 8 int 3 +uniform 9 int 4 +todo draw quad +todo probe all rgba (1126, 3344, 5566, 3788)

-- GitLab https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

Francisco Casas

4:10 p.m.

New subject: [PATCH v6 3/7] vkd3d-shader/tpf: Encode dst and src registers using the same function.

From: Francisco Casas fcasas@codeweavers.com

This function will also be required to encode rel_addr registers. --- libs/vkd3d-shader/tpf.c | 96 +++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 42 deletions(-)

diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index fbc04f61f..74dad536f 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -146,6 +146,9 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); #define VKD3D_SM4_SWIZZLE_SHIFT 4 #define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT)

+#define VKD3D_SM4_SCALAR_DIM_SHIFT 4 +#define VKD3D_SM4_SCALAR_DIM_MASK (0x3u << VKD3D_SM4_SCALAR_DIM_SHIFT) + #define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) #define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf)

@@ -513,7 +516,8 @@ enum vkd3d_sm4_swizzle_type VKD3D_SM4_SWIZZLE_VEC4 = 0x1, VKD3D_SM4_SWIZZLE_SCALAR = 0x2,

- VKD3D_SM4_SWIZZLE_INVALID = ~0u, + VKD3D_SM4_SWIZZLE_DEFAULT = ~0u - 1, + VKD3D_SM4_SWIZZLE_INVALID = ~0u, };

enum vkd3d_sm4_dimension @@ -3837,32 +3841,67 @@ static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_ } }

-static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) +static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, + enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) { const struct vkd3d_sm4_register_type_info *register_type_info; - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t sm4_reg_type, reg_dim; + uint32_t sm4_reg_type, sm4_reg_dim; uint32_t token = 0; - unsigned int j;

- register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); + register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, reg->type); if (!register_type_info) { - FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); + FIXME("Unhandled vkd3d-shader register type %#x.\n", reg->type); sm4_reg_type = VKD3D_SM4_RT_TEMP; + if (sm4_swizzle_type == VKD3D_SM4_SWIZZLE_DEFAULT) + sm4_swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; } else { sm4_reg_type = register_type_info->sm4_type; + if (sm4_swizzle_type == VKD3D_SM4_SWIZZLE_DEFAULT) + sm4_swizzle_type = register_type_info->default_src_swizzle_type; } - - reg_dim = sm4_dimension_from_vsir_dimension(dst->reg.dimension); + sm4_reg_dim = sm4_dimension_from_vsir_dimension(reg->dimension);

token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; - token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; - token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; - if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) - token |= dst->write_mask << VKD3D_SM4_WRITEMASK_SHIFT; + token |= reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; + token |= sm4_reg_dim << VKD3D_SM4_DIMENSION_SHIFT; + + if (sm4_reg_dim == VKD3D_SM4_DIMENSION_VEC4) + { + token |= (uint32_t)sm4_swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + + switch (sm4_swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: + assert(sm4_swizzle || register_is_constant(reg)); + token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & VKD3D_SM4_WRITEMASK_MASK; + break; + + case VKD3D_SM4_SWIZZLE_VEC4: + token |= (sm4_swizzle << VKD3D_SM4_SWIZZLE_SHIFT) & VKD3D_SM4_SWIZZLE_MASK; + break; + + case VKD3D_SM4_SWIZZLE_SCALAR: + token |= (sm4_swizzle << VKD3D_SM4_SCALAR_DIM_SHIFT) & VKD3D_SM4_SCALAR_DIM_MASK; + break; + + default: + vkd3d_unreachable(); + } + } + + return token; +} + +static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) +{ + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = 0; + unsigned int j; + + token = sm4_encode_register(tpf, &dst->reg, VKD3D_SM4_SWIZZLE_NONE, dst->write_mask); put_u32(buffer, token);

for (j = 0; j < dst->reg.idx_count; ++j) @@ -3874,38 +3913,11 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk

static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src) { - const struct vkd3d_sm4_register_type_info *register_type_info; struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t sm4_reg_type, reg_dim; uint32_t token = 0, mod_token = 0; unsigned int j;

- register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); - if (!register_type_info) - { - FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); - sm4_reg_type = VKD3D_SM4_RT_TEMP; - } - else - { - sm4_reg_type = register_type_info->sm4_type; - } - - reg_dim = sm4_dimension_from_vsir_dimension(src->reg.dimension); - - token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; - token |= src->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; - token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; - if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) - { - uint32_t swizzle_type = (uint32_t)register_type_info->default_src_swizzle_type; - - token |= swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - if (swizzle_type == VKD3D_SM4_SWIZZLE_SCALAR) - token |= (swizzle_to_sm4(src->swizzle) & 0x3) << VKD3D_SM4_SWIZZLE_SHIFT; - else - token |= swizzle_to_sm4(src->swizzle) << VKD3D_SM4_SWIZZLE_SHIFT; - } + token = sm4_encode_register(tpf, &src->reg, VKD3D_SM4_SWIZZLE_DEFAULT, swizzle_to_sm4(src->swizzle));

switch (src->modifiers) { @@ -3954,7 +3966,7 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk if (src->reg.type == VKD3DSPR_IMMCONST) { put_u32(buffer, src->reg.u.immconst_uint[0]); - if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) + if (src->reg.dimension == VSIR_DIMENSION_VEC4) { put_u32(buffer, src->reg.u.immconst_uint[1]); put_u32(buffer, src->reg.u.immconst_uint[2]);

-- GitLab https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

Francisco Casas

4:10 p.m.

New subject: [PATCH v6 4/7] vkd3d-shader/tpf: Write register index addressing.

From: Francisco Casas fcasas@codeweavers.com

--- libs/vkd3d-shader/tpf.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)

diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 74dad536f..79ea39d31 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3841,6 +3841,20 @@ static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_ } }

+static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, + unsigned int i) +{ + if (reg->idx[i].rel_addr) + { + if (reg->idx[i].offset == 0) + return VKD3D_SM4_ADDRESSING_RELATIVE; + else + return VKD3D_SM4_ADDRESSING_RELATIVE | VKD3D_SM4_ADDRESSING_OFFSET; + } + + return 0; +} + static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) { @@ -3867,6 +3881,12 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; token |= reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; token |= sm4_reg_dim << VKD3D_SM4_DIMENSION_SHIFT; + if (reg->idx_count > 0) + token |= sm4_get_index_addressing_from_reg(reg, 0) << VKD3D_SM4_ADDRESSING_SHIFT0; + if (reg->idx_count > 1) + token |= sm4_get_index_addressing_from_reg(reg, 1) << VKD3D_SM4_ADDRESSING_SHIFT1; + if (reg->idx_count > 2) + token |= sm4_get_index_addressing_from_reg(reg, 2) << VKD3D_SM4_ADDRESSING_SHIFT2;

if (sm4_reg_dim == VKD3D_SM4_DIMENSION_VEC4) {

-- GitLab https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

Francisco Casas

4:10 p.m.

New subject: [PATCH v6 5/7] vkd3d-shader/tpf: Support writing relative addressing indexes.

From: Francisco Casas fcasas@codeweavers.com

--- libs/vkd3d-shader/tpf.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 79ea39d31..70ee41242 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3915,6 +3915,36 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v return token; }

+static void sm4_write_register_index(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, + unsigned int j) +{ + unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + unsigned int k; + + if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) + { + const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr; + uint32_t idx_src_token; + + assert(idx_src); + assert(!idx_src->modifiers); + assert(idx_src->reg.type != VKD3DSPR_IMMCONST); + idx_src_token = sm4_encode_register(tpf, &idx_src->reg, VKD3D_SM4_SWIZZLE_SCALAR, idx_src->swizzle); + + put_u32(buffer, idx_src_token); + for (k = 0; k < idx_src->reg.idx_count; ++k) + { + put_u32(buffer, idx_src->reg.idx[k].offset); + assert(!idx_src->reg.idx[k].rel_addr); + } + } + else + { + put_u32(tpf->buffer, reg->idx[j].offset); + } +} + static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) { struct vkd3d_bytecode_buffer *buffer = tpf->buffer; @@ -3925,10 +3955,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk put_u32(buffer, token);

for (j = 0; j < dst->reg.idx_count; ++j) - { - put_u32(buffer, dst->reg.idx[j].offset); - assert(!dst->reg.idx[j].rel_addr); - } + sm4_write_register_index(tpf, &dst->reg, j); }

static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src) @@ -3978,10 +4005,7 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk }

for (j = 0; j < src->reg.idx_count; ++j) - { - put_u32(buffer, src->reg.idx[j].offset); - assert(!src->reg.idx[j].rel_addr); - } + sm4_write_register_index(tpf, &src->reg, j);

if (src->reg.type == VKD3DSPR_IMMCONST) {

-- GitLab https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

Francisco Casas

4:10 p.m.

New subject: [PATCH v6 6/7] vkd3d-shader/tpf: Move sm4_register_from_node() up.

From: Francisco Casas fcasas@codeweavers.com

--- libs/vkd3d-shader/tpf.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 70ee41242..b7fcfca5c 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3637,6 +3637,17 @@ struct sm4_instruction unsigned int idx_count; };

+static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, + const struct hlsl_ir_node *instr) +{ + assert(instr->reg.allocated); + reg->type = VKD3DSPR_TEMP; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = instr->reg.id; + reg->idx_count = 1; + *writemask = instr->reg.writemask; +} + static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) { @@ -3778,17 +3789,6 @@ static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader } }

-static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, - const struct hlsl_ir_node *instr) -{ - assert(instr->reg.allocated); - reg->type = VKD3DSPR_TEMP; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = instr->reg.id; - reg->idx_count = 1; - *writemask = instr->reg.writemask; -} - static void sm4_dst_from_node(struct vkd3d_shader_dst_param *dst, const struct hlsl_ir_node *instr) { sm4_register_from_node(&dst->reg, &dst->write_mask, instr);

-- GitLab https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

Francisco Casas

4:10 p.m.

New subject: [PATCH v6 7/7] vkd3d-shader/tpf: Support relative addressing for indexable temps in SM4.

From: Francisco Casas fcasas@codeweavers.com

For relative addressing, the vkd3d_shader_registers must point to another vkd3d_shader_src_param. For now, use the sm4_instruction to save them, since the only purpose of this struct is to be used as a parameter for write_sm4_instruction.

---

Note to self: this could be achieved by vkd3d_shader_param_allocator once we make tpf.c capable of working directly with vsir input. --- libs/vkd3d-shader/tpf.c | 86 +++++++++++++++++------ tests/hlsl/function-return.shader_test | 22 +++--- tests/hlsl/matrix-indexing.shader_test | 6 +- tests/hlsl/non-const-indexing.shader_test | 66 ++++++++--------- tests/hlsl/return.shader_test | 22 +++--- 5 files changed, 123 insertions(+), 79 deletions(-)

diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index b7fcfca5c..62b103904 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3635,6 +3635,9 @@ struct sm4_instruction

uint32_t idx[3]; unsigned int idx_count; + + struct vkd3d_shader_src_param idx_srcs[7]; + unsigned int idx_src_count; };

static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, @@ -3648,8 +3651,54 @@ static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t * *writemask = instr->reg.writemask; }

+static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, + enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref, + struct sm4_instruction *sm4_instr) +{ + const struct hlsl_ir_var *var = deref->var; + unsigned int offset_const_deref; + + reg->type = type; + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->dimension = VSIR_DIMENSION_VEC4; + + assert(var->regs[HLSL_REGSET_NUMERIC].allocated); + + if (!var->indexable) + { + offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); + reg->idx[0].offset += offset_const_deref / 4; + reg->idx_count = 1; + } + else + { + offset_const_deref = deref->const_offset; + reg->idx[1].offset = offset_const_deref / 4; + reg->idx_count = 2; + + if (deref->rel_offset.node) + { + struct vkd3d_shader_src_param *idx_src; + unsigned int idx_writemask; + + assert(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); + idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; + memset(idx_src, 0, sizeof(*idx_src)); + + reg->idx[1].rel_addr = idx_src; + sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); + assert(idx_writemask != 0); + idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); + } + } + + *writemask = 0xf & (0xf << (offset_const_deref % 4)); + if (var->regs[HLSL_REGSET_NUMERIC].writemask) + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); +} + static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, - uint32_t *writemask, const struct hlsl_deref *deref) + uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) { const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); const struct hlsl_ir_var *var = deref->var; @@ -3764,24 +3813,19 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re } else { - struct hlsl_reg hlsl_reg 
= hlsl_reg_from_deref(ctx, deref); + enum vkd3d_shader_register_type type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;

- assert(hlsl_reg.allocated); - reg->type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; + sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr); } }

static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_deref *deref, unsigned int map_writemask) + const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) { unsigned int hlsl_swizzle; uint32_t writemask;

- sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref); + sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr); if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) { hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); @@ -4532,7 +4576,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node

sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask);

- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);

instr.src_count = 2;

@@ -4619,8 +4663,8 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ instr.dst_count = 1;

sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask); - sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); + sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); instr.src_count = 3;

if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD @@ -4661,7 +4705,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl sm4_dst_from_node(&instr.dsts[0], dst); instr.dst_count = 1;

- sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); instr.src_count = 1;

write_sm4_instruction(tpf, &instr); @@ -4684,7 +4728,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir instr.dst_count = 1;

sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); instr.src_count = 2;

write_sm4_instruction(tpf, &instr); @@ -4832,7 +4876,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;

- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); instr.dst_count = 1;

sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); @@ -5378,7 +5422,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo

instr.opcode = VKD3D_SM4_OP_MOVC;

- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);

memset(&value, 0xff, sizeof(value)); sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); @@ -5390,7 +5434,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo { instr.opcode = VKD3D_SM4_OP_MOV;

- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); instr.src_count = 1; }

@@ -5443,10 +5487,10 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ } }

- sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr);

src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr); src->reg.dimension = VSIR_DIMENSION_VEC4; src->swizzle = swizzle;

@@ -5547,7 +5591,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV;

- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); instr.dst_count = 1;

diff --git a/tests/hlsl/function-return.shader_test b/tests/hlsl/function-return.shader_test index 98aac4fa7..be997d0c3 100644 --- a/tests/hlsl/function-return.shader_test +++ b/tests/hlsl/function-return.shader_test @@ -258,7 +258,7 @@ uniform 0 float 0.9 todo(sm>=6) draw quad probe all rgba (0.4, 0.1, 0.7, 0.6) 1

-[pixel shader todo] +[pixel shader]

uniform float4 f[3];

@@ -295,21 +295,21 @@ float4 main() : sv_target uniform 0 float4 0.3 0.0 0.0 0.0 uniform 4 float4 0.0 0.0 0.0 0.0 uniform 8 float4 0.1 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.2, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.2, 0.6, 0.6) 1

uniform 4 float4 0.35 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.3, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.3, 0.6, 0.6) 1

uniform 8 float4 0.5 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.5, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1

uniform 0 float4 1.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.5, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1

uniform 4 float4 2.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.4, 0.1, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.4, 0.1, 0.6, 0.6) 1 diff --git a/tests/hlsl/matrix-indexing.shader_test b/tests/hlsl/matrix-indexing.shader_test index 170036475..b8e6dec68 100644 --- a/tests/hlsl/matrix-indexing.shader_test +++ b/tests/hlsl/matrix-indexing.shader_test @@ -124,7 +124,7 @@ todo(sm>=6) draw quad probe all rgba (8, 9, 10, 11)

-[pixel shader todo] +[pixel shader] uniform float i;

float4 main() : sv_target @@ -136,5 +136,5 @@ float4 main() : sv_target

[test] uniform 0 float 3 -todo draw quad -todo probe all rgba (12, 13, 14, 15) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (12, 13, 14, 15) diff --git a/tests/hlsl/non-const-indexing.shader_test b/tests/hlsl/non-const-indexing.shader_test index 9f79f6103..bc550738c 100644 --- a/tests/hlsl/non-const-indexing.shader_test +++ b/tests/hlsl/non-const-indexing.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] uniform float4 f[3]; uniform float2 i;

@@ -12,17 +12,17 @@ uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float4 5.0 6.0 7.0 8.0 uniform 8 float4 9.0 10.0 11.0 12.0 uniform 12 float4 0 0 0 0 -todo draw quad -todo probe all rgba (1.0, 2.0, 3.0, 4.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1.0, 2.0, 3.0, 4.0) uniform 12 float4 1 0 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 12 float4 0 1 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 12 float4 1 1 0 0 -todo draw quad -todo probe all rgba (9.0, 10.0, 11.0, 12.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (9.0, 10.0, 11.0, 12.0)

[pixel shader] @@ -49,7 +49,7 @@ todo(sm>=6) draw quad probe all rgba (14.0, 14.0, 14.0, 14.0)

-[pixel shader todo] +[pixel shader] float i;

float4 main() : sv_target @@ -61,8 +61,8 @@ float4 main() : sv_target

[test] uniform 0 float 2.3 -todo draw quad -todo probe all rgba (3, 3, 3, 3) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (3, 3, 3, 3)

[pixel shader] @@ -90,7 +90,7 @@ todo(sm>=6) draw quad probe all rgba (24.0, 0.0, 21.0, 1.0)

-[pixel shader todo] +[pixel shader] uniform float2 i;

float4 main() : sv_target @@ -102,20 +102,20 @@ float4 main() : sv_target

[test] uniform 0 float4 0 0 0 0 -todo draw quad -todo probe all rgba (1.0, 2.0, 3.0, 4.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1.0, 2.0, 3.0, 4.0) uniform 0 float4 1 0 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 0 float4 0 1 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 0 float4 1 1 0 0 -todo draw quad -todo probe all rgba (9.0, 10.0, 11.0, 12.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (9.0, 10.0, 11.0, 12.0)

-[pixel shader todo] +[pixel shader] float4 a;

float4 main() : sv_target @@ -130,7 +130,7 @@ float4 main() : sv_target

[test] uniform 0 float4 0 0 2.4 0 -todo draw quad +todo(sm>=6) draw quad probe all rgba (1.0, 120.0, 90.0, 4.0)

@@ -138,7 +138,7 @@ probe all rgba (1.0, 120.0, 90.0, 4.0) [require] shader model >= 4.0

-[pixel shader todo] +[pixel shader] int i, j;

float4 main() : sv_target @@ -155,19 +155,19 @@ float4 main() : sv_target [test] uniform 0 int 0 uniform 1 int 0 -todo draw quad -todo probe all rgba (100, 6, 7, 8) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (100, 6, 7, 8) uniform 0 int 2 uniform 1 int 2 -todo draw quad -todo probe all rgba (5, 6, 100, 8) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5, 6, 100, 8) uniform 0 int 1 uniform 1 int 3 -todo draw quad -todo probe all rgba (5, 6, 7, 4) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5, 6, 7, 4)

-[pixel shader todo] +[pixel shader] float a, b, c, d; float e, f, g, h; int i, j; @@ -192,5 +192,5 @@ uniform 0 float4 1 2 3 4 uniform 4 float4 5 6 7 8 uniform 8 int 3 uniform 9 int 4 -todo draw quad -todo probe all rgba (1126, 3344, 5566, 3788) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1126, 3344, 5566, 3788) diff --git a/tests/hlsl/return.shader_test b/tests/hlsl/return.shader_test index b8ebac0a1..29621b006 100644 --- a/tests/hlsl/return.shader_test +++ b/tests/hlsl/return.shader_test @@ -217,7 +217,7 @@ uniform 0 float 0.8 todo(sm>=6) draw quad probe all rgba (0.5, 0.5, 0.5, 0.5)

-[pixel shader todo] +[pixel shader]

uniform float4 f[3];

@@ -243,21 +243,21 @@ void main(out float4 ret : sv_target) uniform 0 float4 0.3 0.0 0.0 0.0 uniform 4 float4 0.0 0.0 0.0 0.0 uniform 8 float4 0.1 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.1, 0.1, 0.1, 0.1) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.1, 0.1, 0.1, 0.1) 1

uniform 4 float4 0.35 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.2, 0.2, 0.2, 0.2) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.2, 0.2, 0.2, 0.2) 1

uniform 8 float4 0.5 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.4, 0.4, 0.4, 0.4) 1

uniform 0 float4 1.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.4, 0.4, 0.4, 0.4) 1

uniform 4 float4 2.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.9, 0.9, 0.9, 0.9) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.9, 0.9, 0.9, 0.9) 1

-- GitLab https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

Henri Verbeet (＠hverbeet)

6:42 p.m.

New subject: [PATCH v6 0/7] MR435: vkd3d-shader/tpf: Support non-constant offset dereferences, v2, part 2. - approved

This merge request was approved by Henri Verbeet.

-- https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

711

Age (days ago)

711

Last active (days ago)

wine-gitlab@winehq.org

8 comments

3 participants

tags (0)

participants (3)

Francisco Casas
Francisco Casas (＠fcasas)
Henri Verbeet (＠hverbeet)