This is the last thing needed to support non-constant offset dereferences in SM4. It allows relative addressing to be performed on temps.
Besides this, I have additional patches for relative addressing on uniforms, and input and output semantics, but these may not be useful for now, since we copy all these variables into temps instead of using them directly.
-- v2: vkd3d-shader/tpf: Support relative addressing for indexable temps in SM4. vkd3d-shader/tpf: Move sm4_register_from_node() up. vkd3d-shader/tpf: Support writing relative addressing indexes. vkd3d-shader/tpf: Write register index addressing. vkd3d-shader/tpf: Encode dst and src registers using the same function. tests: Test relative addressing.
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/hlsl/relative-addressing.shader_test | 76 ++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 tests/hlsl/relative-addressing.shader_test
diff --git a/Makefile.am b/Makefile.am index 5fbac802..b82ec974 100644 --- a/Makefile.am +++ b/Makefile.am @@ -143,6 +143,7 @@ vkd3d_shader_tests = \ tests/hlsl/pow.shader_test \ tests/hlsl/reflect.shader_test \ tests/hlsl/register-reservations.shader_test \ + tests/hlsl/relative-addressing.shader_test \ tests/hlsl/return-implicit-conversion.shader_test \ tests/hlsl/return.shader_test \ tests/hlsl/round.shader_test \ diff --git a/tests/hlsl/relative-addressing.shader_test b/tests/hlsl/relative-addressing.shader_test new file mode 100644 index 00000000..4700dc54 --- /dev/null +++ b/tests/hlsl/relative-addressing.shader_test @@ -0,0 +1,76 @@ +% Note: SM1 supports relative addressing as long as it is not used as an l-value. +[pixel shader todo] +float i; + +float4 main() : sv_target +{ + float a[4] = {1, 2, 3, 4}; + + return a[i]; +} + +[test] +uniform 0 float 2.3 +todo draw quad +todo probe all rgba (3, 3, 3, 3) + + +[require] +shader model >= 4.0 + +[pixel shader todo] +int i, j; + +float4 main() : sv_target +{ + float mut1[4] = {1, 2, 3, 4}; + float mut2[4] = {5, 6, 7, 8}; + + mut1[i] = 100; + mut2[j] = mut1[j]; + + return float4(mut2[0], mut2[1], mut2[2], mut2[3]); +} + +[test] +uniform 0 int 0 +uniform 1 int 0 +todo draw quad +todo probe all rgba (100, 6, 7, 8) +uniform 0 int 2 +uniform 1 int 2 +todo draw quad +todo probe all rgba (5, 6, 100, 8) +uniform 0 int 1 +uniform 1 int 3 +todo draw quad +todo probe all rgba (5, 6, 7, 4) + + +[pixel shader todo] +float a, b, c, d; +float e, f, g, h; +int i, j; + +float4 main() : sv_target +{ + + float arr1[8] = {a, a, b, b, c, c, d, d}; + float arr2[8] = {e, e, f, f, g, g, h, h}; + + arr1[i] = arr2[i]; + arr2[j] = arr1[j]; + + return 1000 * float4(arr1[0], arr1[4], arr2[0], arr2[4]) + + 100 * float4(arr1[1], arr1[5], arr2[1], arr2[5]) + + 10 * float4(arr1[2], arr1[6], arr2[2], arr2[6]) + + 1 * float4(arr1[3], arr1[7], arr2[3], arr2[7]); +} + +[test] +uniform 0 float4 1 2 3 4 +uniform 4 float4 5 6 7 8 +uniform 8 int 3 
+uniform 9 int 4 +todo draw quad +todo probe all rgba (1126, 3344, 5566, 3788)
From: Francisco Casas fcasas@codeweavers.com
This function will also be required to encode rel_addr registers. --- libs/vkd3d-shader/tpf.c | 94 +++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 41 deletions(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index a49457dc..3dd6de37 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -146,6 +146,9 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); #define VKD3D_SM4_SWIZZLE_SHIFT 4 #define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT)
+#define VKD3D_SM4_SCALAR_DIM_SHIFT 4 +#define VKD3D_SM4_SCALAR_DIM_MASK (0x3u << VKD3D_SM4_SCALAR_DIM_SHIFT) + #define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) #define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf)
@@ -513,6 +516,7 @@ enum vkd3d_sm4_swizzle_type VKD3D_SM4_SWIZZLE_VEC4 = 0x1, VKD3D_SM4_SWIZZLE_SCALAR = 0x2,
+ VKD3D_SM4_SWIZZLE_DEFAULT = ~0u-1, VKD3D_SM4_SWIZZLE_INVALID = ~0u, };
@@ -3832,32 +3836,67 @@ static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_ } }
-static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) +static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, + enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) { const struct vkd3d_sm4_register_type_info *register_type_info; - struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t sm4_reg_type, reg_dim; + uint32_t sm4_reg_type, sm4_reg_dim; uint32_t token = 0; - unsigned int j;
- register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); + register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, reg->type); if (!register_type_info) { - FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); + FIXME("Unhandled vkd3d-shader register type %#x.\n", reg->type); sm4_reg_type = VKD3D_SM4_RT_TEMP; + if (sm4_swizzle_type == VKD3D_SM4_SWIZZLE_DEFAULT) + sm4_swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; } else { sm4_reg_type = register_type_info->sm4_type; + if (sm4_swizzle_type == VKD3D_SM4_SWIZZLE_DEFAULT) + sm4_swizzle_type = register_type_info->default_src_swizzle_type; } - - reg_dim = sm4_dimension_from_vsir_dimension(dst->reg.dimension); + sm4_reg_dim = sm4_dimension_from_vsir_dimension(reg->dimension);
token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; - token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; - token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; - if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) - token |= dst->write_mask << VKD3D_SM4_WRITEMASK_SHIFT; + token |= reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; + token |= sm4_reg_dim << VKD3D_SM4_DIMENSION_SHIFT; + + if (sm4_reg_dim == VKD3D_SM4_DIMENSION_VEC4) + { + token |= (uint32_t)sm4_swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; + + switch (sm4_swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: + assert(sm4_swizzle); + token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & VKD3D_SM4_WRITEMASK_MASK; + break; + + case VKD3D_SM4_SWIZZLE_VEC4: + token |= (sm4_swizzle << VKD3D_SM4_SWIZZLE_SHIFT) & VKD3D_SM4_SWIZZLE_MASK; + break; + + case VKD3D_SM4_SWIZZLE_SCALAR: + token |= (sm4_swizzle << VKD3D_SM4_SCALAR_DIM_SHIFT) & VKD3D_SM4_SCALAR_DIM_MASK; + break; + + default: + vkd3d_unreachable(); + } + } + + return token; +} + +static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) +{ + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = 0; + unsigned int j; + + token = sm4_encode_register(tpf, &dst->reg, VKD3D_SM4_SWIZZLE_NONE, dst->write_mask); put_u32(buffer, token);
for (j = 0; j < dst->reg.idx_count; ++j) @@ -3869,38 +3908,11 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk
static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src) { - const struct vkd3d_sm4_register_type_info *register_type_info; struct vkd3d_bytecode_buffer *buffer = tpf->buffer; - uint32_t sm4_reg_type, reg_dim; uint32_t token = 0, mod_token = 0; unsigned int j;
- register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); - if (!register_type_info) - { - FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); - sm4_reg_type = VKD3D_SM4_RT_TEMP; - } - else - { - sm4_reg_type = register_type_info->sm4_type; - } - - reg_dim = sm4_dimension_from_vsir_dimension(src->reg.dimension); - - token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; - token |= src->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; - token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; - if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) - { - uint32_t swizzle_type = (uint32_t)register_type_info->default_src_swizzle_type; - - token |= swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; - if (swizzle_type == VKD3D_SM4_SWIZZLE_SCALAR) - token |= (swizzle_to_sm4(src->swizzle) & 0x3) << VKD3D_SM4_SWIZZLE_SHIFT; - else - token |= swizzle_to_sm4(src->swizzle) << VKD3D_SM4_SWIZZLE_SHIFT; - } + token = sm4_encode_register(tpf, &src->reg, VKD3D_SM4_SWIZZLE_DEFAULT, swizzle_to_sm4(src->swizzle));
switch (src->modifiers) { @@ -3949,7 +3961,7 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk if (src->reg.type == VKD3DSPR_IMMCONST) { put_u32(buffer, src->reg.u.immconst_uint[0]); - if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) + if (src->reg.dimension == VSIR_DIMENSION_VEC4) { put_u32(buffer, src->reg.u.immconst_uint[1]); put_u32(buffer, src->reg.u.immconst_uint[2]);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/tpf.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 3dd6de37..e716a452 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3836,9 +3836,29 @@ static void sm4_src_from_node(const struct tpf_writer *tpf, struct vkd3d_shader_ } }
+static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, + unsigned int i) +{ + if (i >= reg->idx_count) + return 0; + + if (reg->idx[i].rel_addr) + { + if (reg->idx[i].offset == 0) + return VKD3D_SM4_ADDRESSING_RELATIVE; + else + return VKD3D_SM4_ADDRESSING_RELATIVE | VKD3D_SM4_ADDRESSING_OFFSET; + } + + return 0; +} + static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) { + unsigned int idx0_addressing = sm4_get_index_addressing_from_reg(reg, 0); + unsigned int idx1_addressing = sm4_get_index_addressing_from_reg(reg, 1); + unsigned int idx2_addressing = sm4_get_index_addressing_from_reg(reg, 2); const struct vkd3d_sm4_register_type_info *register_type_info; uint32_t sm4_reg_type, sm4_reg_dim; uint32_t token = 0; @@ -3862,6 +3882,9 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; token |= reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; token |= sm4_reg_dim << VKD3D_SM4_DIMENSION_SHIFT; + token |= idx0_addressing << VKD3D_SM4_ADDRESSING_SHIFT0; + token |= idx1_addressing << VKD3D_SM4_ADDRESSING_SHIFT1; + token |= idx2_addressing << VKD3D_SM4_ADDRESSING_SHIFT2;
if (sm4_reg_dim == VKD3D_SM4_DIMENSION_VEC4) {
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/tpf.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index e716a452..b5f8a54e 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3913,6 +3913,36 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v return token; }
+static void sm4_write_register_index(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, + unsigned int j) +{ + unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); + struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + unsigned int k; + + if (addressing & VKD3D_SM4_ADDRESSING_RELATIVE) + { + const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr; + uint32_t idx_src_token; + + assert(idx_src); + assert(!idx_src->modifiers); + assert(idx_src->reg.type != VKD3DSPR_IMMCONST); + idx_src_token = sm4_encode_register(tpf, &idx_src->reg, VKD3D_SM4_SWIZZLE_SCALAR, idx_src->swizzle); + + put_u32(buffer, idx_src_token); + for (k = 0; k < idx_src->reg.idx_count; ++k) + { + put_u32(buffer, idx_src->reg.idx[k].offset); + assert(!idx_src->reg.idx[k].rel_addr); + } + } + else + { + put_u32(tpf->buffer, reg->idx[j].offset); + } +} + static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vkd3d_shader_dst_param *dst) { struct vkd3d_bytecode_buffer *buffer = tpf->buffer; @@ -3923,10 +3953,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk put_u32(buffer, token);
for (j = 0; j < dst->reg.idx_count; ++j) - { - put_u32(buffer, dst->reg.idx[j].offset); - assert(!dst->reg.idx[j].rel_addr); - } + sm4_write_register_index(tpf, &dst->reg, j); }
static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vkd3d_shader_src_param *src) @@ -3976,10 +4003,7 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk }
for (j = 0; j < src->reg.idx_count; ++j) - { - put_u32(buffer, src->reg.idx[j].offset); - assert(!src->reg.idx[j].rel_addr); - } + sm4_write_register_index(tpf, &src->reg, j);
if (src->reg.type == VKD3DSPR_IMMCONST) {
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/tpf.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index b5f8a54e..37b3a3a4 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3632,6 +3632,17 @@ struct sm4_instruction unsigned int idx_count; };
+static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, + const struct hlsl_ir_node *instr) +{ + assert(instr->reg.allocated); + reg->type = VKD3DSPR_TEMP; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = instr->reg.id; + reg->idx_count = 1; + *writemask = instr->reg.writemask; +} + static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) { @@ -3773,17 +3784,6 @@ static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader } }
-static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, - const struct hlsl_ir_node *instr) -{ - assert(instr->reg.allocated); - reg->type = VKD3DSPR_TEMP; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = instr->reg.id; - reg->idx_count = 1; - *writemask = instr->reg.writemask; -} - static void sm4_dst_from_node(struct vkd3d_shader_dst_param *dst, const struct hlsl_ir_node *instr) { sm4_register_from_node(&dst->reg, &dst->write_mask, instr);
From: Francisco Casas fcasas@codeweavers.com
For relative addressing, the vkd3d_shader_registers must point to another vkd3d_shader_src_param. For now, use the sm4_instruction to save them, since the only purpose of this struct is to be used as a parameter for write_sm4_instruction.
---
Note to self: this could be achieved by vkd3d_shader_param_allocator once we make tpf.c capable of working directly with vsir input. --- libs/vkd3d-shader/tpf.c | 86 ++++++++++++++++------ tests/hlsl/array-index-expr.shader_test | 42 +++++------ tests/hlsl/function-return.shader_test | 22 +++--- tests/hlsl/matrix-indexing.shader_test | 6 +- tests/hlsl/relative-addressing.shader_test | 26 +++---- tests/hlsl/return.shader_test | 22 +++--- 6 files changed, 124 insertions(+), 80 deletions(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 37b3a3a4..57e88f1f 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3630,6 +3630,9 @@ struct sm4_instruction
uint32_t idx[3]; unsigned int idx_count; + + struct vkd3d_shader_src_param idx_srcs[7]; + unsigned int idx_src_count; };
static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, @@ -3643,8 +3646,54 @@ static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t * *writemask = instr->reg.writemask; }
+static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, + enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref, + struct sm4_instruction *sm4_instr) +{ + const struct hlsl_ir_var *var = deref->var; + unsigned int offset_const_deref; + + reg->type = type; + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->dimension = VSIR_DIMENSION_VEC4; + + assert(var->regs[HLSL_REGSET_NUMERIC].allocated); + + if (!var->indexable) + { + offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); + reg->idx[0].offset += offset_const_deref / 4; + reg->idx_count = 1; + } + else + { + offset_const_deref = deref->const_offset; + reg->idx[1].offset = offset_const_deref / 4; + reg->idx_count = 2; + + if (deref->rel_offset.node) + { + struct vkd3d_shader_src_param *idx_src; + unsigned int idx_writemask; + + assert(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); + idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; + memset(idx_src, 0, sizeof(*idx_src)); + + reg->idx[1].rel_addr = idx_src; + sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); + assert(idx_writemask != 0); + idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); + } + } + + *writemask = 0xf & (0xf << (offset_const_deref % 4)); + if (var->regs[HLSL_REGSET_NUMERIC].writemask) + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); +} + static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, - uint32_t *writemask, const struct hlsl_deref *deref) + uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) { const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); const struct hlsl_ir_var *var = deref->var; @@ -3759,24 +3808,19 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re } else { - struct hlsl_reg hlsl_reg 
= hlsl_reg_from_deref(ctx, deref); + enum vkd3d_shader_register_type type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
- assert(hlsl_reg.allocated); - reg->type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; + sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr); } }
static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_deref *deref, unsigned int map_writemask) + const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) { unsigned int hlsl_swizzle; uint32_t writemask;
- sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref); + sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr); if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) { hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); @@ -4530,7 +4574,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node
sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask);
- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
instr.src_count = 2;
@@ -4617,8 +4661,8 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ instr.dst_count = 1;
sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask); - sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); + sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); instr.src_count = 3;
if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD @@ -4659,7 +4703,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl sm4_dst_from_node(&instr.dsts[0], dst); instr.dst_count = 1;
- sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); instr.src_count = 1;
write_sm4_instruction(tpf, &instr); @@ -4682,7 +4726,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir instr.dst_count = 1;
sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); instr.src_count = 2;
write_sm4_instruction(tpf, &instr); @@ -4830,7 +4874,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); instr.dst_count = 1;
sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); @@ -5376,7 +5420,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo
instr.opcode = VKD3D_SM4_OP_MOVC;
- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
memset(&value, 0xff, sizeof(value)); sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); @@ -5388,7 +5432,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo { instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); instr.src_count = 1; }
@@ -5441,10 +5485,10 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ } }
- sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr);
src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr); src->reg.dimension = VSIR_DIMENSION_VEC4; src->swizzle = swizzle;
@@ -5545,7 +5589,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); instr.dst_count = 1;
diff --git a/tests/hlsl/array-index-expr.shader_test b/tests/hlsl/array-index-expr.shader_test index b7d91ea0..9a118f5a 100644 --- a/tests/hlsl/array-index-expr.shader_test +++ b/tests/hlsl/array-index-expr.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] uniform float4 f[3]; uniform float2 i;
@@ -12,17 +12,17 @@ uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float4 5.0 6.0 7.0 8.0 uniform 8 float4 9.0 10.0 11.0 12.0 uniform 12 float4 0 0 0 0 -todo draw quad -todo probe all rgba (1.0, 2.0, 3.0, 4.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1.0, 2.0, 3.0, 4.0) uniform 12 float4 1 0 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 12 float4 0 1 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 12 float4 1 1 0 0 -todo draw quad -todo probe all rgba (9.0, 10.0, 11.0, 12.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (9.0, 10.0, 11.0, 12.0)
[pixel shader] @@ -74,7 +74,7 @@ todo(sm>=6) draw quad probe all rgba (24.0, 0.0, 21.0, 1.0)
-[pixel shader todo] +[pixel shader] uniform float2 i;
float4 main() : sv_target @@ -86,20 +86,20 @@ float4 main() : sv_target
[test] uniform 0 float4 0 0 0 0 -todo draw quad -todo probe all rgba (1.0, 2.0, 3.0, 4.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1.0, 2.0, 3.0, 4.0) uniform 0 float4 1 0 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 0 float4 0 1 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 0 float4 1 1 0 0 -todo draw quad -todo probe all rgba (9.0, 10.0, 11.0, 12.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (9.0, 10.0, 11.0, 12.0)
-[pixel shader todo] +[pixel shader] float4 a;
float4 main() : sv_target @@ -114,5 +114,5 @@ float4 main() : sv_target
[test] uniform 0 float4 0 0 2.4 0 -todo draw quad -probe all rgba (1.0, 120.0, 90.0, 4.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1.0, 120.0, 90.0, 4.0) diff --git a/tests/hlsl/function-return.shader_test b/tests/hlsl/function-return.shader_test index 98aac4fa..be997d0c 100644 --- a/tests/hlsl/function-return.shader_test +++ b/tests/hlsl/function-return.shader_test @@ -258,7 +258,7 @@ uniform 0 float 0.9 todo(sm>=6) draw quad probe all rgba (0.4, 0.1, 0.7, 0.6) 1
-[pixel shader todo] +[pixel shader]
uniform float4 f[3];
@@ -295,21 +295,21 @@ float4 main() : sv_target uniform 0 float4 0.3 0.0 0.0 0.0 uniform 4 float4 0.0 0.0 0.0 0.0 uniform 8 float4 0.1 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.2, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.2, 0.6, 0.6) 1
uniform 4 float4 0.35 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.3, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.3, 0.6, 0.6) 1
uniform 8 float4 0.5 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.5, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 0 float4 1.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.5, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 4 float4 2.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.4, 0.1, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.4, 0.1, 0.6, 0.6) 1 diff --git a/tests/hlsl/matrix-indexing.shader_test b/tests/hlsl/matrix-indexing.shader_test index a61983ee..5924a76c 100644 --- a/tests/hlsl/matrix-indexing.shader_test +++ b/tests/hlsl/matrix-indexing.shader_test @@ -124,7 +124,7 @@ todo(sm>=6) draw quad probe all rgba (8, 9, 10, 11)
-[pixel shader todo] +[pixel shader] uniform float i;
float4 main() : sv_target @@ -136,5 +136,5 @@ float4 main() : sv_target
[test] uniform 0 float 3 -todo draw quad -todo probe all rgba (12, 13, 14, 15) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (12, 13, 14, 15) diff --git a/tests/hlsl/relative-addressing.shader_test b/tests/hlsl/relative-addressing.shader_test index 4700dc54..ecabbd40 100644 --- a/tests/hlsl/relative-addressing.shader_test +++ b/tests/hlsl/relative-addressing.shader_test @@ -1,5 +1,5 @@ % Note: SM1 supports relative addressing as long as it is not used as an l-value. -[pixel shader todo] +[pixel shader] float i;
float4 main() : sv_target @@ -11,14 +11,14 @@ float4 main() : sv_target
[test] uniform 0 float 2.3 -todo draw quad -todo probe all rgba (3, 3, 3, 3) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (3, 3, 3, 3)
[require] shader model >= 4.0
-[pixel shader todo] +[pixel shader] int i, j;
float4 main() : sv_target @@ -35,19 +35,19 @@ float4 main() : sv_target [test] uniform 0 int 0 uniform 1 int 0 -todo draw quad -todo probe all rgba (100, 6, 7, 8) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (100, 6, 7, 8) uniform 0 int 2 uniform 1 int 2 -todo draw quad -todo probe all rgba (5, 6, 100, 8) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5, 6, 100, 8) uniform 0 int 1 uniform 1 int 3 -todo draw quad -todo probe all rgba (5, 6, 7, 4) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5, 6, 7, 4)
-[pixel shader todo] +[pixel shader] float a, b, c, d; float e, f, g, h; int i, j; @@ -72,5 +72,5 @@ uniform 0 float4 1 2 3 4 uniform 4 float4 5 6 7 8 uniform 8 int 3 uniform 9 int 4 -todo draw quad -todo probe all rgba (1126, 3344, 5566, 3788) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1126, 3344, 5566, 3788) diff --git a/tests/hlsl/return.shader_test b/tests/hlsl/return.shader_test index b8ebac0a..29621b00 100644 --- a/tests/hlsl/return.shader_test +++ b/tests/hlsl/return.shader_test @@ -217,7 +217,7 @@ uniform 0 float 0.8 todo(sm>=6) draw quad probe all rgba (0.5, 0.5, 0.5, 0.5)
-[pixel shader todo] +[pixel shader]
uniform float4 f[3];
@@ -243,21 +243,21 @@ void main(out float4 ret : sv_target) uniform 0 float4 0.3 0.0 0.0 0.0 uniform 4 float4 0.0 0.0 0.0 0.0 uniform 8 float4 0.1 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.1, 0.1, 0.1, 0.1) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.1, 0.1, 0.1, 0.1) 1
uniform 4 float4 0.35 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.2, 0.2, 0.2, 0.2) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 8 float4 0.5 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float4 1.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 4 float4 2.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.9, 0.9, 0.9, 0.9) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.9, 0.9, 0.9, 0.9) 1
:arrow_up: I just replaced a `uniform int` with a `uniform float` because we are not passing int uniforms correctly to SM1 yet, and the pipeline detected this error.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
VKD3D_SM4_SWIZZLE_VEC4 = 0x1, VKD3D_SM4_SWIZZLE_SCALAR = 0x2,
- VKD3D_SM4_SWIZZLE_DEFAULT = ~0u-1,
Spaces around the binary operator.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
- {
if (reg->idx[i].offset == 0)
return VKD3D_SM4_ADDRESSING_RELATIVE;
else
return VKD3D_SM4_ADDRESSING_RELATIVE | VKD3D_SM4_ADDRESSING_OFFSET;
- }
- return 0;
+}
static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct vkd3d_shader_register *reg, enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) {
- unsigned int idx0_addressing = sm4_get_index_addressing_from_reg(reg, 0);
- unsigned int idx1_addressing = sm4_get_index_addressing_from_reg(reg, 1);
- unsigned int idx2_addressing = sm4_get_index_addressing_from_reg(reg, 2);
Not really important, but it doesn't look like you need to create three variables for these. Just inline the call to `sm4_get_index_addressing_from_reg()` below.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
} else {
struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
enum vkd3d_shader_register_type type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
You can compute this inside `sm4_numeric_register_from_deref()`, since `deref` is available there too. I see no real reason for moving it here.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
+static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg,
enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref,
struct sm4_instruction *sm4_instr)
+{
- const struct hlsl_ir_var *var = deref->var;
- unsigned int offset_const_deref;
- reg->type = type;
- reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
- reg->dimension = VSIR_DIMENSION_VEC4;
- assert(var->regs[HLSL_REGSET_NUMERIC].allocated);
- if (!var->indexable)
- {
offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref);
I am a bit confused because here I would have assumed that the non-indexable execution path should have been identical to what happened before this commit. Instead here you're adding `offset_const_deref` and a few lines below you're combining writemasks, and I don't think I'm seeing these operations in the removed lines. Are you also fixing a previous bug here in the non-indexable case? Or maybe I'm just missing something obvious...
On Thu Nov 2 13:26:14 2023 +0000, Giovanni Mascellani wrote:
Not really important, but it doesn't look like you need to create three variables for these. Just inline the call to `sm4_get_index_addressing_from_reg()` below.
While you're at it I'd advocate for moving the range check to the caller; I think it's clearer what its purpose is there.
I'm confused, what makes these new tests different from the array-index-expr tests?
On Thu Nov 2 13:26:15 2023 +0000, Giovanni Mascellani wrote:
I am a bit confused because here I would have assumed that the non-indexable execution path should have been identical to what happened before this commit. Instead here you're adding `offset_const_deref` and a few lines below you're combining writemasks, and I don't think I'm seeing these operations in the removed lines. Are you also fixing a previous bug here in the non-indexable case? Or maybe I'm just missing something obvious...
It should give the same results.
I think that the confusion arises because the previous path (deleted lines) called `hlsl_reg_from_deref()`, which calls `hlsl_offset_from_deref_safe()`, and then does these operations.
And, OTOH, the new code calls `hlsl_offset_from_deref_safe()` directly and takes care of doing the same operations that `hlsl_reg_from_deref()` did.
On Thu Nov 2 23:32:36 2023 +0000, Zebediah Figura wrote:
I'm confused, what makes these new tests different from the array-index-expr tests?
Now that you mention it, the first test is pretty similar to the second one in array-index-expr.
The second test is different however, because it is testing relative addressing in the dst instead of the src. But I guess it makes sense to move it to array-index-expr.shader_test, if we are deleting the first one. I will do that.
On Thu Nov 2 13:26:15 2023 +0000, Giovanni Mascellani wrote:
You can compute this inside `sm4_numeric_register_from_deref()`, since `deref` is available there too. I see no real reason for moving it here.
I prefer it to be here, since in (possible) future patches, e.g. [efaa9b76b](https://gitlab.winehq.org/fcasas/vkd3d/-/commit/efaa9b76ba3d6ed71c07926c34a5...) and [559d4121](https://gitlab.winehq.org/fcasas/vkd3d/-/commit/559d412188ea57f196653183bfce...), sm4_numeric_register_from_deref() is called from other places in sm4_register_from_deref() and I don't want to introduce redundant checks.
On Thu Nov 2 23:32:36 2023 +0000, Francisco Casas wrote:
Now that you mention it, the first test is pretty similar to the second one in array-index-expr. The second test is different however, because it is testing relative addressing in the dst instead of the src. But I guess it makes sense to move it to array-index-expr.shader_test, if we are deleting the first one. I will do that.
Oh, sorry. I changed my mind.
I just realized that the 1st test is also different from test 2 in `array-index-expr.shader_test`, because the latter actually indexes a `float4` instead of a `float[]`, which is not relative addressing. So it may be worth keeping the 1st test as the simplest case of relative addressing.
I can still move them both to array-index-expr.shader_test if you want.