[PATCH v6 7/7] vkd3d-shader/tpf: Support relative addressing for indexable temps in SM4.

7 Nov 2023

From: Francisco Casas fcasas@codeweavers.com
For relative addressing, the vkd3d_shader_registers must point to
another vkd3d_shader_src_param. For now, use the sm4_instruction to save
them, since the only purpose of this struct is to be used as a parameter
for write_sm4_instruction.
---
Note to self: this could be achieved by vkd3d_shader_param_allocator
once we make tpf.c capable of working directly with vsir input.
---
 libs/vkd3d-shader/tpf.c                   | 86 +++++++++++++++++------
 tests/hlsl/function-return.shader_test    | 22 +++---
 tests/hlsl/matrix-indexing.shader_test    |  6 +-
 tests/hlsl/non-const-indexing.shader_test | 66 ++++++++---------
 tests/hlsl/return.shader_test             | 22 +++---
 5 files changed, 123 insertions(+), 79 deletions(-)

diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c
index b7fcfca5c..62b103904 100644
--- a/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d-shader/tpf.c
@@ -3635,6 +3635,9 @@ struct sm4_instruction
uint32_t idx[3];
     unsigned int idx_count;
+
+    struct vkd3d_shader_src_param idx_srcs[7];
+    unsigned int idx_src_count;
 };
static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask,
@@ -3648,8 +3651,54 @@ static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *
     *writemask = instr->reg.writemask;
 }
+static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg,
+        enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref,
+        struct sm4_instruction *sm4_instr)
+{
+    const struct hlsl_ir_var *var = deref->var;
+    unsigned int offset_const_deref;
+
+    reg->type = type;
+    reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
+    reg->dimension = VSIR_DIMENSION_VEC4;
+
+    assert(var->regs[HLSL_REGSET_NUMERIC].allocated);
+
+    if (!var->indexable)
+    {
+        offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref);
+        reg->idx[0].offset += offset_const_deref / 4;
+        reg->idx_count = 1;
+    }
+    else
+    {
+        offset_const_deref = deref->const_offset;
+        reg->idx[1].offset = offset_const_deref / 4;
+        reg->idx_count = 2;
+
+        if (deref->rel_offset.node)
+        {
+            struct vkd3d_shader_src_param *idx_src;
+            unsigned int idx_writemask;
+
+            assert(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs));
+            idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++];
+            memset(idx_src, 0, sizeof(*idx_src));
+
+            reg->idx[1].rel_addr = idx_src;
+            sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node);
+            assert(idx_writemask != 0);
+            idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask));
+        }
+    }
+
+    *writemask = 0xf & (0xf << (offset_const_deref % 4));
+    if (var->regs[HLSL_REGSET_NUMERIC].writemask)
+        *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask);
+}
+
 static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg,
-        uint32_t *writemask, const struct hlsl_deref *deref)
+        uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr)
 {
     const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref);
     const struct hlsl_ir_var *var = deref->var;
@@ -3764,24 +3813,19 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re
     }
     else
     {
-        struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
+        enum vkd3d_shader_register_type type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
-        assert(hlsl_reg.allocated);
-        reg->type =  deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
-        reg->dimension = VSIR_DIMENSION_VEC4;
-        reg->idx[0].offset = hlsl_reg.id;
-        reg->idx_count = 1;
-        *writemask = hlsl_reg.writemask;
+        sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr);
     }
 }
static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src,
-        const struct hlsl_deref *deref, unsigned int map_writemask)
+        const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr)
 {
     unsigned int hlsl_swizzle;
     uint32_t writemask;
-    sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref);
+    sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr);
     if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4)
     {
         hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
@@ -4532,7 +4576,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node
sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask);
-    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask);
+    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
instr.src_count = 2;
@@ -4619,8 +4663,8 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_
     instr.dst_count = 1;
sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
-    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask);
-    sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL);
+    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
+    sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr);
     instr.src_count = 3;
if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD
@@ -4661,7 +4705,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl
     sm4_dst_from_node(&instr.dsts[0], dst);
     instr.dst_count = 1;
-    sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask);
+    sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr);
     instr.src_count = 1;
write_sm4_instruction(tpf, &instr);
@@ -4684,7 +4728,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir
     instr.dst_count = 1;
sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL);
-    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask);
+    sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
     instr.src_count = 2;
write_sm4_instruction(tpf, &instr);
@@ -4832,7 +4876,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct
     memset(&instr, 0, sizeof(instr));
     instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
-    sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst);
+    sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr);
     instr.dst_count = 1;
sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
@@ -5378,7 +5422,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo
instr.opcode = VKD3D_SM4_OP_MOVC;
-        sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask);
+        sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
memset(&value, 0xff, sizeof(value));
         sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask);
@@ -5390,7 +5434,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo
     {
         instr.opcode = VKD3D_SM4_OP_MOV;
-        sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask);
+        sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
         instr.src_count = 1;
     }
@@ -5443,10 +5487,10 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_
         }
     }
-    sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask);
+    sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr);
src = &instr.srcs[instr.src_count++];
-    sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL);
+    sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr);
     src->reg.dimension = VSIR_DIMENSION_VEC4;
     src->swizzle = swizzle;
@@ -5547,7 +5591,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s
     memset(&instr, 0, sizeof(instr));
     instr.opcode = VKD3D_SM4_OP_MOV;
-    sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs);
+    sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr);
     instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask);
     instr.dst_count = 1;
diff --git a/tests/hlsl/function-return.shader_test b/tests/hlsl/function-return.shader_test
index 98aac4fa7..be997d0c3 100644
--- a/tests/hlsl/function-return.shader_test
+++ b/tests/hlsl/function-return.shader_test
@@ -258,7 +258,7 @@ uniform 0 float 0.9
 todo(sm>=6) draw quad
 probe all rgba (0.4, 0.1, 0.7, 0.6) 1
-[pixel shader todo]
+[pixel shader]
uniform float4 f[3];
@@ -295,21 +295,21 @@ float4 main() : sv_target
 uniform 0 float4 0.3 0.0 0.0 0.0
 uniform 4 float4 0.0 0.0 0.0 0.0
 uniform 8 float4 0.1 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.3, 0.2, 0.6, 0.6) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.3, 0.2, 0.6, 0.6) 1
uniform 4 float4 0.35 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.3, 0.3, 0.6, 0.6) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.3, 0.3, 0.6, 0.6) 1
uniform 8 float4 0.5 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.3, 0.5, 0.6, 0.6) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 0 float4 1.0 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.3, 0.5, 0.6, 0.6) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 4 float4 2.0 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.4, 0.1, 0.6, 0.6) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.4, 0.1, 0.6, 0.6) 1
diff --git a/tests/hlsl/matrix-indexing.shader_test b/tests/hlsl/matrix-indexing.shader_test
index 170036475..b8e6dec68 100644
--- a/tests/hlsl/matrix-indexing.shader_test
+++ b/tests/hlsl/matrix-indexing.shader_test
@@ -124,7 +124,7 @@ todo(sm>=6) draw quad
 probe all rgba (8, 9, 10, 11)
-[pixel shader todo]
+[pixel shader]
 uniform float i;
float4 main() : sv_target
@@ -136,5 +136,5 @@ float4 main() : sv_target
[test]
 uniform 0 float 3
-todo draw quad
-todo probe all rgba (12, 13, 14, 15)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (12, 13, 14, 15)
diff --git a/tests/hlsl/non-const-indexing.shader_test b/tests/hlsl/non-const-indexing.shader_test
index 9f79f6103..bc550738c 100644
--- a/tests/hlsl/non-const-indexing.shader_test
+++ b/tests/hlsl/non-const-indexing.shader_test
@@ -1,4 +1,4 @@
-[pixel shader todo]
+[pixel shader]
 uniform float4 f[3];
 uniform float2 i;
@@ -12,17 +12,17 @@ uniform 0 float4 1.0 2.0 3.0 4.0
 uniform 4 float4 5.0 6.0 7.0 8.0
 uniform 8 float4 9.0 10.0 11.0 12.0
 uniform 12 float4 0 0 0 0
-todo draw quad
-todo probe all rgba (1.0, 2.0, 3.0, 4.0)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (1.0, 2.0, 3.0, 4.0)
 uniform 12 float4 1 0 0 0
-todo draw quad
-todo probe all rgba (5.0, 6.0, 7.0, 8.0)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0)
 uniform 12 float4 0 1 0 0
-todo draw quad
-todo probe all rgba (5.0, 6.0, 7.0, 8.0)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0)
 uniform 12 float4 1 1 0 0
-todo draw quad
-todo probe all rgba (9.0, 10.0, 11.0, 12.0)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (9.0, 10.0, 11.0, 12.0)
[pixel shader]
@@ -49,7 +49,7 @@ todo(sm>=6) draw quad
 probe all rgba (14.0, 14.0, 14.0, 14.0)
-[pixel shader todo]
+[pixel shader]
 float i;
float4 main() : sv_target
@@ -61,8 +61,8 @@ float4 main() : sv_target
[test]
 uniform 0 float 2.3
-todo draw quad
-todo probe all rgba (3, 3, 3, 3)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (3, 3, 3, 3)
[pixel shader]
@@ -90,7 +90,7 @@ todo(sm>=6) draw quad
 probe all rgba (24.0, 0.0, 21.0, 1.0)
-[pixel shader todo]
+[pixel shader]
 uniform float2 i;
float4 main() : sv_target
@@ -102,20 +102,20 @@ float4 main() : sv_target
[test]
 uniform 0 float4 0 0 0 0
-todo draw quad
-todo probe all rgba (1.0, 2.0, 3.0, 4.0)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (1.0, 2.0, 3.0, 4.0)
 uniform 0 float4 1 0 0 0
-todo draw quad
-todo probe all rgba (5.0, 6.0, 7.0, 8.0)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0)
 uniform 0 float4 0 1 0 0
-todo draw quad
-todo probe all rgba (5.0, 6.0, 7.0, 8.0)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0)
 uniform 0 float4 1 1 0 0
-todo draw quad
-todo probe all rgba (9.0, 10.0, 11.0, 12.0)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (9.0, 10.0, 11.0, 12.0)
-[pixel shader todo]
+[pixel shader]
 float4 a;
float4 main() : sv_target
@@ -130,7 +130,7 @@ float4 main() : sv_target
[test]
 uniform 0 float4 0 0 2.4 0
-todo draw quad
+todo(sm>=6) draw quad
 probe all rgba (1.0, 120.0, 90.0, 4.0)
@@ -138,7 +138,7 @@ probe all rgba (1.0, 120.0, 90.0, 4.0)
 [require]
 shader model >= 4.0
-[pixel shader todo]
+[pixel shader]
 int i, j;
float4 main() : sv_target
@@ -155,19 +155,19 @@ float4 main() : sv_target
 [test]
 uniform 0 int 0
 uniform 1 int 0
-todo draw quad
-todo probe all rgba (100, 6, 7, 8)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (100, 6, 7, 8)
 uniform 0 int 2
 uniform 1 int 2
-todo draw quad
-todo probe all rgba (5, 6, 100, 8)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (5, 6, 100, 8)
 uniform 0 int 1
 uniform 1 int 3
-todo draw quad
-todo probe all rgba (5, 6, 7, 4)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (5, 6, 7, 4)
-[pixel shader todo]
+[pixel shader]
 float a, b, c, d;
 float e, f, g, h;
 int i, j;
@@ -192,5 +192,5 @@ uniform 0 float4 1 2 3 4
 uniform 4 float4 5 6 7 8
 uniform 8 int 3
 uniform 9 int 4
-todo draw quad
-todo probe all rgba (1126, 3344, 5566, 3788)
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (1126, 3344, 5566, 3788)
diff --git a/tests/hlsl/return.shader_test b/tests/hlsl/return.shader_test
index b8ebac0a1..29621b006 100644
--- a/tests/hlsl/return.shader_test
+++ b/tests/hlsl/return.shader_test
@@ -217,7 +217,7 @@ uniform 0 float 0.8
 todo(sm>=6) draw quad
 probe all rgba (0.5, 0.5, 0.5, 0.5)
-[pixel shader todo]
+[pixel shader]
uniform float4 f[3];
@@ -243,21 +243,21 @@ void main(out float4 ret : sv_target)
 uniform 0 float4 0.3 0.0 0.0 0.0
 uniform 4 float4 0.0 0.0 0.0 0.0
 uniform 8 float4 0.1 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.1, 0.1, 0.1, 0.1) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.1, 0.1, 0.1, 0.1) 1
uniform 4 float4 0.35 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.2, 0.2, 0.2, 0.2) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 8 float4 0.5 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float4 1.0 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 4 float4 2.0 0.0 0.0 0.0
-todo draw quad
-todo probe all rgba (0.9, 0.9, 0.9, 0.9) 1
+todo(sm>=6) draw quad
+todo(sm>=6) probe all rgba (0.9, 0.9, 0.9, 0.9) 1
-- 
GitLab

https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/435

    

2025

2024

2023

2022

[PATCH v6 7/7] vkd3d-shader/tpf: Support relative addressing for indexable temps in SM4.