From: Francisco Casas fcasas@codeweavers.com
For relative addressing, the vkd3d_shader_registers must point to another vkd3d_shader_src_param. For now, use the sm4_instruction to save them, since the only purpose of this struct is to be used as a parameter for write_sm4_instruction.
---
Note to self: this could be achieved by vkd3d_shader_param_allocator once we make tpf.c capable of working directly with vsir input. --- libs/vkd3d-shader/tpf.c | 86 +++++++++++++++++------ tests/hlsl/function-return.shader_test | 22 +++--- tests/hlsl/matrix-indexing.shader_test | 6 +- tests/hlsl/non-const-indexing.shader_test | 66 ++++++++--------- tests/hlsl/return.shader_test | 22 +++--- 5 files changed, 123 insertions(+), 79 deletions(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index b7fcfca5c..62b103904 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3635,6 +3635,9 @@ struct sm4_instruction
uint32_t idx[3]; unsigned int idx_count; + + struct vkd3d_shader_src_param idx_srcs[7]; + unsigned int idx_src_count; };
static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, @@ -3648,8 +3651,54 @@ static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t * *writemask = instr->reg.writemask; }
+static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, + enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref, + struct sm4_instruction *sm4_instr) +{ + const struct hlsl_ir_var *var = deref->var; + unsigned int offset_const_deref; + + reg->type = type; + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->dimension = VSIR_DIMENSION_VEC4; + + assert(var->regs[HLSL_REGSET_NUMERIC].allocated); + + if (!var->indexable) + { + offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); + reg->idx[0].offset += offset_const_deref / 4; + reg->idx_count = 1; + } + else + { + offset_const_deref = deref->const_offset; + reg->idx[1].offset = offset_const_deref / 4; + reg->idx_count = 2; + + if (deref->rel_offset.node) + { + struct vkd3d_shader_src_param *idx_src; + unsigned int idx_writemask; + + assert(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); + idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; + memset(idx_src, 0, sizeof(*idx_src)); + + reg->idx[1].rel_addr = idx_src; + sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); + assert(idx_writemask != 0); + idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); + } + } + + *writemask = 0xf & (0xf << (offset_const_deref % 4)); + if (var->regs[HLSL_REGSET_NUMERIC].writemask) + *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); +} + static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, - uint32_t *writemask, const struct hlsl_deref *deref) + uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) { const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); const struct hlsl_ir_var *var = deref->var; @@ -3764,24 +3813,19 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re } else { - struct hlsl_reg hlsl_reg 
= hlsl_reg_from_deref(ctx, deref); + enum vkd3d_shader_register_type type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
- assert(hlsl_reg.allocated); - reg->type = deref->var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; - *writemask = hlsl_reg.writemask; + sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr); } }
static void sm4_src_from_deref(const struct tpf_writer *tpf, struct vkd3d_shader_src_param *src, - const struct hlsl_deref *deref, unsigned int map_writemask) + const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) { unsigned int hlsl_swizzle; uint32_t writemask;
- sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref); + sm4_register_from_deref(tpf->ctx, &src->reg, &writemask, deref, sm4_instr); if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) { hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); @@ -4532,7 +4576,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node
sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask);
- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr);
instr.src_count = 2;
@@ -4619,8 +4663,8 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ instr.dst_count = 1;
sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask); - sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); + sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); instr.src_count = 3;
if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD @@ -4661,7 +4705,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl sm4_dst_from_node(&instr.dsts[0], dst); instr.dst_count = 1;
- sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); instr.src_count = 1;
write_sm4_instruction(tpf, &instr); @@ -4684,7 +4728,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir instr.dst_count = 1;
sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); instr.src_count = 2;
write_sm4_instruction(tpf, &instr); @@ -4832,7 +4876,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); instr.dst_count = 1;
sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); @@ -5378,7 +5422,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo
instr.opcode = VKD3D_SM4_OP_MOVC;
- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr);
memset(&value, 0xff, sizeof(value)); sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); @@ -5390,7 +5434,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo { instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); instr.src_count = 1; }
@@ -5443,10 +5487,10 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ } }
- sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask); + sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr);
src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr); src->reg.dimension = VSIR_DIMENSION_VEC4; src->swizzle = swizzle;
@@ -5547,7 +5591,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs); + sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); instr.dst_count = 1;
diff --git a/tests/hlsl/function-return.shader_test b/tests/hlsl/function-return.shader_test index 98aac4fa7..be997d0c3 100644 --- a/tests/hlsl/function-return.shader_test +++ b/tests/hlsl/function-return.shader_test @@ -258,7 +258,7 @@ uniform 0 float 0.9 todo(sm>=6) draw quad probe all rgba (0.4, 0.1, 0.7, 0.6) 1
-[pixel shader todo] +[pixel shader]
uniform float4 f[3];
@@ -295,21 +295,21 @@ float4 main() : sv_target uniform 0 float4 0.3 0.0 0.0 0.0 uniform 4 float4 0.0 0.0 0.0 0.0 uniform 8 float4 0.1 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.2, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.2, 0.6, 0.6) 1
uniform 4 float4 0.35 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.3, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.3, 0.6, 0.6) 1
uniform 8 float4 0.5 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.5, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 0 float4 1.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.3, 0.5, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.3, 0.5, 0.6, 0.6) 1
uniform 4 float4 2.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.4, 0.1, 0.6, 0.6) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.4, 0.1, 0.6, 0.6) 1 diff --git a/tests/hlsl/matrix-indexing.shader_test b/tests/hlsl/matrix-indexing.shader_test index 170036475..b8e6dec68 100644 --- a/tests/hlsl/matrix-indexing.shader_test +++ b/tests/hlsl/matrix-indexing.shader_test @@ -124,7 +124,7 @@ todo(sm>=6) draw quad probe all rgba (8, 9, 10, 11)
-[pixel shader todo] +[pixel shader] uniform float i;
float4 main() : sv_target @@ -136,5 +136,5 @@ float4 main() : sv_target
[test] uniform 0 float 3 -todo draw quad -todo probe all rgba (12, 13, 14, 15) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (12, 13, 14, 15) diff --git a/tests/hlsl/non-const-indexing.shader_test b/tests/hlsl/non-const-indexing.shader_test index 9f79f6103..bc550738c 100644 --- a/tests/hlsl/non-const-indexing.shader_test +++ b/tests/hlsl/non-const-indexing.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] uniform float4 f[3]; uniform float2 i;
@@ -12,17 +12,17 @@ uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float4 5.0 6.0 7.0 8.0 uniform 8 float4 9.0 10.0 11.0 12.0 uniform 12 float4 0 0 0 0 -todo draw quad -todo probe all rgba (1.0, 2.0, 3.0, 4.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1.0, 2.0, 3.0, 4.0) uniform 12 float4 1 0 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 12 float4 0 1 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 12 float4 1 1 0 0 -todo draw quad -todo probe all rgba (9.0, 10.0, 11.0, 12.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (9.0, 10.0, 11.0, 12.0)
[pixel shader] @@ -49,7 +49,7 @@ todo(sm>=6) draw quad probe all rgba (14.0, 14.0, 14.0, 14.0)
-[pixel shader todo] +[pixel shader] float i;
float4 main() : sv_target @@ -61,8 +61,8 @@ float4 main() : sv_target
[test] uniform 0 float 2.3 -todo draw quad -todo probe all rgba (3, 3, 3, 3) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (3, 3, 3, 3)
[pixel shader] @@ -90,7 +90,7 @@ todo(sm>=6) draw quad probe all rgba (24.0, 0.0, 21.0, 1.0)
-[pixel shader todo] +[pixel shader] uniform float2 i;
float4 main() : sv_target @@ -102,20 +102,20 @@ float4 main() : sv_target
[test] uniform 0 float4 0 0 0 0 -todo draw quad -todo probe all rgba (1.0, 2.0, 3.0, 4.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1.0, 2.0, 3.0, 4.0) uniform 0 float4 1 0 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 0 float4 0 1 0 0 -todo draw quad -todo probe all rgba (5.0, 6.0, 7.0, 8.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5.0, 6.0, 7.0, 8.0) uniform 0 float4 1 1 0 0 -todo draw quad -todo probe all rgba (9.0, 10.0, 11.0, 12.0) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (9.0, 10.0, 11.0, 12.0)
-[pixel shader todo] +[pixel shader] float4 a;
float4 main() : sv_target @@ -130,7 +130,7 @@ float4 main() : sv_target
[test] uniform 0 float4 0 0 2.4 0 -todo draw quad +todo(sm>=6) draw quad probe all rgba (1.0, 120.0, 90.0, 4.0)
@@ -138,7 +138,7 @@ probe all rgba (1.0, 120.0, 90.0, 4.0) [require] shader model >= 4.0
-[pixel shader todo] +[pixel shader] int i, j;
float4 main() : sv_target @@ -155,19 +155,19 @@ float4 main() : sv_target [test] uniform 0 int 0 uniform 1 int 0 -todo draw quad -todo probe all rgba (100, 6, 7, 8) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (100, 6, 7, 8) uniform 0 int 2 uniform 1 int 2 -todo draw quad -todo probe all rgba (5, 6, 100, 8) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5, 6, 100, 8) uniform 0 int 1 uniform 1 int 3 -todo draw quad -todo probe all rgba (5, 6, 7, 4) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (5, 6, 7, 4)
-[pixel shader todo] +[pixel shader] float a, b, c, d; float e, f, g, h; int i, j; @@ -192,5 +192,5 @@ uniform 0 float4 1 2 3 4 uniform 4 float4 5 6 7 8 uniform 8 int 3 uniform 9 int 4 -todo draw quad -todo probe all rgba (1126, 3344, 5566, 3788) +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (1126, 3344, 5566, 3788) diff --git a/tests/hlsl/return.shader_test b/tests/hlsl/return.shader_test index b8ebac0a1..29621b006 100644 --- a/tests/hlsl/return.shader_test +++ b/tests/hlsl/return.shader_test @@ -217,7 +217,7 @@ uniform 0 float 0.8 todo(sm>=6) draw quad probe all rgba (0.5, 0.5, 0.5, 0.5)
-[pixel shader todo] +[pixel shader]
uniform float4 f[3];
@@ -243,21 +243,21 @@ void main(out float4 ret : sv_target) uniform 0 float4 0.3 0.0 0.0 0.0 uniform 4 float4 0.0 0.0 0.0 0.0 uniform 8 float4 0.1 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.1, 0.1, 0.1, 0.1) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.1, 0.1, 0.1, 0.1) 1
uniform 4 float4 0.35 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.2, 0.2, 0.2, 0.2) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.2, 0.2, 0.2, 0.2) 1
uniform 8 float4 0.5 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 0 float4 1.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.4, 0.4, 0.4, 0.4) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.4, 0.4, 0.4, 0.4) 1
uniform 4 float4 2.0 0.0 0.0 0.0 -todo draw quad -todo probe all rgba (0.9, 0.9, 0.9, 0.9) 1 +todo(sm>=6) draw quad +todo(sm>=6) probe all rgba (0.9, 0.9, 0.9, 0.9) 1