If a hlsl_ir_load loads a variable whose components are stored from different instructions, copy propagation doesn't replace it.
But if all these instructions are constants (which currently is the case for value constructors), the load can be replaced with a constant value, which is what the first patch of this series does.
For instance, this shader:
``` sampler s; Texture2D t;
float4 main() : sv_target { return t.Gather(s, float2(0.6, 0.6), int2(0, 0)); } ```
results in the following IR before applying the patch: ``` float | 6.00000024e-01 float | 6.00000024e-01 uint | 0 | = (<constructor-2>[@4].x @2) uint | 1 | = (<constructor-2>[@6].x @3) float2 | <constructor-2> int | 0 int | 0 uint | 0 | = (<constructor-5>[@11].x @9) uint | 1 | = (<constructor-5>[@13].x @10) int2 | <constructor-5> float4 | gather_red(resource = t, sampler = s, coords = @8, offset = @15) | return | = (<output-sv_target0> @16) ```
and this IR afterwards: ``` float2 | {6.00000024e-01 6.00000024e-01 } int2 | {0 0 } float4 | gather_red(resource = t, sampler = s, coords = @2, offset = @3) | return | = (<output-sv_target0> @4) ```
This is required to write texel_offsets as aoffimmi modifiers in the sm4 backend, since it expects the texel_offset arguments to be hlsl_ir_constant.
This series also: * Allows Gather() methods to use aoffimmi modifiers, when possible, instead of an additional source register (aoffimmi modifiers are the only way allowed for shader model 4.1). * Adds support for texel_offsets in the Load() method via aoffimmi modifiers (the only allowed method).
-- v7: vkd3d-shader/hlsl: Fold swizzle chains. vkd3d-shader/hlsl: Refactor common code on copy-prop of both loads and swizzles. vkd3d-shader/hlsl: Propagate copies through swizzles. vkd3d-shader/hlsl: Replace swizzles with constants in copy prop.
From: Francisco Casas fcasas@codeweavers.com
If a hlsl_ir_load loads a variable whose components are stored from different instructions, copy propagation doesn't replace it.
But if all these instructions are constants (which currently is the case for value constructors), the load could be replaced with a constant value, which is what some other instructions expect, e.g. texel_offsets when using aoffimmi modifiers.
For instance, this shader:
``` sampler s; Texture2D t;
float4 main() : sv_target { return t.Gather(s, float2(0.6, 0.6), int2(0, 0)); } ```
results in the following IR before applying the patch: ``` float | 6.00000024e-01 float | 6.00000024e-01 uint | 0 | = (<constructor-2>[@4].x @2) uint | 1 | = (<constructor-2>[@6].x @3) float2 | <constructor-2> int | 0 int | 0 uint | 0 | = (<constructor-5>[@11].x @9) uint | 1 | = (<constructor-5>[@13].x @10) int2 | <constructor-5> float4 | gather_red(resource = t, sampler = s, coords = @8, offset = @15) | return | = (<output-sv_target0> @16) ```
and this IR afterwards: ``` float2 | {6.00000024e-01 6.00000024e-01 } int2 | {0 0 } float4 | gather_red(resource = t, sampler = s, coords = @2, offset = @3) | return | = (<output-sv_target0> @4) ``` --- libs/vkd3d-shader/hlsl_codegen.c | 42 ++++++++++++++++++++++ tests/hlsl-initializer-objects.shader_test | 8 ++--- tests/object-references.shader_test | 6 ++-- tests/sampler-offset.shader_test | 12 +++---- tests/shader_runner_d3d12.c | 2 +- 5 files changed, 56 insertions(+), 14 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 6e4168fc..9bdbd57c 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -718,6 +718,41 @@ static struct hlsl_ir_node *copy_propagation_compute_replacement(struct hlsl_ctx return instr; }
+static struct hlsl_ir_node *copy_propagation_compute_load_constant_replacement(struct hlsl_ctx *ctx, + const struct copy_propagation_state *state, const struct hlsl_ir_load *load) +{ + const struct hlsl_ir_var *var = load->src.var; + union hlsl_constant_value values[4] = {0}; + struct hlsl_ir_constant *cons; + unsigned int start, count, i; + + if (load->node.data_type->type != HLSL_CLASS_SCALAR && load->node.data_type->type != HLSL_CLASS_VECTOR) + return NULL; + + if (!hlsl_component_index_range_from_deref(ctx, &load->src, &start, &count)) + return NULL; + + for (i = 0; i < count; ++i) + { + struct copy_propagation_value *value = copy_propagation_get_value(state, var, start + i); + + if (!value || value->node->type != HLSL_IR_CONSTANT) + return NULL; + + values[i] = hlsl_ir_constant(value->node)->value[value->component]; + } + + if (!(cons = hlsl_new_constant(ctx, load->node.data_type, &load->node.loc))) + return NULL; + cons->value[0] = values[0]; + cons->value[1] = values[1]; + cons->value[2] = values[2]; + cons->value[3] = values[3]; + + TRACE("Load from %s[%u-%u] turned into a constant %p.\n", var->name, start, start + count, cons); + return &cons->node; +} + static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, struct copy_propagation_state *state) { @@ -746,6 +781,13 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, return false; }
+ if ((new_instr = copy_propagation_compute_load_constant_replacement(ctx, state, load))) + { + list_add_before(&instr->entry, &new_instr->entry); + hlsl_replace_node(instr, new_instr); + return true; + } + if (!(new_instr = copy_propagation_compute_replacement(ctx, state, &load->src, &swizzle))) return false;
diff --git a/tests/hlsl-initializer-objects.shader_test b/tests/hlsl-initializer-objects.shader_test index d40ede46..d9c0bc91 100644 --- a/tests/hlsl-initializer-objects.shader_test +++ b/tests/hlsl-initializer-objects.shader_test @@ -29,7 +29,7 @@ draw quad probe all rgba (0.2, 0.2, 0.2, 0.1)
-[pixel shader todo] +[pixel shader] Texture2D tex;
struct foo @@ -48,11 +48,11 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (31.1, 41.1, 51.1, 61.1) 1 +draw quad +probe all rgba (31.1, 41.1, 51.1, 61.1) 1
-[pixel shader todo] +[pixel shader] Texture2D tex1; Texture2D tex2;
diff --git a/tests/object-references.shader_test b/tests/object-references.shader_test index 12f745e6..ba9b1235 100644 --- a/tests/object-references.shader_test +++ b/tests/object-references.shader_test @@ -132,7 +132,7 @@ float4 main() : sv_target }
-[pixel shader todo] +[pixel shader] Texture2D tex; uniform float f;
@@ -153,5 +153,5 @@ float4 main() : sv_target
[test] uniform 0 float 10.0 -todo draw quad -todo probe (0, 0) rgba (11.0, 12.0, 13.0, 11.0) +draw quad +probe (0, 0) rgba (11.0, 12.0, 13.0, 11.0) diff --git a/tests/sampler-offset.shader_test b/tests/sampler-offset.shader_test index 2aa8f9b3..6f8357df 100644 --- a/tests/sampler-offset.shader_test +++ b/tests/sampler-offset.shader_test @@ -12,7 +12,7 @@ size (3, 3) 0.0 0.2 0.0 0.4 0.1 0.2 0.5 0.0 0.2 0.2 0.0 0.4
-[pixel shader todo] +[pixel shader] sampler s; Texture2D t;
@@ -22,11 +22,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.1, 0.2, 0.5, 0.0)
-[pixel shader todo] +[pixel shader] sampler s; Texture2D t;
@@ -36,11 +36,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.2, 0.2, 0.0, 0.4)
-[pixel shader todo] +[pixel shader] sampler s; Texture2D t;
@@ -50,5 +50,5 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe all rgba (0.0, 0.2, 0.0, 0.4) diff --git a/tests/shader_runner_d3d12.c b/tests/shader_runner_d3d12.c index bb4d9c5a..bd94b4c9 100644 --- a/tests/shader_runner_d3d12.c +++ b/tests/shader_runner_d3d12.c @@ -167,7 +167,7 @@ static ID3D12RootSignature *d3d12_runner_create_root_signature(struct d3d12_shad ID3D12GraphicsCommandList *command_list, unsigned int *uniform_index) { D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {0}; - D3D12_ROOT_PARAMETER root_params[3], *root_param; + D3D12_ROOT_PARAMETER root_params[4], *root_param; D3D12_STATIC_SAMPLER_DESC static_samplers[1]; ID3D12RootSignature *root_signature; HRESULT hr;
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_sm4.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index ae5bb1ac..4059d618 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -2110,11 +2110,19 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer
sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL);
- /* FIXME: Use an aoffimmi modifier if possible. */ if (texel_offset) { - instr.opcode = VKD3D_SM5_OP_GATHER4_PO; - sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + if (ctx->profile->major_version < 5) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return; + } + instr.opcode = VKD3D_SM5_OP_GATHER4_PO; + sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); + } }
sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask);
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/texture-load-offset.shader_test | 51 +++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 tests/texture-load-offset.shader_test
diff --git a/Makefile.am b/Makefile.am index 85cd4642..84d75497 100644 --- a/Makefile.am +++ b/Makefile.am @@ -147,6 +147,7 @@ vkd3d_shader_tests = \ tests/swizzle-6.shader_test \ tests/swizzle-7.shader_test \ tests/texture-load.shader_test \ + tests/texture-load-offset.shader_test \ tests/texture-load-typed.shader_test \ tests/trigonometry.shader_test \ tests/uav.shader_test \ diff --git a/tests/texture-load-offset.shader_test b/tests/texture-load-offset.shader_test new file mode 100644 index 00000000..ab233c58 --- /dev/null +++ b/tests/texture-load-offset.shader_test @@ -0,0 +1,51 @@ +[require] +shader model >= 4.0 + +[texture 0] +size (3, 3) +0 0 0 1 1 0 0 1 2 0 0 1 +0 1 0 1 1 1 0 1 2 1 0 1 +0 2 0 1 1 2 0 1 2 2 0 1 + + +[pixel shader] +Texture2D t; + +float4 main(float4 pos : sv_position) : sv_target +{ + return t.Load(int3(pos.xy, 0), int2(0, 1)); +} + + +[test] +draw quad +todo probe (0, 0) rgba (0, 1, 0, 1) +todo probe (1, 0) rgba (1, 1, 0, 1) +todo probe (0, 1) rgba (0, 2, 0, 1) +todo probe (1, 1) rgba (1, 2, 0, 1) + + +[pixel shader] +Texture2D t; + +float4 main(float4 pos : sv_position) : sv_target +{ + return t.Load(int3(pos.xy, 0), int2(-2, 0)); +} + + +[test] +draw quad +todo probe (3, 0) rgba (1, 0, 0, 1) +todo probe (4, 0) rgba (2, 0, 0, 1) +todo probe (3, 1) rgba (1, 1, 0, 1) +todo probe (4, 1) rgba (2, 1, 0, 1) + + +[pixel shader fail todo] +Texture2D t; + +float4 main(float4 pos : sv_position) : sv_target +{ + return t.Load(int3(pos.xy, 0), int2(8, 1)); +}
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_sm4.c | 16 ++++++++++++++-- tests/texture-load-offset.shader_test | 18 +++++++++--------- 2 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 4059d618..1595e5be 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1418,7 +1418,8 @@ static void write_sm4_constant(struct hlsl_ctx *ctx,
static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords) + const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, + const struct hlsl_ir_node *texel_offset) { bool uav = (resource_type->base_type == HLSL_TYPE_UAV); struct sm4_instruction instr; @@ -1427,6 +1428,16 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf memset(&instr, 0, sizeof(instr)); instr.opcode = uav ? VKD3D_SM5_OP_LD_UAV_TYPED : VKD3D_SM4_OP_LD;
+ if (texel_offset) + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { + hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } + } + sm4_dst_from_node(&instr.dsts[0], dst); instr.dst_count = 1;
@@ -2179,7 +2190,8 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, switch (load->load_type) { case HLSL_RESOURCE_LOAD: - write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, coords); + write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, + coords, texel_offset); break;
case HLSL_RESOURCE_SAMPLE: diff --git a/tests/texture-load-offset.shader_test b/tests/texture-load-offset.shader_test index ab233c58..52b6a5f9 100644 --- a/tests/texture-load-offset.shader_test +++ b/tests/texture-load-offset.shader_test @@ -19,10 +19,10 @@ float4 main(float4 pos : sv_position) : sv_target
[test] draw quad -todo probe (0, 0) rgba (0, 1, 0, 1) -todo probe (1, 0) rgba (1, 1, 0, 1) -todo probe (0, 1) rgba (0, 2, 0, 1) -todo probe (1, 1) rgba (1, 2, 0, 1) +probe (0, 0) rgba (0, 1, 0, 1) +probe (1, 0) rgba (1, 1, 0, 1) +probe (0, 1) rgba (0, 2, 0, 1) +probe (1, 1) rgba (1, 2, 0, 1)
[pixel shader] @@ -36,13 +36,13 @@ float4 main(float4 pos : sv_position) : sv_target
[test] draw quad -todo probe (3, 0) rgba (1, 0, 0, 1) -todo probe (4, 0) rgba (2, 0, 0, 1) -todo probe (3, 1) rgba (1, 1, 0, 1) -todo probe (4, 1) rgba (2, 1, 0, 1) +probe (3, 0) rgba (1, 0, 0, 1) +probe (4, 0) rgba (2, 0, 0, 1) +probe (3, 1) rgba (1, 1, 0, 1) +probe (4, 1) rgba (2, 1, 0, 1)
-[pixel shader fail todo] +[pixel shader fail] Texture2D t;
float4 main(float4 pos : sv_position) : sv_target
From: Francisco Casas fcasas@codeweavers.com
The Load() method offsets are used for these tests since these must resolve to constants in order to pass. --- Makefile.am | 1 + tests/swizzle-constant-prop.shader_test | 60 +++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 tests/swizzle-constant-prop.shader_test
diff --git a/Makefile.am b/Makefile.am index 84d75497..e3ff2941 100644 --- a/Makefile.am +++ b/Makefile.am @@ -146,6 +146,7 @@ vkd3d_shader_tests = \ tests/swizzle-5.shader_test \ tests/swizzle-6.shader_test \ tests/swizzle-7.shader_test \ + tests/swizzle-constant-prop.shader_test \ tests/texture-load.shader_test \ tests/texture-load-offset.shader_test \ tests/texture-load-typed.shader_test \ diff --git a/tests/swizzle-constant-prop.shader_test b/tests/swizzle-constant-prop.shader_test new file mode 100644 index 00000000..9aed5e19 --- /dev/null +++ b/tests/swizzle-constant-prop.shader_test @@ -0,0 +1,60 @@ +[require] +shader model >= 4.0 + + +[texture 0] +size (4, 4) + 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 + 5 5 5 5 6 6 6 6 7 7 7 7 8 8 8 8 + 9 9 9 9 10 10 10 10 11 11 11 11 12 12 12 12 +13 13 13 13 14 14 14 14 14 15 15 15 16 16 16 16 + + +[pixel shader todo] +Texture2D tex; +uniform int i; + +float4 main() : sv_target +{ + int4 a = {1, 2, i, i}; + return 100 * a + tex.Load(int3(0, 0, 0), a.xy); +} + +[test] +uniform 0 int 4 +todo draw quad +todo probe all rgba (110, 210, 410, 410) + + +[pixel shader todo] +Texture2D tex; +uniform int i; + +float4 main() : sv_target +{ + int4 a = {0, 1, 2, i}; + int4 b = a.yxww; + int3 c = b.wyx; + return 100 * b + tex.Load(int3(0, 0, 0), c.yz); +} + +[test] +uniform 0 int 3 +todo draw quad +todo probe all rgba (105, 5, 305, 305) + + +[pixel shader todo] +Texture2D tex; +uniform int i; + +float4 main() : sv_target +{ + int4 a = {1, 2, 3, i}; + return tex.Load(int3(0, 0, 0), a.wzxx.yxw.zx); +} + +[test] +uniform 0 int 1 +todo draw quad +todo probe all rgba (14.0, 14.0, 14.0, 14.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 43 +++++++++++++++++++++---- tests/swizzle-constant-prop.shader_test | 6 ++-- 2 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 9bdbd57c..c639bd4a 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -719,7 +719,8 @@ static struct hlsl_ir_node *copy_propagation_compute_replacement(struct hlsl_ctx }
static struct hlsl_ir_node *copy_propagation_compute_load_constant_replacement(struct hlsl_ctx *ctx, - const struct copy_propagation_state *state, const struct hlsl_ir_load *load) + const struct copy_propagation_state *state, const struct hlsl_ir_load *load, + unsigned int swizzle, unsigned int comp_count) { const struct hlsl_ir_var *var = load->src.var; union hlsl_constant_value values[4] = {0}; @@ -732,9 +733,10 @@ static struct hlsl_ir_node *copy_propagation_compute_load_constant_replacement(s if (!hlsl_component_index_range_from_deref(ctx, &load->src, &start, &count)) return NULL;
- for (i = 0; i < count; ++i) + for (i = 0; i < comp_count; ++i) { - struct copy_propagation_value *value = copy_propagation_get_value(state, var, start + i); + unsigned int k = start + ((swizzle >> i * 2) & 3); + struct copy_propagation_value *value = copy_propagation_get_value(state, var, k);
if (!value || value->node->type != HLSL_IR_CONSTANT) return NULL; @@ -742,14 +744,16 @@ static struct hlsl_ir_node *copy_propagation_compute_load_constant_replacement(s values[i] = hlsl_ir_constant(value->node)->value[value->component]; }
- if (!(cons = hlsl_new_constant(ctx, load->node.data_type, &load->node.loc))) + if (!(cons = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, load->node.data_type->base_type, comp_count), + &load->node.loc))) return NULL; cons->value[0] = values[0]; cons->value[1] = values[1]; cons->value[2] = values[2]; cons->value[3] = values[3];
- TRACE("Load from %s[%u-%u] turned into a constant %p.\n", var->name, start, start + count, cons); + TRACE("Load from %s[%u-%u]%s turned into a constant %p.\n", var->name, start, start + count, + debug_hlsl_swizzle(swizzle, comp_count), cons); return &cons->node; }
@@ -757,10 +761,10 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, struct copy_propagation_state *state) { struct hlsl_ir_node *instr = &load->node, *new_instr; + unsigned int swizzle = HLSL_SWIZZLE(X, Y, Z, W); struct hlsl_type *type = instr->data_type; struct hlsl_ir_swizzle *swizzle_node; unsigned int dimx = 0; - unsigned int swizzle;
switch (type->type) { @@ -781,7 +785,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, return false; }
- if ((new_instr = copy_propagation_compute_load_constant_replacement(ctx, state, load))) + if ((new_instr = copy_propagation_compute_load_constant_replacement(ctx, state, load, swizzle, dimx))) { list_add_before(&instr->entry, &new_instr->entry); hlsl_replace_node(instr, new_instr); @@ -832,6 +836,27 @@ static bool copy_propagation_transform_resource_load(struct hlsl_ctx *ctx, return progress; }
+ +static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, + struct hlsl_ir_swizzle *swizzle, struct copy_propagation_state *state) +{ + struct hlsl_ir_node *instr = &swizzle->node, *new_instr; + struct hlsl_type *type = swizzle->node.data_type; + struct hlsl_ir_load *load; + + if (swizzle->val.node->type != HLSL_IR_LOAD) + return false; + load = hlsl_ir_load(swizzle->val.node); + + if ((new_instr = copy_propagation_compute_load_constant_replacement(ctx, state, load, swizzle->swizzle, type->dimx))) + { + list_add_before(&instr->entry, &new_instr->entry); + hlsl_replace_node(instr, new_instr); + return true; + } + return false; +} + static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx, struct hlsl_ir_resource_store *store, struct copy_propagation_state *state) { @@ -995,6 +1020,10 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b copy_propagation_record_store(ctx, hlsl_ir_store(instr), state); break;
+ case HLSL_IR_SWIZZLE: + progress |= copy_propagation_transform_swizzle(ctx, hlsl_ir_swizzle(instr), state); + break; + case HLSL_IR_IF: progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state); break; diff --git a/tests/swizzle-constant-prop.shader_test b/tests/swizzle-constant-prop.shader_test index 9aed5e19..e9aa52a4 100644 --- a/tests/swizzle-constant-prop.shader_test +++ b/tests/swizzle-constant-prop.shader_test @@ -10,7 +10,7 @@ size (4, 4) 13 13 13 13 14 14 14 14 14 15 15 15 16 16 16 16
-[pixel shader todo] +[pixel shader] Texture2D tex; uniform int i;
@@ -22,8 +22,8 @@ float4 main() : sv_target
[test] uniform 0 int 4 -todo draw quad -todo probe all rgba (110, 210, 410, 410) +draw quad +probe all rgba (110, 210, 410, 410)
[pixel shader todo]
From: Francisco Casas fcasas@codeweavers.com
Allows replacing load+swizzle instruction pairs with direct references to the value source, requiring only that the components that appear in the swizzle originate from the same source.
For instance, this shader:
``` uniform float4 f;
float4 main() : sv_target { float4 g = f; g.yz = float2(2, 3); return g.xwxw; } ```
results in a swizzle that directly references the uniform f:
``` 2: float4 | f 3: | return 4: float4 | @2.xwxw 5: | = (<output-sv_target0> @4) ``` --- libs/vkd3d-shader/hlsl_codegen.c | 43 +++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index c639bd4a..566888c5 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -683,20 +683,19 @@ static void copy_propagation_set_value(struct copy_propagation_var_def *var_def,
static struct hlsl_ir_node *copy_propagation_compute_replacement(struct hlsl_ctx *ctx, const struct copy_propagation_state *state, const struct hlsl_deref *deref, - unsigned int *swizzle) + unsigned int *swizzle, unsigned int comp_count) { + unsigned int start, count, i, out_swizzle = 0; const struct hlsl_ir_var *var = deref->var; struct hlsl_ir_node *instr = NULL; - unsigned int start, count, i;
if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) return NULL;
- *swizzle = 0; - - for (i = 0; i < count; ++i) + for (i = 0; i < comp_count; ++i) { - struct copy_propagation_value *value = copy_propagation_get_value(state, var, start + i); + unsigned int k = start + (((*swizzle) >> i * 2) & 3); + struct copy_propagation_value *value = copy_propagation_get_value(state, var, k);
if (!value) return NULL; @@ -707,14 +706,17 @@ static struct hlsl_ir_node *copy_propagation_compute_replacement(struct hlsl_ctx } else if (instr != value->node) { - TRACE("No single source for propagating load from %s[%u-%u].\n", var->name, start, start + count); + TRACE("No single source for propagating load from %s[%u-%u]%s.\n", var->name, start, + start + count, debug_hlsl_swizzle(*swizzle, comp_count)); return NULL; } - *swizzle |= value->component << i * 2; + out_swizzle |= value->component << i * 2; }
- TRACE("Load from %s[%u-%u] propagated as instruction %p%s.\n", - var->name, start, start + count, instr, debug_hlsl_swizzle(*swizzle, count)); + TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", + var->name, start, start + count, debug_hlsl_swizzle(*swizzle, comp_count), instr, + debug_hlsl_swizzle(out_swizzle, comp_count)); + *swizzle = out_swizzle; return instr; }
@@ -792,7 +794,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, return true; }
- if (!(new_instr = copy_propagation_compute_replacement(ctx, state, &load->src, &swizzle))) + if (!(new_instr = copy_propagation_compute_replacement(ctx, state, &load->src, &swizzle, dimx))) return false;
if (type->type != HLSL_CLASS_OBJECT) @@ -809,11 +811,11 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct copy_propagation_state *state) { + unsigned int swizzle = HLSL_SWIZZLE(X, Y, Z, W); struct hlsl_ir_load *load; struct hlsl_ir_node *instr; - unsigned int swizzle;
- if (!(instr = copy_propagation_compute_replacement(ctx, state, deref, &swizzle))) + if (!(instr = copy_propagation_compute_replacement(ctx, state, deref, &swizzle, 1))) return false;
/* Only HLSL_IR_LOAD can produce an object. */ @@ -842,19 +844,30 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, { struct hlsl_ir_node *instr = &swizzle->node, *new_instr; struct hlsl_type *type = swizzle->node.data_type; + unsigned int swizzle_bits = swizzle->swizzle; + struct hlsl_ir_swizzle *new_swizzle; struct hlsl_ir_load *load;
if (swizzle->val.node->type != HLSL_IR_LOAD) return false; load = hlsl_ir_load(swizzle->val.node);
- if ((new_instr = copy_propagation_compute_load_constant_replacement(ctx, state, load, swizzle->swizzle, type->dimx))) + if ((new_instr = copy_propagation_compute_load_constant_replacement(ctx, state, load, swizzle_bits, type->dimx))) { list_add_before(&instr->entry, &new_instr->entry); hlsl_replace_node(instr, new_instr); return true; } - return false; + + if (!(new_instr = copy_propagation_compute_replacement(ctx, state, &load->src, &swizzle_bits, type->dimx))) + return false; + if (!(new_swizzle = hlsl_new_swizzle(ctx, swizzle_bits, type->dimx, new_instr, &instr->loc))) + return false; + new_instr = &new_swizzle->node; + list_add_before(&instr->entry, &new_instr->entry); + + hlsl_replace_node(instr, new_instr); + return true; }
static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx,
From: Francisco Casas fcasas@codeweavers.com
Since the new copy_propagation_compute_load_replacement() function doesn't have information about the swizzle instruction (in case it is called from copy_propagation_transform_swizzle()), new instructions are inserted next to the load (immediately before it) rather than before the swizzle, which can cause a difference in the ordering of the resulting instructions, but not one that should change the behavior of the resulting shader. --- libs/vkd3d-shader/hlsl_codegen.c | 60 +++++++++++++++----------------- 1 file changed, 29 insertions(+), 31 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 566888c5..96bf1523 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -759,13 +759,36 @@ static struct hlsl_ir_node *copy_propagation_compute_load_constant_replacement(s return &cons->node; }
+static struct hlsl_ir_node *copy_propagation_compute_load_replacement(struct hlsl_ctx *ctx, + const struct copy_propagation_state *state, struct hlsl_ir_load *load, + unsigned int swizzle, unsigned int comp_count) +{ + struct hlsl_ir_swizzle *new_swizzle; + struct hlsl_ir_node *new_instr; + + if ((new_instr = copy_propagation_compute_load_constant_replacement(ctx, state, load, swizzle, comp_count))) + { + list_add_before(&load->node.entry, &new_instr->entry); + return new_instr; + } + + if (!(new_instr = copy_propagation_compute_replacement(ctx, state, &load->src, &swizzle, comp_count))) + return NULL; + if (load->node.data_type->type != HLSL_CLASS_OBJECT) + { + if (!(new_swizzle = hlsl_new_swizzle(ctx, swizzle, comp_count, new_instr, &load->node.loc))) + return NULL; + new_instr = &new_swizzle->node; + list_add_before(&load->node.entry, &new_instr->entry); + } + return new_instr; +} + static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, struct hlsl_ir_load *load, struct copy_propagation_state *state) { struct hlsl_ir_node *instr = &load->node, *new_instr; - unsigned int swizzle = HLSL_SWIZZLE(X, Y, Z, W); struct hlsl_type *type = instr->data_type; - struct hlsl_ir_swizzle *swizzle_node; unsigned int dimx = 0;
switch (type->type) @@ -787,25 +810,12 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, return false; }
- if ((new_instr = copy_propagation_compute_load_constant_replacement(ctx, state, load, swizzle, dimx))) + if ((new_instr = copy_propagation_compute_load_replacement(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), dimx))) { - list_add_before(&instr->entry, &new_instr->entry); hlsl_replace_node(instr, new_instr); return true; } - - if (!(new_instr = copy_propagation_compute_replacement(ctx, state, &load->src, &swizzle, dimx))) - return false; - - if (type->type != HLSL_CLASS_OBJECT) - { - if (!(swizzle_node = hlsl_new_swizzle(ctx, swizzle, dimx, new_instr, &instr->loc))) - return false; - list_add_before(&instr->entry, &swizzle_node->node.entry); - new_instr = &swizzle_node->node; - } - hlsl_replace_node(instr, new_instr); - return true; + return false; }
static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, @@ -844,30 +854,18 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, { struct hlsl_ir_node *instr = &swizzle->node, *new_instr; struct hlsl_type *type = swizzle->node.data_type; - unsigned int swizzle_bits = swizzle->swizzle; - struct hlsl_ir_swizzle *new_swizzle; struct hlsl_ir_load *load;
if (swizzle->val.node->type != HLSL_IR_LOAD) return false; load = hlsl_ir_load(swizzle->val.node);
- if ((new_instr = copy_propagation_compute_load_constant_replacement(ctx, state, load, swizzle_bits, type->dimx))) + if ((new_instr = copy_propagation_compute_load_replacement(ctx, state, load, swizzle->swizzle, type->dimx))) { - list_add_before(&instr->entry, &new_instr->entry); hlsl_replace_node(instr, new_instr); return true; } - - if (!(new_instr = copy_propagation_compute_replacement(ctx, state, &load->src, &swizzle_bits, type->dimx))) - return false; - if (!(new_swizzle = hlsl_new_swizzle(ctx, swizzle_bits, type->dimx, new_instr, &instr->loc))) - return false; - new_instr = &new_swizzle->node; - list_add_before(&instr->entry, &new_instr->entry); - - hlsl_replace_node(instr, new_instr); - return true; + return false; }
static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx,
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 34 +++++++++++++++++++++++++ tests/swizzle-constant-prop.shader_test | 12 ++++----- 2 files changed, 40 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 96bf1523..08431d01 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1322,6 +1322,39 @@ static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins return false; }
+static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_swizzle *swizzle; + struct hlsl_ir_node *next_instr; + + if (instr->type != HLSL_IR_SWIZZLE) + return false; + swizzle = hlsl_ir_swizzle(instr); + + next_instr = swizzle->val.node; + + if (next_instr->type == HLSL_IR_SWIZZLE) + { + struct hlsl_ir_swizzle *new_swizzle; + struct hlsl_ir_node *new_instr; + unsigned int combined_swizzle; + + combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, + swizzle->swizzle, instr->data_type->dimx); + next_instr = hlsl_ir_swizzle(next_instr)->val.node; + + if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) + return false; + + new_instr = &new_swizzle->node; + list_add_before(&instr->entry, &new_instr->entry); + hlsl_replace_node(instr, new_instr); + return true; + } + + return false; +} + static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_swizzle *swizzle; @@ -2759,6 +2792,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry progress = transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); progress |= transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); progress |= copy_propagation_execute(ctx, body); + progress |= transform_ir(ctx, fold_swizzle_chains, body, NULL); progress |= transform_ir(ctx, remove_trivial_swizzles, body, NULL); } while (progress); diff --git a/tests/swizzle-constant-prop.shader_test b/tests/swizzle-constant-prop.shader_test index e9aa52a4..944d8702 100644 --- a/tests/swizzle-constant-prop.shader_test +++ b/tests/swizzle-constant-prop.shader_test @@ -26,7 +26,7 @@ draw quad probe all rgba (110, 210, 410, 410)
-[pixel shader todo] +[pixel shader] Texture2D tex; uniform int i;
@@ -40,11 +40,11 @@ float4 main() : sv_target
[test] uniform 0 int 3 -todo draw quad -todo probe all rgba (105, 5, 305, 305) +draw quad +probe all rgba (105, 5, 305, 305)
-[pixel shader todo] +[pixel shader] Texture2D tex; uniform int i;
@@ -56,5 +56,5 @@ float4 main() : sv_target
[test] uniform 0 int 1 -todo draw quad -todo probe all rgba (14.0, 14.0, 14.0, 14.0) +draw quad +probe all rgba (14.0, 14.0, 14.0, 14.0)
:arrow_up: This is the new version following Zeb's comments. 8/9 does the refactoring of the common code between `copy_propagation_transform_load()` and `copy_propagation_transform_swizzle()` which is introduced in the previous 2 patches.
8/9 could be squashed into 7/9, but I am not sure whether that would be better.