Currently we are not properly handling register(cX) reservations for SM1, this is one of the things required for the SNK shaders (CW Bug Bug 18092).
register(cX) reservations also change the offset in the $Globals buffer in SM4, so support for this is also included.
---
Patch 1/4 is required to specify: ``` [require] shader model < 4.0 ``` so that the tests that follow do not get run with the vulkan backend on SM4. I think nobody disagreed with that patch.
-- v2: vkd3d-shader/hlsl: Turn register(cX) reservations into buffer offset for SM4. vkd3d-shader/hlsl: Make register(cX) reservations work for SM1. tests: Test register(cX) reservations.
From: Francisco Casas fcasas@codeweavers.com
We need these checks to properly handle tests that require target profiles 3.0 and lower, or tests that require target profile 6.0 and higher.
Since all backeds require an ops->check_requirements now, expect it to always be defined. --- tests/shader_runner.c | 3 ++- tests/shader_runner.h | 4 ++-- tests/shader_runner_d3d11.c | 13 +++++++++++++ tests/shader_runner_d3d12.c | 14 ++++++++++++++ tests/shader_runner_vulkan.c | 13 +++++++++++++ 5 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/tests/shader_runner.c b/tests/shader_runner.c index dadfe3338..43949c01a 100644 --- a/tests/shader_runner.c +++ b/tests/shader_runner.c @@ -1089,8 +1089,9 @@ void run_shader_tests(struct shader_runner *runner, const struct shader_runner_o break;
case STATE_REQUIRE: + assert(runner->ops->check_requirements); if (runner->maximum_shader_model < runner->minimum_shader_model - || (runner->ops->check_requirements && !runner->ops->check_requirements(runner))) + || !runner->ops->check_requirements(runner)) { skip_tests = true; } diff --git a/tests/shader_runner.h b/tests/shader_runner.h index bda44a429..ee0e9094d 100644 --- a/tests/shader_runner.h +++ b/tests/shader_runner.h @@ -142,9 +142,9 @@ struct shader_runner
struct shader_runner_ops { - /* Returns false if unable to run the given tests. If NULL, all tests are - * run. */ + /* Returns false if unable to run the given tests. */ bool (*check_requirements)(struct shader_runner *runner); + struct resource *(*create_resource)(struct shader_runner *runner, const struct resource_params *params); void (*destroy_resource)(struct shader_runner *runner, struct resource *resource); bool (*draw)(struct shader_runner *runner, D3D_PRIMITIVE_TOPOLOGY primitive_topology, unsigned int vertex_count); diff --git a/tests/shader_runner_d3d11.c b/tests/shader_runner_d3d11.c index bd9d363ce..7a8f8dac0 100644 --- a/tests/shader_runner_d3d11.c +++ b/tests/shader_runner_d3d11.c @@ -100,6 +100,18 @@ static ID3D10Blob *compile_shader(const struct d3d11_shader_runner *runner, cons return blob; }
+static bool d3d11_runner_check_requirements(struct shader_runner *r) +{ + struct d3d11_shader_runner *runner = d3d11_shader_runner(r); + + if (runner->r.maximum_shader_model < SHADER_MODEL_4_0) + return false; + if (runner->r.minimum_shader_model >= SHADER_MODEL_6_0) + return false; + + return true; +} + static IDXGIAdapter *create_adapter(void) { IDXGIFactory4 *factory4; @@ -724,6 +736,7 @@ static void d3d11_runner_release_readback(struct shader_runner *r, struct resour
static const struct shader_runner_ops d3d11_runner_ops = { + .check_requirements = d3d11_runner_check_requirements, .create_resource = d3d11_runner_create_resource, .destroy_resource = d3d11_runner_destroy_resource, .dispatch = d3d11_runner_dispatch, diff --git a/tests/shader_runner_d3d12.c b/tests/shader_runner_d3d12.c index a05dfd059..bb50564fa 100644 --- a/tests/shader_runner_d3d12.c +++ b/tests/shader_runner_d3d12.c @@ -96,6 +96,19 @@ static ID3D10Blob *compile_shader(const struct d3d12_shader_runner *runner, cons return blob; }
+static bool d3d12_runner_check_requirements(struct shader_runner *r) +{ + struct d3d12_shader_runner *runner = d3d12_shader_runner(r); + + if (runner->r.maximum_shader_model < SHADER_MODEL_4_0) + return false; + + if (runner->r.minimum_shader_model >= SHADER_MODEL_6_0 && !runner->dxc_compiler) + return false; + + return true; +} + #define MAX_RESOURCE_DESCRIPTORS (MAX_RESOURCES * 2)
static struct resource *d3d12_runner_create_resource(struct shader_runner *r, const struct resource_params *params) @@ -561,6 +574,7 @@ static void d3d12_runner_release_readback(struct shader_runner *r, struct resour
static const struct shader_runner_ops d3d12_runner_ops = { + .check_requirements = d3d12_runner_check_requirements, .create_resource = d3d12_runner_create_resource, .destroy_resource = d3d12_runner_destroy_resource, .dispatch = d3d12_runner_dispatch, diff --git a/tests/shader_runner_vulkan.c b/tests/shader_runner_vulkan.c index f89b4d624..ec2d1fc9e 100644 --- a/tests/shader_runner_vulkan.c +++ b/tests/shader_runner_vulkan.c @@ -83,6 +83,18 @@ static struct vulkan_shader_runner *vulkan_shader_runner(struct shader_runner *r
#define VK_CALL(f) (runner->f)
+static bool vulkan_runner_check_requirements(struct shader_runner *r) +{ + struct vulkan_shader_runner *runner = vulkan_shader_runner(r); + + if (runner->r.maximum_shader_model < SHADER_MODEL_4_0) + return false; + if (runner->r.minimum_shader_model >= SHADER_MODEL_6_0) + return false; + + return true; +} + static void begin_command_buffer(struct vulkan_shader_runner *runner) { VkCommandBufferBeginInfo buffer_begin_desc = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; @@ -1157,6 +1169,7 @@ static void vulkan_runner_release_readback(struct shader_runner *r, struct resou
static const struct shader_runner_ops vulkan_runner_ops = { + .check_requirements = vulkan_runner_check_requirements, .create_resource = vulkan_runner_create_resource, .destroy_resource = vulkan_runner_destroy_resource, .dispatch = vulkan_runner_dispatch,
From: Francisco Casas fcasas@codeweavers.com
--- tests/hlsl/register-reservations.shader_test | 108 +++++++++++++++++++ 1 file changed, 108 insertions(+)
diff --git a/tests/hlsl/register-reservations.shader_test b/tests/hlsl/register-reservations.shader_test index 9539b05ff..056c6d1b5 100644 --- a/tests/hlsl/register-reservations.shader_test +++ b/tests/hlsl/register-reservations.shader_test @@ -1,6 +1,114 @@ +[pixel shader fail(sm<6) todo] +// Overlapping register(cX) reservations should only be allowed on SM6. +float a : register(c0); +float b : register(c0); + +float4 main() : sv_target +{ + return a + b; +} + + +[require] +shader model < 4.0 + +[pixel shader] +struct +{ + float2 a; + float b; +} apple : register(c2); + +float4 main() : sv_target +{ + return float4(apple.a, apple.b, 0); +} + +[test] +uniform 0 float4 0.1 0.2 0.3 0.4 +uniform 4 float4 1.1 1.2 1.3 1.4 +uniform 8 float4 2.1 2.2 2.3 2.4 +uniform 12 float4 3.1 3.2 3.3 3.4 +draw quad +todo probe all rgba (2.1, 2.2, 3.1, 0.0) + + [require] shader model >= 4.0
+[pixel shader] +struct +{ + float2 a; + float b; +} apple : register(c2); + +float4 main() : sv_target +{ + return float4(apple.a, apple.b, 0); +} + +[test] +uniform 0 float4 0.1 0.2 0.3 0.4 +uniform 4 float4 1.1 1.2 1.3 1.4 +uniform 8 float4 2.1 2.2 2.3 2.4 +uniform 12 float4 3.1 3.2 3.3 3.4 +draw quad +todo(sm<6) probe all rgba (2.1, 2.2, 2.3, 0.0) + + +[pixel shader] +// On SM4 register(cX) has no effect unless in the $Globals buffer. +cbuffer extra +{ + float a : register(c1); +}; + +float4 main() : sv_target +{ + return a; +} + +[test] +uniform 0 float 100 +uniform 4 float 101 +draw quad +probe all rgba (100, 100, 100, 100) + + +[pixel shader fail(sm>=6)] +// On SM4 register(cX) has no effect unless in the $Globals buffer. +float4 main(uniform float a : register(c1)) : sv_target +{ + return a; +} + +[test] +uniform 0 float 100 +uniform 4 float 101 +draw quad +probe all rgba (100, 100, 100, 100) + +[pixel shader todo] +cbuffer c +{ + float a : packoffset(c1); + float b : packoffset(c2) : register(c1); + // ^ register(c1) ignored for cbuffer that is not $Globals. +} + +float4 main() : sv_target +{ + return float4(a, b, 0, 0); +} + +[test] +uniform 0 float 200 +uniform 4 float 201 +uniform 8 float 202 +todo(sm<6) draw quad +todo(sm<6) probe all rgba (201.0, 202.0, 0.0, 0.0) +
[texture 0] size (1, 1)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 28 ++++++++++++++++++-- tests/hlsl/register-reservations.shader_test | 2 +- 2 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 76a010fb3..56ef0aa5b 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3954,8 +3954,32 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi if (reg_size == 0) continue;
- var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, - 1, UINT_MAX, var->data_type); + if (var->reg_reservation.reg_type == 'c') + { + unsigned int reg_idx = var->reg_reservation.reg_index; + unsigned int i; + + assert(reg_size % 4 == 0); + for (i = 0; i < reg_size / 4; ++i) + { + if (get_available_writemask(&allocator, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Overlapping register() reservations on 'c%u'.", reg_idx + i); + } + + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); + } + + var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; + var->regs[HLSL_REGSET_NUMERIC].allocation_size = reg_size / 4; + var->regs[HLSL_REGSET_NUMERIC].allocated = true; + } + else + { + var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, + 1, UINT_MAX, var->data_type); + } TRACE("Allocated %s to %s.\n", var->name, debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); } diff --git a/tests/hlsl/register-reservations.shader_test b/tests/hlsl/register-reservations.shader_test index 056c6d1b5..7ac6f1414 100644 --- a/tests/hlsl/register-reservations.shader_test +++ b/tests/hlsl/register-reservations.shader_test @@ -30,7 +30,7 @@ uniform 4 float4 1.1 1.2 1.3 1.4 uniform 8 float4 2.1 2.2 2.3 2.4 uniform 12 float4 3.1 3.2 3.3 3.4 draw quad -todo probe all rgba (2.1, 2.2, 3.1, 0.0) +probe all rgba (2.1, 2.2, 3.1, 0.0)
[require]
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 7 ++++++- tests/hlsl/register-reservations.shader_test | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 56ef0aa5b..caf766f43 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -4121,7 +4121,12 @@ static void calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *va enum hlsl_type_class var_class = var->data_type->class; struct hlsl_buffer *buffer = var->buffer;
- if (var->reg_reservation.offset_type == 'c') + if (var->reg_reservation.reg_type == 'c' && var->buffer == ctx->globals_buffer) + { + /* On SM4, register(cX) reservations are translated to buffer offsets, but only for the $Globals buffer. */ + var->buffer_offset = 4 * var->reg_reservation.reg_index; + } + else if (var->reg_reservation.offset_type == 'c') { if (var->reg_reservation.offset_index % 4) { diff --git a/tests/hlsl/register-reservations.shader_test b/tests/hlsl/register-reservations.shader_test index 7ac6f1414..c9df8512a 100644 --- a/tests/hlsl/register-reservations.shader_test +++ b/tests/hlsl/register-reservations.shader_test @@ -1,4 +1,4 @@ -[pixel shader fail(sm<6) todo] +[pixel shader fail(sm<6)] // Overlapping register(cX) reservations should only be allowed on SM6. float a : register(c0); float b : register(c0); @@ -54,7 +54,7 @@ uniform 4 float4 1.1 1.2 1.3 1.4 uniform 8 float4 2.1 2.2 2.3 2.4 uniform 12 float4 3.1 3.2 3.3 3.4 draw quad -todo(sm<6) probe all rgba (2.1, 2.2, 2.3, 0.0) +probe all rgba (2.1, 2.2, 2.3, 0.0)
[pixel shader]
On Thu Nov 9 19:54:25 2023 +0000, Zebediah Figura wrote:
Why, what happens on sm6?
They just [share](https://shader-playground.timjones.io/76e0cf11d22d55aff7a29c7a7220387e) the same offset, so my guess is that they would be aliased.
On Thu Nov 9 19:54:26 2023 +0000, Zebediah Figura wrote:
What about types other than 'c'? I know 'i' and 'b' do something that we don't implement [and should maybe have a hlsl_fixme() for]; what about invalid types?
I did manual testing and got to the following (I hope correct) conclusions:
The data type of the variable of each variable doesn't matter directly, it only matters whether its usage needs it to be passed in the 'c', 'i', or 'b' register group (as they are called on error messages).
Most variables are needed in the 'c' register group (used for floating point ops), if the variable is needed in the 'c' register group another reservation like `register(iX)` or `register(bX)` cannot be passed unless `register(cX)` is also provided.
As you have discovered, the need for the variable to be on the 'i' group can be forced using a 'for' loop.
Some examples:
The following shader results in the following error: ``` error X4509: invalid register semantic 'i0', or variable must be bound to multiple register banks (c register binding required) ``` ```hlsl int k : register(i0);
float4 main() : sv_target { return k; } ```
So, some `register(cX)` must be also provided for it to compile: ```hlsl int k : register(i0) : register(c1);
float4 main() : sv_target { return k; } ```
The 'b' group seems to be bound to the same rules as the 'i' group, it is allowed to pass `register(bX)` to any variable regardless of the type but, like for the 'i' group, if it is needed in the 'c' register group, a `register(cX)` must be specified.
This shader is exceptional because the variable 'k' is only needed in the 'i' group, so in this rare case a `register(cX)` is not necessary: ```hlsl int k : register(i0);
float4 main() : sv_target { float f = 0;
for (int i = 0; i < k; ++i) f += i; return f; } ```
The converse is not true, if a variable is needed in the 'i' register group any other reservation is allowed:
```hlsl int k : register(c0) : register(b0);
float4 main() : sv_target { float f = 0;
for (int i = 0; i < k; ++i) f += i; return f; } ```
Besides 'i', 'b', and 'c', the 's', 't', and 'u' register groups are also valid regardless of the variable type. The 's' register group is required for used samplers if another reservation is specified. The remaining letters result in an error.
SM4 is a lot more permissive, and any letter can be used. I would prefer for us to also be permissive for SM1, and basically ignore unused register() reservations and don't error out when register(cX) reservations are not provided if any other is.
If we choose to be permissive, should these tests be included or not?
On Thu Nov 9 19:54:27 2023 +0000, Zebediah Figura wrote:
This doesn't include any of the validity checks for packoffset(). Should it? We don't have tests for this.
We have validate_buffer_offsets() to perform this check already.
I think that the first test added to register-reserations.shader_test already covers this, when run for SM4: ``` [pixel shader fail(sm<6) todo] // Overlapping register(cX) reservations should only be allowed on SM6. float a : register(c0); float b : register(c0);
float4 main() : sv_target { return a + b; } ```
On Fri Nov 10 02:48:40 2023 +0000, Zebediah Figura wrote:
It'd be nice to have tests for mixing register and non-register allocations, e.g. to make sure that precedence and overlapping works as expected.
Well, `register(cX)` only has effect on the `$Globals` constant buffer and `packoffset()` cannot be used on it (because the variable must be inside a `cbuffer` declaration).
I added a test that checks that `register(cX)` has no effect inside a cbuffer: ```hlsl cbuffer c { float4 a : packoffset(c1); float4 b : packoffset(c2) : register(c1); // ^ register(c1) ignored for cbuffer that is not $Globals. }
float4 main() : sv_target { return a + b; } ``` Note that the packoffset(cX) is required on the second field because if one cbuffer member uses packoffset() all others must.
However, this would still be a todo since currently don't support multiple register() or packoffset() declarations in the same variable, we would have to modify the parser and vectorize the `reservation` field, but I didn't think this was something to worry about yet.
On Fri Nov 10 02:48:40 2023 +0000, Francisco Casas wrote:
They just [share](https://shader-playground.timjones.io/76e0cf11d22d55aff7a29c7a7220387e) the same offset, so my guess is that they would be aliased.
It'd be nice to have that written down in a comment so other people don't waste time wondering. Even better if we can test it too.
If we choose to be permissive, should these tests be included or not?
I don't think we need to implement the logic right now, but having our logic match native in this respect I think is a good idea. I.e. we should implement the logic "eventually". So I would keep the tests, yes.
On Fri Nov 10 02:48:40 2023 +0000, Francisco Casas wrote:
Well, `register(cX)` only has effect on the `$Globals` constant buffer and `packoffset()` cannot be used on it (because the variable must be inside a `cbuffer` declaration). I added a test that checks that `register(cX)` has no effect inside a cbuffer:
cbuffer c { float4 a : packoffset(c1); float4 b : packoffset(c2) : register(c1); // ^ register(c1) ignored for cbuffer that is not $Globals. } float4 main() : sv_target { return a + b; }
Note that the packoffset(cX) is required on the second field because if one cbuffer member uses packoffset() all others must. However, this would still be a todo since currently don't support multiple register() or packoffset() declarations in the same variable, we would have to modify the parser and vectorize the `reservation` field, but I didn't think this was something to worry about yet.
I don't understand? I'm not talking about cbuffers. I just mean tests for declaring multiple global variables, some with register(cX) and some without.
On Fri Nov 10 02:48:40 2023 +0000, Francisco Casas wrote:
We have validate_buffer_offsets() to perform this check already. I think that the first test added to register-reserations.shader_test already covers this, when run for SM4:
[pixel shader fail(sm<6) todo] // Overlapping register(cX) reservations should only be allowed on SM6. float a : register(c0); float b : register(c0); float4 main() : sv_target { return a + b; }
I meant the checks done by calculate_buffer_offset(). But I see those are only relevant to unaligned reservations, now that I look again.
I think that the first test added to register-reserations.shader_test already covers this, when run for SM4:
It covers the "no overlap" error, but not the "all or nothing" error.