Currently we are not properly handling register(cX) reservations for SM1; this is one of the things required for the SNK shaders (CW Bug 18092).
register(cX) reservations also change the offset in the $Globals buffer in SM4, so support for this is also included.
---
Patch 1/4 is required to specify:
```
[require]
shader model < 4.0
```
so that the tests that follow do not get run with the Vulkan backend on SM4. I think nobody disagreed with that patch.
-- v9: vkd3d-shader/hlsl: Turn register(cX) reservations into buffer offset for SM4. vkd3d-shader/hlsl: Make register(cX) reservations work for SM1. tests: Test register(cX) reservations. tests: Rename register-reservations.shader_test to register-reservations-resources.shader_test.
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 2 +- ....shader_test => register-reservations-resources.shader_test} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/hlsl/{register-reservations.shader_test => register-reservations-resources.shader_test} (100%)
diff --git a/Makefile.am b/Makefile.am index bfd11fdb4..0cb97b66e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -152,7 +152,7 @@ vkd3d_shader_tests = \ tests/hlsl/object-references.shader_test \ tests/hlsl/pow.shader_test \ tests/hlsl/reflect.shader_test \ - tests/hlsl/register-reservations.shader_test \ + tests/hlsl/register-reservations-resources.shader_test \ tests/hlsl/return-implicit-conversion.shader_test \ tests/hlsl/return.shader_test \ tests/hlsl/round.shader_test \ diff --git a/tests/hlsl/register-reservations.shader_test b/tests/hlsl/register-reservations-resources.shader_test similarity index 100% rename from tests/hlsl/register-reservations.shader_test rename to tests/hlsl/register-reservations-resources.shader_test
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + .../register-reservations-numeric.shader_test | 279 ++++++++++++++++++ 2 files changed, 280 insertions(+) create mode 100644 tests/hlsl/register-reservations-numeric.shader_test
diff --git a/Makefile.am b/Makefile.am index 0cb97b66e..90e7dcfcc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -152,6 +152,7 @@ vkd3d_shader_tests = \ tests/hlsl/object-references.shader_test \ tests/hlsl/pow.shader_test \ tests/hlsl/reflect.shader_test \ + tests/hlsl/register-reservations-numeric.shader_test \ tests/hlsl/register-reservations-resources.shader_test \ tests/hlsl/return-implicit-conversion.shader_test \ tests/hlsl/return.shader_test \ diff --git a/tests/hlsl/register-reservations-numeric.shader_test b/tests/hlsl/register-reservations-numeric.shader_test new file mode 100644 index 000000000..fa3c94f75 --- /dev/null +++ b/tests/hlsl/register-reservations-numeric.shader_test @@ -0,0 +1,279 @@ +[pixel shader fail(sm<6) todo] +// Overlapping register(cX) reservations are not allowed except on SM6, where they are aliased. +// On SM1 this gives hr 0x88760b59. +float a : register(c0); +float b : register(c0); + +float4 main() : sv_target +{ + return a + b; +} + + +[pixel shader] +// It is not required to provide a register(cX) for all elements in the $Globals buffer. 
+float4 a; // will get register(c1) +float4 b : register(c0); + +float4 main() : sv_target +{ + return float4(a.xw, b.yz); +} + +[test] +uniform 0 float4 0.1 0.2 0.3 0.4 +uniform 4 float4 1.1 1.2 1.3 1.4 +draw quad +todo(sm<6) probe all rgba (1.1, 1.4, 0.2, 0.3) + + +[pixel shader] +float4 a[3]; // will get register(c3) +float4 b[2] : register(c1); + +float4 main() : sv_target +{ + return float4(a[1].xy, b[0].zw); +} + +[test] +uniform 0 float4 0.1 0.2 0.3 0.4 +uniform 4 float4 1.1 1.2 1.3 1.4 +uniform 8 float4 2.1 2.2 2.3 2.4 +uniform 12 float4 3.1 3.2 3.3 3.4 +uniform 16 float4 4.1 4.2 4.3 4.4 +draw quad +todo(sm<6) probe all rgba (4.1, 4.2, 1.3, 1.4) + + +[require] +shader model < 4.0 + +[pixel shader] +float a : register(c2); +float b; // will get register c0 in SM1 + +float4 main() : sv_target +{ + return float4(a, b, 0.0, 0.0); +} + +[test] +uniform 0 float4 0.1 0.2 0.3 0.4 +uniform 4 float4 1.1 1.2 1.3 1.4 +uniform 8 float4 2.1 2.2 2.3 2.4 +uniform 12 float4 3.1 3.2 3.3 3.4 +draw quad +todo probe all rgba (2.1, 0.1, 0.0, 0.0) + + +[require] +shader model >= 4.0 + +[pixel shader] +float a : register(c2); +float b; // will get offset equivalent to c2.y in SM4 and SM6 + +float4 main() : sv_target +{ + return float4(a, b, 0.0, 0.0); +} + +[test] +uniform 0 float4 0.1 0.2 0.3 0.4 +uniform 4 float4 1.1 1.2 1.3 1.4 +uniform 8 float4 2.1 2.2 2.3 2.4 +uniform 12 float4 3.1 3.2 3.3 3.4 +draw quad +todo(sm<6) probe all rgba (2.1, 2.2, 0.0, 0.0) + + +[require] +shader model >= 6.0 + +[pixel shader] +// Variables with overlapping register(cX) reservations are aliased in SM6. 
+float2 a : register(c2); +float3 b : register(c2); + +float4 main() : sv_target +{ + return float4(a, b.yz); +} + +[test] +uniform 0 float4 0.1 0.2 0.3 0.4 +uniform 4 float4 1.1 1.2 1.3 1.4 +uniform 8 float4 2.1 2.2 2.3 2.4 +draw quad +probe all rgba (2.1, 2.2, 2.2, 2.3) + + +% Results differ between SM1 and SM4 because in the latter variables can share the same register, +% using different writemasks. +[require] +shader model < 4.0 + +[pixel shader] +struct +{ + float2 a; + float b; +} apple : register(c2); + +float4 main() : sv_target +{ + return float4(apple.a, apple.b, 0); +} + +[test] +uniform 0 float4 0.1 0.2 0.3 0.4 +uniform 4 float4 1.1 1.2 1.3 1.4 +uniform 8 float4 2.1 2.2 2.3 2.4 +uniform 12 float4 3.1 3.2 3.3 3.4 +draw quad +todo probe all rgba (2.1, 2.2, 3.1, 0.0) + + +[require] +shader model >= 4.0 + +[pixel shader] +struct +{ + float2 a; + float b; +} apple : register(c2); + +float4 main() : sv_target +{ + return float4(apple.a, apple.b, 0); +} + +[test] +uniform 0 float4 0.1 0.2 0.3 0.4 +uniform 4 float4 1.1 1.2 1.3 1.4 +uniform 8 float4 2.1 2.2 2.3 2.4 +uniform 12 float4 3.1 3.2 3.3 3.4 +draw quad +todo(sm<6) probe all rgba (2.1, 2.2, 2.3, 0.0) + + +[pixel shader] +// On SM4, register(cX) has no effect unless in the $Globals buffer. +cbuffer extra +{ + float a : register(c1); +}; + +float4 main() : sv_target +{ + return a; +} + +[test] +uniform 0 float 100 +uniform 4 float 101 +draw quad +probe all rgba (100, 100, 100, 100) + + +[pixel shader fail(sm>=6)] +// On SM4 register(cX) has no effect unless in the $Globals buffer. +float4 main(uniform float a : register(c1)) : sv_target +{ + return a; +} + +[test] +uniform 0 float 100 +uniform 4 float 101 +draw quad +probe all rgba (100, 100, 100, 100) + +[pixel shader todo] +cbuffer c +{ + float a : packoffset(c1); + float b : packoffset(c2) : register(c1); + // ^ register(c1) ignored for cbuffer that is not $Globals. 
+} + +float4 main() : sv_target +{ + return float4(a, b, 0, 0); +} + +[test] +uniform 0 float 200 +uniform 4 float 201 +uniform 8 float 202 +todo(sm<6) draw quad +todo(sm<6) probe all rgba (201.0, 202.0, 0.0, 0.0) + + +[pixel shader fail(sm<4)] +int k : register(i0); // register(cX) is also required. + +float4 main() : sv_target +{ + return k; +} + + +[require] +% All shader models. + +% In SM1, most variables are needed in the "c" register group, for float operations. +% If a variable is needed in the "c" register group, register() reservations in other groups can be +% provided only if a register(cX) reservation is also provided. + +[pixel shader fail(sm<4)] +int k : register(i0); +// ^^ register(cX) is also required in SM1. + +float4 main() : sv_target +{ + return k; +} + +[pixel shader todo] +int k : register(i0) : register(c1); +// Shader compiles because a "c" register reservation is provided for "k". + +float4 main() : sv_target +{ + return k; +} + + +[require] +shader model >= 3.0 +% model 2.0 doesn't support unrollable loops. + +[pixel shader todo(sm<4)] +int k : register(i0); +// ^^ register(cX) is not required since "k" is just needed in the "i" register group. + +float4 main() : sv_target +{ + float f = 0; + + for (int i = 0; i < k; ++i) + f += i; + return f; +} + + +[pixel shader todo] +int k : register(c0) : register(b0); +// ^^ unlike the "c" register group, a reservation is not required for the "i" group, even though "k" is needed on it. + +float4 main() : sv_target +{ + float f = 0; + + for (int i = 0; i < k; ++i) + f += i; + return f; +}
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 41 +++++++++++++++++-- .../register-reservations-numeric.shader_test | 12 +++--- 2 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 1fe141a34..2d2e904ba 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -4123,13 +4123,46 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->last_read) + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + + if (!var->is_uniform || !var->last_read || reg_size == 0) + continue; + + if (var->reg_reservation.reg_type == 'c') { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int reg_idx = var->reg_reservation.reg_index; + unsigned int i;
- if (reg_size == 0) - continue; + assert(reg_size % 4 == 0); + for (i = 0; i < reg_size / 4; ++i) + { + if (get_available_writemask(&allocator, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Overlapping register() reservations on 'c%u'.", reg_idx + i); + } + + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); + }
+ var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; + var->regs[HLSL_REGSET_NUMERIC].allocation_size = reg_size / 4; + var->regs[HLSL_REGSET_NUMERIC].writemask = VKD3DSP_WRITEMASK_ALL; + var->regs[HLSL_REGSET_NUMERIC].allocated = true; + TRACE("Allocated reserved %s to %s.\n", var->name, + debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + + if (!var->is_uniform || !var->last_read || reg_size == 0) + continue; + + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + { var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, &allocator, 1, UINT_MAX, var->data_type); TRACE("Allocated %s to %s.\n", var->name, diff --git a/tests/hlsl/register-reservations-numeric.shader_test b/tests/hlsl/register-reservations-numeric.shader_test index fa3c94f75..9b2bae571 100644 --- a/tests/hlsl/register-reservations-numeric.shader_test +++ b/tests/hlsl/register-reservations-numeric.shader_test @@ -1,4 +1,4 @@ -[pixel shader fail(sm<6) todo] +[pixel shader fail(sm<6) todo(sm>=4)] // Overlapping register(cX) reservations are not allowed except on SM6, where they are aliased. // On SM1 this gives hr 0x88760b59. float a : register(c0); @@ -24,7 +24,7 @@ float4 main() : sv_target uniform 0 float4 0.1 0.2 0.3 0.4 uniform 4 float4 1.1 1.2 1.3 1.4 draw quad -todo(sm<6) probe all rgba (1.1, 1.4, 0.2, 0.3) +todo(sm>=4 & sm<6) probe all rgba (1.1, 1.4, 0.2, 0.3)
[pixel shader] @@ -43,7 +43,7 @@ uniform 8 float4 2.1 2.2 2.3 2.4 uniform 12 float4 3.1 3.2 3.3 3.4 uniform 16 float4 4.1 4.2 4.3 4.4 draw quad -todo(sm<6) probe all rgba (4.1, 4.2, 1.3, 1.4) +todo(sm>=4 & sm<6) probe all rgba (4.1, 4.2, 1.3, 1.4)
[require] @@ -64,7 +64,7 @@ uniform 4 float4 1.1 1.2 1.3 1.4 uniform 8 float4 2.1 2.2 2.3 2.4 uniform 12 float4 3.1 3.2 3.3 3.4 draw quad -todo probe all rgba (2.1, 0.1, 0.0, 0.0) +probe all rgba (2.1, 0.1, 0.0, 0.0)
[require] @@ -132,7 +132,7 @@ uniform 4 float4 1.1 1.2 1.3 1.4 uniform 8 float4 2.1 2.2 2.3 2.4 uniform 12 float4 3.1 3.2 3.3 3.4 draw quad -todo probe all rgba (2.1, 2.2, 3.1, 0.0) +probe all rgba (2.1, 2.2, 3.1, 0.0)
[require] @@ -228,7 +228,7 @@ float4 main() : sv_target % If a variable is needed in the "c" register group, register() reservations in other groups can be % provided only if a register(cX) reservation is also provided.
-[pixel shader fail(sm<4)] +[pixel shader fail(sm<4) todo(sm<4)] int k : register(i0); // ^^ register(cX) is also required in SM1.
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 96 ++++++++++++------- .../register-reservations-numeric.shader_test | 10 +- 2 files changed, 67 insertions(+), 39 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 2d2e904ba..6ad60e4c6 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -4302,45 +4302,52 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 return NULL; }
-static void calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var) +static void calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, bool register_reservation) { unsigned int var_reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; enum hlsl_type_class var_class = var->data_type->class; struct hlsl_buffer *buffer = var->buffer;
- if (var->reg_reservation.offset_type == 'c') + if (register_reservation) { - if (var->reg_reservation.offset_index % 4) + var->buffer_offset = 4 * var->reg_reservation.reg_index; + } + else + { + if (var->reg_reservation.offset_type == 'c') { - if (var_class == HLSL_CLASS_MATRIX) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() reservations with matrix types must be aligned with the beginning of a register."); - } - else if (var_class == HLSL_CLASS_ARRAY) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() reservations with array types must be aligned with the beginning of a register."); - } - else if (var_class == HLSL_CLASS_STRUCT) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() reservations with struct types must be aligned with the beginning of a register."); - } - else if (var_class == HLSL_CLASS_VECTOR) + if (var->reg_reservation.offset_index % 4) { - unsigned int aligned_offset = hlsl_type_get_sm4_offset(var->data_type, var->reg_reservation.offset_index); - - if (var->reg_reservation.offset_index != aligned_offset) + if (var_class == HLSL_CLASS_MATRIX) + { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() reservations with vector types cannot span multiple registers."); + "packoffset() reservations with matrix types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_ARRAY) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with array types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_STRUCT) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with struct types must be aligned with the beginning of a register."); + } + else if (var_class == HLSL_CLASS_VECTOR) + { + unsigned int aligned_offset = 
hlsl_type_get_sm4_offset(var->data_type, var->reg_reservation.offset_index); + + if (var->reg_reservation.offset_index != aligned_offset) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() reservations with vector types cannot span multiple registers."); + } } + var->buffer_offset = var->reg_reservation.offset_index; + } + else + { + var->buffer_offset = hlsl_type_get_sm4_offset(var->data_type, buffer->size); } - var->buffer_offset = var->reg_reservation.offset_index; - } - else - { - var->buffer_offset = hlsl_type_get_sm4_offset(var->data_type, buffer->size); }
TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); @@ -4409,6 +4416,11 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) } }
+static bool var_has_buffer_offset_register_reservation(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) +{ + return var->reg_reservation.reg_type == 'c' && var->buffer == ctx->globals_buffer; +} + static void allocate_buffers(struct hlsl_ctx *ctx) { struct hlsl_buffer *buffer; @@ -4417,13 +4429,29 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) - { - if (var->is_param) - var->buffer = ctx->params_buffer; + if (!var->is_uniform || hlsl_type_is_resource(var->data_type)) + continue;
- calculate_buffer_offset(ctx, var); - } + if (var->is_param) + var->buffer = ctx->params_buffer; + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->is_uniform || hlsl_type_is_resource(var->data_type)) + continue; + + if (var_has_buffer_offset_register_reservation(ctx, var)) + calculate_buffer_offset(ctx, var, true); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->is_uniform || hlsl_type_is_resource(var->data_type)) + continue; + + if (!var_has_buffer_offset_register_reservation(ctx, var)) + calculate_buffer_offset(ctx, var, false); }
validate_buffer_offsets(ctx); diff --git a/tests/hlsl/register-reservations-numeric.shader_test b/tests/hlsl/register-reservations-numeric.shader_test index 9b2bae571..5067d54f9 100644 --- a/tests/hlsl/register-reservations-numeric.shader_test +++ b/tests/hlsl/register-reservations-numeric.shader_test @@ -1,4 +1,4 @@ -[pixel shader fail(sm<6) todo(sm>=4)] +[pixel shader fail(sm<6)] // Overlapping register(cX) reservations are not allowed except on SM6, where they are aliased. // On SM1 this gives hr 0x88760b59. float a : register(c0); @@ -24,7 +24,7 @@ float4 main() : sv_target uniform 0 float4 0.1 0.2 0.3 0.4 uniform 4 float4 1.1 1.2 1.3 1.4 draw quad -todo(sm>=4 & sm<6) probe all rgba (1.1, 1.4, 0.2, 0.3) +probe all rgba (1.1, 1.4, 0.2, 0.3)
[pixel shader] @@ -43,7 +43,7 @@ uniform 8 float4 2.1 2.2 2.3 2.4 uniform 12 float4 3.1 3.2 3.3 3.4 uniform 16 float4 4.1 4.2 4.3 4.4 draw quad -todo(sm>=4 & sm<6) probe all rgba (4.1, 4.2, 1.3, 1.4) +probe all rgba (4.1, 4.2, 1.3, 1.4)
[require] @@ -85,7 +85,7 @@ uniform 4 float4 1.1 1.2 1.3 1.4 uniform 8 float4 2.1 2.2 2.3 2.4 uniform 12 float4 3.1 3.2 3.3 3.4 draw quad -todo(sm<6) probe all rgba (2.1, 2.2, 0.0, 0.0) +probe all rgba (2.1, 2.2, 0.0, 0.0)
[require] @@ -156,7 +156,7 @@ uniform 4 float4 1.1 1.2 1.3 1.4 uniform 8 float4 2.1 2.2 2.3 2.4 uniform 12 float4 3.1 3.2 3.3 3.4 draw quad -todo(sm<6) probe all rgba (2.1, 2.2, 2.3, 0.0) +probe all rgba (2.1, 2.2, 2.3, 0.0)
[pixel shader]
This fails on the CI.