From: Nikolay Sivov nsivov@codeweavers.com
--- libs/vkd3d-shader/d3dbc.c | 2 ++ libs/vkd3d-shader/hlsl.h | 3 ++- libs/vkd3d-shader/hlsl.y | 23 +++++++++++++++++-- libs/vkd3d-shader/hlsl_codegen.c | 39 ++++++++++++++++++++++++++++++++ libs/vkd3d-shader/tpf.c | 3 +++ 5 files changed, 67 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index c35f8ca0f..2ae8df5f7 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1907,6 +1907,8 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_
.src_count = 2, }; + if (load->load_type == HLSL_RESOURCE_SAMPLE_PROJ) + sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT;
assert(instr->reg.allocated);
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 7d02448e0..2b88181f4 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -617,9 +617,10 @@ enum hlsl_resource_load_type HLSL_RESOURCE_SAMPLE, HLSL_RESOURCE_SAMPLE_CMP, HLSL_RESOURCE_SAMPLE_CMP_LZ, + HLSL_RESOURCE_SAMPLE_GRAD, HLSL_RESOURCE_SAMPLE_LOD, HLSL_RESOURCE_SAMPLE_LOD_BIAS, - HLSL_RESOURCE_SAMPLE_GRAD, + HLSL_RESOURCE_SAMPLE_PROJ, HLSL_RESOURCE_GATHER_RED, HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index cf483d82c..9d64ac1ee 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3295,9 +3295,10 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; + struct hlsl_resource_load_params load_params = { 0 }; const struct hlsl_type *sampler_type; struct hlsl_ir_node *coords, *load; + unsigned int coords_dim;
if (params->args_count != 2 && params->args_count != 4) { @@ -3324,8 +3325,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * hlsl_release_string_buffer(ctx, string); }
+ if (!strcmp(name, "tex2Dproj")) + { + load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; + coords_dim = 4; + } + else + { + load_params.type = HLSL_RESOURCE_SAMPLE; + coords_dim = hlsl_sampler_dim_count(dim); + } + if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, coords_dim), loc))) coords = params->args[1];
load_params.coords = coords; @@ -3345,6 +3357,12 @@ static bool intrinsic_tex2D(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); }
+static bool intrinsic_tex2Dproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex2Dproj", HLSL_SAMPLER_DIM_2D); +} + static bool intrinsic_tex3D(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3515,6 +3533,7 @@ intrinsic_functions[] = {"sqrt", 1, true, intrinsic_sqrt}, {"step", 2, true, intrinsic_step}, {"tex2D", -1, false, intrinsic_tex2D}, + {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, {"tex3D", -1, false, intrinsic_tex3D}, {"transpose", 1, true, intrinsic_transpose}, {"trunc", 1, true, intrinsic_trunc}, diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 2b6c595a1..00dab6f57 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2446,6 +2446,40 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; }
+/* For SM4 turn HLSL_RESOURCE_SAMPLE_PROJ to HLSL_RESOURCE_SAMPLE + DIV */ +static bool lower_tex_proj(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_ir_node *swizzle, *coords; + struct hlsl_ir_resource_load *load; + unsigned int dim_count; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + load = hlsl_ir_resource_load(instr); + if (load->load_type != HLSL_RESOURCE_SAMPLE_PROJ) + return false; + + dim_count = hlsl_sampler_dim_count(load->sampling_dim); + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), dim_count, load->coords.node, &instr->loc))) + return false; + list_add_before(&instr->entry, &swizzle->entry); + + operands[0] = load->coords.node; + operands[1] = swizzle; + if (!(coords = hlsl_new_expr(ctx, HLSL_OP2_DIV, operands, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, dim_count), + &instr->loc))) + return false; + list_add_before(&instr->entry, &coords->entry); + + load->load_type = HLSL_RESOURCE_SAMPLE; + + hlsl_src_remove(&load->coords); + hlsl_src_from_node(&load->coords, coords); + + return true; +} + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { switch (instr->type) @@ -3939,6 +3973,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } while (progress);
+ if (profile->major_version >= 4) + { + hlsl_transform_ir(ctx, lower_tex_proj, body, NULL); + } + if (profile->major_version < 4) { hlsl_transform_ir(ctx, lower_division, body, NULL); diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 60948d649..57315b98e 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -4989,6 +4989,9 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); break; + + case HLSL_RESOURCE_SAMPLE_PROJ: + vkd3d_unreachable(); } }
From: Nikolay Sivov nsivov@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 9d64ac1ee..b16ae548c 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3325,7 +3325,8 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * hlsl_release_string_buffer(ctx, string); }
- if (!strcmp(name, "tex2Dproj")) + if (!strcmp(name, "tex2Dproj") + || !strcmp(name, "tex3Dproj")) { load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; coords_dim = 4; @@ -3369,6 +3370,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); }
+static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex3Dproj", HLSL_SAMPLER_DIM_3D); +} + static bool intrinsic_transpose(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3535,6 +3542,7 @@ intrinsic_functions[] = {"tex2D", -1, false, intrinsic_tex2D}, {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, {"tex3D", -1, false, intrinsic_tex3D}, + {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, {"transpose", 1, true, intrinsic_transpose}, {"trunc", 1, true, intrinsic_trunc}, };
Francisco Casas (@fcasas) commented about libs/vkd3d-shader/hlsl.y:
hlsl_release_string_buffer(ctx, string); }
- if (!strcmp(name, "tex2Dproj"))
- if (!strcmp(name, "tex2Dproj")
|| !strcmp(name, "tex3Dproj"))
nitpick: the double line doesn't seem necessary.
I suggest adding at least this test:
```hlsl [pixel shader todo] sampler sam; float4 p;
float4 main() : sv_target { return tex2Dproj(sam, p); }
[test] uniform 0 float4 1.0 1.0 1234.0 2.0 todo draw quad todo probe all rgba (0.25, 0.0, 0.25, 0.0) ``` I confirmed that is the expected behavior using the minitestbot.
Even though to get this to pass we need to lower combined sample expressions, but that seems close in the horizon (!209).
Francisco Casas (@fcasas) commented about libs/vkd3d-shader/hlsl_codegen.c:
- unsigned int dim_count;
- if (instr->type != HLSL_IR_RESOURCE_LOAD)
return false;
- load = hlsl_ir_resource_load(instr);
- if (load->load_type != HLSL_RESOURCE_SAMPLE_PROJ)
return false;
- dim_count = hlsl_sampler_dim_count(load->sampling_dim);
- if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), dim_count, load->coords.node, &instr->loc)))
return false;
- list_add_before(&instr->entry, &swizzle->entry);
- operands[0] = load->coords.node;
- operands[1] = swizzle;
- if (!(coords = hlsl_new_expr(ctx, HLSL_OP2_DIV, operands, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, dim_count),
Since the result has the same type as the operands, you can use hlsl_new_binary_expr() here.
On Wed May 31 19:20:45 2023 +0000, Francisco Casas wrote:
Since the result has the same type as the operands, you can use hlsl_new_binary_expr() here.
It does not accept coords.node / swizzle, because types are different, I think.
On Wed May 31 19:20:43 2023 +0000, Francisco Casas wrote:
nitpick: the double line doesn't seem necessary.
There will be 1D and cube names too at some point.
On Wed May 31 20:21:33 2023 +0000, Francisco Casas wrote:
I suggest adding at least this test at the end of `tests/sampler.shader_test`:
[pixel shader todo] sampler sam; float4 p; float4 main() : sv_target { return tex2Dproj(sam, p); } [test] uniform 0 float4 1.0 1.0 1234.0 2.0 todo draw quad todo probe all rgba (0.25, 0.0, 0.25, 0.0)
I confirmed that is the expected behavior using the minitestbot. Even though to get this to pass we need to lower combined sample expressions, but that seems close in the horizon (!209).
Yes, that was exactly the reason for a lack of tests. I'm going to wait for !209 to get in first.
On Wed May 31 20:16:16 2023 +0000, Nikolay Sivov wrote:
It does not accept coords.node / swizzle, because types are different, I think.
Just to summarize what we discussed in IRC: there are few hlsl_ir_expr ops that are allowed to handle different types of arguments and return value. My understanding is that HLSL_OP2_DIV is not one of them (like, e.g. HLSL_OP2_DOT), when replacing this hlsl_new_expr() call with a call to hlsl_new_binary_expr(), the assertion should hold true.
This is not happening however, because `load->coords.node` is float4 and the swizzle is float2 (or float3 for tex3Dproj). What is missing is to create a new .XY (or .XYZ) swizzle for `load->coords.node` and pass it as arg1 instead of `load->coords.node` directly.
On Thu Jun 1 21:26:47 2023 +0000, Francisco Casas wrote:
Just to summarize what we discussed in IRC: there are few hlsl_ir_expr ops that are allowed to handle different types of arguments and return value. My understanding is that HLSL_OP2_DIV is not one of them (like, e.g. HLSL_OP2_DOT). When replacing this hlsl_new_expr() call with a call to hlsl_new_binary_expr(), the assertion should hold true. This is not happening however, because `load->coords.node` is float4 and the swizzle is float2 (or float3 for tex3Dproj). What is missing is to create a new .XY (or .XYZ) swizzle for `load->coords.node` and pass it as arg1 instead of `load->coords.node` directly.
Turns out 1D case works differently. It's using 2d resource for all profiles, using (x/w, 0.5) as a coordinate in SM4+, I think this justifies moving profile-specific logic out of codegen.c, back to the function.
On Fri Jun 2 09:46:31 2023 +0000, Nikolay Sivov wrote:
Turns out 1D case works differently. It's using 2d resource for all profiles, using (x/w, 0.5) as a coordinate in SM4+, I think this justifies moving profile-specific logic out of codegen.c, back to the function.
Another option is to retain original 1D dim value and carry it all the way to lower_tex_proj(), but it will produce awkward combination of dim and coordinates types that is resolved too late and for each binary format, when writing declarations.
On Fri Jun 2 09:52:49 2023 +0000, Nikolay Sivov wrote:
Another option is to retain original 1D dim value and carry it all the way to lower_tex_proj(), but it will produce awkward combination of dim and coordinates types that is resolved too late and for each binary format, when writing declarations.
After some thought, I think that what we are missing is a pass that lowers 1D combined samples to 2D. But **only** combined samples. So we end up with something like:
```c // Only 1D combined samples (with load->sampler == NULL) get converted into 2D. hlsl_transform_ir(ctx, lower_1d_combined_samples_to_2d, body, NULL); // ... if (profile->major_version >= 4) { hlsl_transform_ir(ctx, lower_tex_proj, body, NULL); } // ... if (profile->major_version >= 4) { hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); // From !209 } // ... ```
If we do this, you wouldn't have to worry about 1D HLSL_RESOURCE_SAMPLE_PROJ here.
On Fri Jun 2 21:03:18 2023 +0000, Francisco Casas wrote:
~~After some thought, I think that what we are missing is a pass that lowers 1D combined samples to 2D. But **only** combined samples. So we end up with something like:~~
// Only 1D combined samples (with load->sampler == NULL) get converted into 2D. hlsl_transform_ir(ctx, lower_1d_combined_samples_to_2d, body, NULL); // ... if (profile->major_version >= 4) { hlsl_transform_ir(ctx, lower_tex_proj, body, NULL); } // ... if (profile->major_version >= 4) { hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); // From !209 } // ...
~~If we do this, you wouldn't have to worry about 1D HLSL_RESOURCE_SAMPLE_PROJ here.~~
nevermind, that is not a solution because you would end up dividing 0.5 by w anyways.
So, SM1 doesn't care what we pass in the Y component of the `src0` of `texld` nor `texldp` when we are working with tex1D() and tex1Dproj(), we don't have to worry about making any transformation to 2D.
It is for SM4 that we have to care about making the second coordinate 0.5. Since this is also true for tex1D(). So my opinion is that this should be deferred to a latter pass:
```c if (profile->major_version >= 4) { // Lower [tex1Dproj -> 1d sample] normally. hlsl_transform_ir(ctx, lower_tex_proj, body, NULL); // Add a 0.5 component to the coords of 1D combined sample resource loads, making them 2D. hlsl_transform_ir(ctx, lower_1d_combined_samples_to_2d, body, NULL); } // ... hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); // as in !209 ```
On Fri Jun 2 21:04:18 2023 +0000, Francisco Casas wrote:
nevermind, that is not a solution because you would end up dividing 0.5 by w anyways.
Native pretty much throws whatever junk into the y coordinate for regular tex1D(). It's more consistent with tex1Dproj() but I don't think that's saying much. I doubt we want to worry that much about what we're putting in the y coordinate.
I also don't think that tex1D() should really emit anything 1d. It seems like it should just emit 2D IR.
On Sat Jun 3 08:15:05 2023 +0000, Francisco Casas wrote:
So, SM1 doesn't care what we pass in the Y component of the `src0` of `texld` nor `texldp` when we are working with tex1D() and tex1Dproj(), we don't have to worry about making any transformation to 2D. It is for SM4 that we have to care about making the second coordinate 0.5. Since this is also true for tex1D(), my opinion is that this should be deferred to a later pass:
if (profile->major_version >= 4) { // Lower [tex1Dproj -> 1d sample] normally. hlsl_transform_ir(ctx, lower_tex_proj, body, NULL); // Add a 0.5 component to the coords of 1D combined sample resource loads, making them 2D. hlsl_transform_ir(ctx, lower_1d_combined_samples_to_2d, body, NULL); } // ... hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); // as in !209
That means tex1D* should produce 1D or 2D load initially, depending on profile. It might as well set coordinates correctly at the same time.