-- v2: vkd3d-shader/hlsl: Implement texCUBEproj(). vkd3d-shader/d3dbc: Disallow 1D sampler types when writing sampler declaration. vkd3d-shader/hlsl: Implement tex3Dproj(). vkd3d-shader/hlsl: Implement tex1Dproj(). vkd3d-shader/hlsl: Implement tex2Dproj().
From: Nikolay Sivov nsivov@codeweavers.com
--- libs/vkd3d-shader/d3dbc.c | 2 ++ libs/vkd3d-shader/hlsl.h | 3 ++- libs/vkd3d-shader/hlsl.y | 23 +++++++++++++++++-- libs/vkd3d-shader/hlsl_codegen.c | 39 ++++++++++++++++++++++++++++++++ libs/vkd3d-shader/tpf.c | 3 +++ 5 files changed, 67 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index c35f8ca0f..2ae8df5f7 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1907,6 +1907,8 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_
.src_count = 2, }; + if (load->load_type == HLSL_RESOURCE_SAMPLE_PROJ) + sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT;
assert(instr->reg.allocated);
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 7d02448e0..2b88181f4 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -617,9 +617,10 @@ enum hlsl_resource_load_type HLSL_RESOURCE_SAMPLE, HLSL_RESOURCE_SAMPLE_CMP, HLSL_RESOURCE_SAMPLE_CMP_LZ, + HLSL_RESOURCE_SAMPLE_GRAD, HLSL_RESOURCE_SAMPLE_LOD, HLSL_RESOURCE_SAMPLE_LOD_BIAS, - HLSL_RESOURCE_SAMPLE_GRAD, + HLSL_RESOURCE_SAMPLE_PROJ, HLSL_RESOURCE_GATHER_RED, HLSL_RESOURCE_GATHER_GREEN, HLSL_RESOURCE_GATHER_BLUE, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index cf483d82c..9d64ac1ee 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3295,9 +3295,10 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) { - struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_SAMPLE}; + struct hlsl_resource_load_params load_params = { 0 }; const struct hlsl_type *sampler_type; struct hlsl_ir_node *coords, *load; + unsigned int coords_dim;
if (params->args_count != 2 && params->args_count != 4) { @@ -3324,8 +3325,19 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * hlsl_release_string_buffer(ctx, string); }
+ if (!strcmp(name, "tex2Dproj")) + { + load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; + coords_dim = 4; + } + else + { + load_params.type = HLSL_RESOURCE_SAMPLE; + coords_dim = hlsl_sampler_dim_count(dim); + } + if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, coords_dim), loc))) coords = params->args[1];
load_params.coords = coords; @@ -3345,6 +3357,12 @@ static bool intrinsic_tex2D(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); }
+static bool intrinsic_tex2Dproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex2Dproj", HLSL_SAMPLER_DIM_2D); +} + static bool intrinsic_tex3D(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3515,6 +3533,7 @@ intrinsic_functions[] = {"sqrt", 1, true, intrinsic_sqrt}, {"step", 2, true, intrinsic_step}, {"tex2D", -1, false, intrinsic_tex2D}, + {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, {"tex3D", -1, false, intrinsic_tex3D}, {"transpose", 1, true, intrinsic_transpose}, {"trunc", 1, true, intrinsic_trunc}, diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 2b6c595a1..d376d46df 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2446,6 +2446,40 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; }
+/* For SM4 turn HLSL_RESOURCE_SAMPLE_PROJ to HLSL_RESOURCE_SAMPLE + DIV */ +static bool lower_tex_proj(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *divisor, *c, *coords; + struct hlsl_ir_resource_load *load; + unsigned int dim_count; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + load = hlsl_ir_resource_load(instr); + if (load->load_type != HLSL_RESOURCE_SAMPLE_PROJ) + return false; + + dim_count = hlsl_sampler_dim_count(load->sampling_dim); + if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), dim_count, load->coords.node, &instr->loc))) + return false; + list_add_before(&instr->entry, &divisor->entry); + + if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dim_count, load->coords.node, &instr->loc))) + return false; + list_add_before(&instr->entry, &c->entry); + + if (!(coords = hlsl_new_binary_expr(ctx, HLSL_OP2_DIV, c, divisor))) + return false; + list_add_before(&instr->entry, &coords->entry); + + load->load_type = HLSL_RESOURCE_SAMPLE; + + hlsl_src_remove(&load->coords); + hlsl_src_from_node(&load->coords, coords); + + return true; +} + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { switch (instr->type) @@ -3939,6 +3973,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } while (progress);
+ if (profile->major_version >= 4) + { + hlsl_transform_ir(ctx, lower_tex_proj, body, NULL); + } + if (profile->major_version < 4) { hlsl_transform_ir(ctx, lower_division, body, NULL); diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 60948d649..57315b98e 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -4989,6 +4989,9 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); break; + + case HLSL_RESOURCE_SAMPLE_PROJ: + vkd3d_unreachable(); } }
From: Nikolay Sivov nsivov@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 13 ++++++++- libs/vkd3d-shader/hlsl_codegen.c | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 9d64ac1ee..840b0030b 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3325,7 +3325,11 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * hlsl_release_string_buffer(ctx, string); }
- if (!strcmp(name, "tex2Dproj")) + if (ctx->profile->major_version < 4 && dim == HLSL_SAMPLER_DIM_1D) + dim = HLSL_SAMPLER_DIM_2D; + + if (!strcmp(name, "tex1Dproj") + || !strcmp(name, "tex2Dproj")) { load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; coords_dim = 4; @@ -3351,6 +3355,12 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * return true; }
+static bool intrinsic_tex1Dproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex1Dproj", HLSL_SAMPLER_DIM_1D); +} + static bool intrinsic_tex2D(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3532,6 +3542,7 @@ intrinsic_functions[] = {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, {"step", 2, true, intrinsic_step}, + {"tex1Dproj", 2, false, intrinsic_tex1Dproj}, {"tex2D", -1, false, intrinsic_tex2D}, {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, {"tex3D", -1, false, intrinsic_tex3D}, diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index d376d46df..2bad337b6 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2480,6 +2480,55 @@ static bool lower_tex_proj(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, voi return true; }
+static bool lower_tex_1d(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *store, *half, *x; + struct hlsl_ir_resource_load *load; + struct hlsl_deref coords_deref; + struct hlsl_ir_load *var_load; + struct hlsl_ir_var *coords; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + load = hlsl_ir_resource_load(instr); + if (load->sampler.var) + return false; + if (load->sampling_dim != HLSL_SAMPLER_DIM_1D) + return false; + + if (!(half = hlsl_new_float_constant(ctx, 0.5f, &instr->loc))) + return false; + list_add_before(&instr->entry, &half->entry); + + if (!(coords = hlsl_new_synthetic_var(ctx, "coords", + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), &instr->loc))) + return false; + + if (!(x = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), 1, load->coords.node, &instr->loc))) + return false; + list_add_before(&instr->entry, &x->entry); + + hlsl_init_simple_deref_from_var(&coords_deref, coords); + if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, x, 0, &instr->loc))) + return false; + list_add_before(&instr->entry, &store->entry); + + if (!(store = hlsl_new_store_index(ctx, &coords_deref, NULL, half, 1 << 1, &instr->loc))) + return false; + list_add_before(&instr->entry, &store->entry); + + if (!(var_load = hlsl_new_var_load(ctx, coords, &instr->loc))) + return false; + list_add_before(&instr->entry, &var_load->node.entry); + + load->sampling_dim = HLSL_SAMPLER_DIM_2D; + + hlsl_src_remove(&load->coords); + hlsl_src_from_node(&load->coords, &var_load->node); + + return true; +} + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { switch (instr->type) @@ -3976,6 +4025,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry if (profile->major_version >= 4) { hlsl_transform_ir(ctx, lower_tex_proj, body, NULL); + hlsl_transform_ir(ctx, lower_tex_1d, body, NULL); }
if (profile->major_version < 4)
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/hlsl.y | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 840b0030b..f4e37f697 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3329,7 +3329,8 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * dim = HLSL_SAMPLER_DIM_2D;
if (!strcmp(name, "tex1Dproj") - || !strcmp(name, "tex2Dproj")) + || !strcmp(name, "tex2Dproj") + || !strcmp(name, "tex3Dproj")) { load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; coords_dim = 4; @@ -3379,6 +3380,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); }
+static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex3Dproj", HLSL_SAMPLER_DIM_3D); +} + static bool intrinsic_transpose(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3546,6 +3553,7 @@ intrinsic_functions[] = {"tex2D", -1, false, intrinsic_tex2D}, {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, {"tex3D", -1, false, intrinsic_tex3D}, + {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, {"transpose", 1, true, intrinsic_transpose}, {"trunc", 1, true, intrinsic_trunc}, };
From: Nikolay Sivov nsivov@codeweavers.com
The 1D resource type should not be used in this context: SM1 sampler declarations must not be written with a 1D sampler type.
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/d3dbc.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 2ae8df5f7..2b1129a83 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1630,10 +1630,6 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu
switch (sampler_dim) { - case HLSL_SAMPLER_DIM_1D: - res_type = VKD3D_SM1_RESOURCE_TEXTURE_1D; - break; - case HLSL_SAMPLER_DIM_2D: res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; break;
From: Nikolay Sivov nsivov@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index f4e37f697..745287eab 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3330,7 +3330,8 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
if (!strcmp(name, "tex1Dproj") || !strcmp(name, "tex2Dproj") - || !strcmp(name, "tex3Dproj")) + || !strcmp(name, "tex3Dproj") + || !strcmp(name, "texCUBEproj")) { load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; coords_dim = 4; @@ -3386,6 +3387,12 @@ static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex3Dproj", HLSL_SAMPLER_DIM_3D); }
+static bool intrinsic_texCUBEproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "texCUBEproj", HLSL_SAMPLER_DIM_CUBE); +} + static bool intrinsic_transpose(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3554,6 +3561,7 @@ intrinsic_functions[] = {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, {"tex3D", -1, false, intrinsic_tex3D}, {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, + {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, {"transpose", 1, true, intrinsic_transpose}, {"trunc", 1, true, intrinsic_trunc}, };
Alright, pushed something that should work.
On Sat Jun 3 08:15:04 2023 +0000, Nikolay Sivov wrote:
That means tex1D* should produce a 1D or a 2D load initially, depending on the profile. It might as well set the coordinates correctly at the same time.
No, my proposal is to always produce a 1D load, which should be promoted to a 2D load by a SM4-specific pass.
On Mon Jun 5 13:59:31 2023 +0000, Francisco Casas wrote:
No, my proposal is to always produce a 1D load, which should be promoted to a 2D load by a SM4-specific pass.
I don't see how it ever makes sense to emit a 1D load in the IR from intrinsic_tex1D(). The load is fundamentally 2D, regardless of profile version. Lowering is meant to deal with differences between the bytecode of different profiles, or with high-level constructions that can't easily be emitted already lowered; I don't see how either applies here.
On Mon Jun 5 20:32:54 2023 +0000, Zebediah Figura wrote:
I don't see how it ever makes sense to emit a 1D load in the IR from intrinsic_tex1D(). The load is fundamentally 2D, regardless of profile version. Lowering is a thing to deal with differences between profile bytecode, or high-level constructions that can't easily be emitted already lowered; I don't see how either applies here?
The point is not for it to make sense as a load, but to be able to tell them apart later. If it was tex2D() initially, there is nothing else to do; if it was tex1D(), SM4 needs to do something (checking for the combined sampling case to distinguish it from a regular SM4 1D load).
What was proposed is to produce something "common" for functions, and then have multiple passes to fix that up, instead of producing final result immediately.
This merge request was approved by Francisco Casas.
On Mon Jun 5 21:08:01 2023 +0000, Nikolay Sivov wrote:
It's not to make sense as a load, but to be able to tell them apart later. If it was tex2D() initially there is nothing else to do, if it was tex1D(), SM4 needs to do something (checking for combined sampling case to distinguish it from regular SM4 1D load). What was proposed is to produce something "common" for functions, and then have multiple passes to fix that up, instead of producing final result immediately.
It's still not clear to me that we need to treat the combined sampling case specially, though.
On Wed Jun 7 23:06:12 2023 +0000, Zebediah Figura wrote:
It's still not clear to me that we need to treat the combined sampling case specially, though.
I don't understand what you are proposing to do. Treat the combined case specially where? Turning all 1D loads into 2D loads right away does not work without extra logic to adjust the coordinates, either in intrinsic_tex() for SM4 or in a later pass.
On Thu Jun 8 04:56:46 2023 +0000, Nikolay Sivov wrote:
I don't understand what you are proposing to do. Treat combined case specially where? Turning all 1D to 2D right away does not work without extra logic to adjust coordinates in intrinsic_tex() for sm4, or later.
I'm trying to say that we can just implement tex1D(s, coords) as if it were tex2D(s, float2(coords.x, 0.5)) in the intrinsic handler. Similarly tex1Dproj(s, coords) can be tex2Dproj(s, float4(coords.x, 0.5, 0, coords.w)). The only potential difference is that native throws different garbage into the y coordinate, depending on shader target, but I'm not really sure that it matters.