Mainly promoting single object components to variables for SM 5.0's RDEF block and lowering combined samplers to separate sampler+texture objects for SM 4.
Following patches (including prepending uniform copies resource components within struct parameters) in: https://gitlab.winehq.org/fcasas/vkd3d/-/commits/master6c
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 19 +++++++ libs/vkd3d-shader/hlsl.h | 3 ++ libs/vkd3d-shader/tpf.c | 103 ++++++++++++++++++++++++++++++++++++++ tests/cbuffer.shader_test | 6 +-- 4 files changed, 128 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 617aef30..ce007f14 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -430,6 +430,25 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl return type; }
+void hlsl_write_component_name(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + const struct hlsl_ir_var *var, unsigned int index) +{ + struct hlsl_type *type = var->data_type, *current_type; + unsigned int element_index; + + vkd3d_string_buffer_printf(buffer, "%s", var->name); + + while (!type_is_single_component(type)) + { + current_type = type; + element_index = traverse_path_from_component_index(ctx, &type, &index); + if (current_type->class == HLSL_CLASS_STRUCT) + vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); + else + vkd3d_string_buffer_printf(buffer, "[%u]", element_index); + } +} + static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len) { diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 6a4e314d..7ed6428a 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1202,6 +1202,9 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
+void hlsl_write_component_name(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + const struct hlsl_ir_var *var, unsigned int index); + bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index eadad3b9..a807a166 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -2515,6 +2515,106 @@ static bool type_is_integer(const struct hlsl_type *type) } }
+/* Create a new extern variable for each object component within each extern variable. */ +static void promote_object_components_to_variables(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + struct hlsl_ir_var *var; + unsigned int k, i; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int component_count = hlsl_type_component_count(var->data_type); + unsigned int offsets[HLSL_REGSET_LAST_OBJECT + 1]; + struct vkd3d_string_buffer *name; + const char *new_var_name; + + if (var->data_type->class == HLSL_CLASS_OBJECT) + continue; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + offsets[k] = 0; + + for (i = 0; i < component_count; ++i) + { + struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, i); + struct hlsl_ir_var *new_var; + struct hlsl_ir_node *instr; + enum hlsl_regset regset; + + if (!hlsl_type_is_resource(component_type)) + continue; + + regset = hlsl_type_get_regset(component_type); + + if (offsets[regset] > var->regs[regset].bind_count) + continue; + + if (var->objects_usage[regset][offsets[regset]].used) + { + if (!(name = hlsl_get_string_buffer(ctx))) + return; + hlsl_write_component_name(ctx, name, var, i); + new_var_name = hlsl_strdup(ctx, name->buffer); + hlsl_release_string_buffer(ctx, name); + + TRACE("Promoting %s as a stand-alone variable.\n", debugstr_a(new_var_name)); + + if (!(new_var = hlsl_new_var(ctx, new_var_name, component_type, &var->loc, NULL, + var->data_type->modifiers, NULL))) + return; + new_var->is_uniform = 1; + list_add_before(&var->scope_entry, &new_var->scope_entry); + list_add_before(&var->extern_entry, &new_var->extern_entry); + + new_var->regs[regset].id = var->regs[regset].id + offsets[regset]; + new_var->regs[regset].allocated = true; + new_var->regs[regset].bind_count = 1; + + new_var->objects_usage[regset][0] = var->objects_usage[regset][offsets[regset]]; + + /* Make all derefs pointing to the component to point to the new variable instead. */ + LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) + { + if (instr->type == HLSL_IR_RESOURCE_LOAD) + { + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + + if (load->resource.var == var && load->resource.offset_regset == regset + && hlsl_offset_from_deref_safe(ctx, &load->resource) == offsets[regset]) + { + load->resource.var = new_var; + hlsl_src_remove(&load->resource.offset); + } + + if (load->sampler.var == var && load->sampler.offset_regset == regset + && hlsl_offset_from_deref_safe(ctx, &load->sampler) == offsets[regset]) + { + load->sampler.var = new_var; + hlsl_src_remove(&load->sampler.offset); + } + } + else if (instr->type == HLSL_IR_RESOURCE_STORE) + { + struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); + + if (store->resource.var == var && store->resource.offset_regset == regset + && hlsl_offset_from_deref_safe(ctx, &store->resource) == offsets[regset]) + { + store->resource.var = new_var; + hlsl_src_remove(&store->resource.offset); + } + } + } + } + + ++offsets[regset]; + } + + for (i = 0; i <= HLSL_REGSET_LAST_OBJECT; ++i) + var->regs[i].allocated = false; + } +} + bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) { @@ -5120,6 +5220,9 @@ int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun size_t i; int ret;
+ if (ctx->profile->major_version >= 5) + promote_object_components_to_variables(ctx, entry_func); + dxbc_writer_init(&dxbc);
write_sm4_signature(ctx, &dxbc, false); diff --git a/tests/cbuffer.shader_test b/tests/cbuffer.shader_test index 7e2a91dc..83397c18 100644 --- a/tests/cbuffer.shader_test +++ b/tests/cbuffer.shader_test @@ -694,7 +694,7 @@ shader model >= 5.0 size (1, 1) 0.0 0.0 0.0 0.5
-[pixel shader todo] +[pixel shader] struct apple { float2 a; @@ -718,5 +718,5 @@ uniform 0 float4 0.0 1.0 2.0 3.0 uniform 4 float4 4.0 5.0 6.0 7.0 uniform 8 float4 8.0 9.0 10.0 11.0 uniform 12 float4 12.0 13.0 14.0 15.0 -todo draw quad -todo probe all rgba (124.0, 135.0, 146.0, 150.5) +draw quad +probe all rgba (124.0, 135.0, 146.0, 150.5)
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/hlsl-combined-samplers.shader_test | 125 +++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 tests/hlsl-combined-samplers.shader_test
diff --git a/Makefile.am b/Makefile.am index e06d0eeb..560c2c81 100644 --- a/Makefile.am +++ b/Makefile.am @@ -79,6 +79,7 @@ vkd3d_shader_tests = \ tests/hlsl-attributes.shader_test \ tests/hlsl-bool-cast.shader_test \ tests/hlsl-clamp.shader_test \ + tests/hlsl-combined-samplers.shader_test \ tests/hlsl-comma.shader_test \ tests/hlsl-cross.shader_test \ tests/hlsl-d3dcolor-to-ubyte4.shader_test \ diff --git a/tests/hlsl-combined-samplers.shader_test b/tests/hlsl-combined-samplers.shader_test new file mode 100644 index 00000000..b9677749 --- /dev/null +++ b/tests/hlsl-combined-samplers.shader_test @@ -0,0 +1,125 @@ +[sampler 0] +filter linear linear linear +address clamp clamp clamp + +[sampler 1] +filter linear linear linear +address clamp clamp clamp + +[sampler 2] +filter linear linear linear +address clamp clamp clamp + +[texture 0] +size (1, 1) +0.0 0.0 0.0 1.0 + +[texture 1] +size (1, 1) +1.0 1.0 1.0 1.0 + +[texture 2] +size (1, 1) +2.0 2.0 2.0 1.0 + +[texture 3] +size (1, 1) +3.0 3.0 3.0 1.0 + +[texture 4] +size (1, 1) +4.0 4.0 4.0 1.0 + + +[pixel shader todo] +sampler sam; + +float4 main() : sv_target +{ + return tex2D(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (0, 0, 0, 1) + + +[pixel shader todo] +Texture2D tex; +sampler sam; + +// Textures for new separated samplers are allocated before regular textures. +float4 main() : sv_target +{ + return 10 * tex.Sample(sam, float2(0, 0)) + tex2D(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (10, 10, 10, 11) + + +[pixel shader todo] +Texture2D tex; +sampler sam[2]; + +float4 main() : sv_target +{ + return 10 * tex.Sample(sam[0], float2(0, 0)) + tex2D(sam[1], float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (21, 21, 21, 11) + + +[pixel shader todo] +sampler sam0; +sampler sam1; +sampler sam2; + +float4 main() : sv_target +{ + return 100 * tex2D(sam1, float2(0, 0)) + 10 * tex2D(sam0, float2(0, 0)) + + tex2D(sam2, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (102, 102, 102, 111) + + +[pixel shader todo] +Texture2D tex[2][2]; +sampler sam; + +float4 main() : sv_target +{ + return 100 * tex[0][0].Load(int3(0, 0, 0)) + 10 * tex2D(sam, float2(0, 0)) + + tex[1][1].Sample(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (104, 104, 104, 111) + + +[require] +shader model >= 5.0 + + +[pixel shader todo] +struct +{ + Texture2D tex; + sampler sam; +} foo; + +float4 main() : sv_target +{ + return 10 * foo.tex.Sample(foo.sam, float2(0, 0)) + tex2D(foo.sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (10, 10, 10, 11)
From: Francisco Casas fcasas@codeweavers.com
We are using the hlsl_ir_var.is_uniform flag to indicate when an object is a uniform copy created from a variable with the HLSL_STORAGE_UNIFORM modifier.
We should be checking for this instead of the HLSL_STORAGE_UNIFORM flag which is also set to 1 for the original variables, and there should be no reason to use this flag instead of "is_uniform" after the uniform copies and combined/separated samplers are created. --- libs/vkd3d-shader/hlsl_codegen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 638dab6e..9344b193 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1679,7 +1679,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Loaded resource must have a single uniform source."); @@ -1694,7 +1694,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_
if (load->sampler.var) { - if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->sampler.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Resource load sampler must have a single uniform source."); @@ -1712,7 +1712,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!store->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Accessed resource must have a single uniform source.");
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 20 +++++++++++++------- libs/vkd3d-shader/hlsl.h | 2 ++ 2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index ce007f14..b68ff5f2 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1011,18 +1011,24 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem struct vkd3d_string_buffer *string; struct hlsl_ir_var *var; static LONG counter; - const char *name;
if (!(string = hlsl_get_string_buffer(ctx))) return NULL; vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); - if (!(name = hlsl_strdup(ctx, string->buffer))) - { - hlsl_release_string_buffer(ctx, string); - return NULL; - } - var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); + var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc); hlsl_release_string_buffer(ctx, string); + return var; +} + +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_var *var; + const char *name_copy; + + if (!(name_copy = hlsl_strdup(ctx, name))) + return NULL; + var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); if (var) list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); return var; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 7ed6428a..b7117f0f 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1145,6 +1145,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned in struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format);
From: Francisco Casas fcasas@codeweavers.com
Co-authored-by: Zebediah Figura zfigura@codeweavers.com
hlsl_new_synthetic_var_named() is introduced to create the new resources.
track_object_components_usage() must be called twice:
- One to detect that the same generic sampler isn't used in both tex2D() and tex3D().
- One after the lower_combined_samples() pass so that the object components usage of the new textures is initialized correctly. --- libs/vkd3d-shader/hlsl.c | 48 ++++++++++++ libs/vkd3d-shader/hlsl.h | 4 + libs/vkd3d-shader/hlsl_codegen.c | 99 ++++++++++++++++++++++-- libs/vkd3d-shader/tpf.c | 13 ++-- tests/hlsl-combined-samplers.shader_test | 30 +++---- tests/shader_runner.c | 2 + 6 files changed, 169 insertions(+), 27 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index b68ff5f2..378c9ab0 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -449,6 +449,25 @@ void hlsl_write_component_name(struct hlsl_ctx *ctx, struct vkd3d_string_buffer } }
+void hlsl_write_deref_path(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + const struct hlsl_deref *deref) +{ + const struct hlsl_type *type = deref->var->data_type; + unsigned int element_index, i; + + vkd3d_string_buffer_printf(buffer, "%s", deref->var->name); + + for (i = 0; i < deref->path_len; ++i) + { + element_index = hlsl_ir_constant(deref->path[i].node)->value.u[0].u; + if (type->class == HLSL_CLASS_STRUCT) + vkd3d_string_buffer_printf(buffer, ".%s", type->e.record.fields[element_index].name); + else + vkd3d_string_buffer_printf(buffer, "[%u]", element_index); + type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); + } +} + static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len) { @@ -1059,6 +1078,35 @@ bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struc return true; }
+void hlsl_strip_deref_suffix(struct hlsl_ctx *ctx, struct hlsl_deref *deref, unsigned int at) +{ + unsigned int i; + + assert(at <= deref->path_len); + + for (i = at; i < deref->path_len; ++i) + hlsl_src_remove(&deref->path[i]); + deref->path_len = at; +} + +void hlsl_strip_deref_prefix(struct hlsl_ctx *ctx, struct hlsl_deref *deref, unsigned int at) +{ + unsigned int i; + + assert(at <= deref->path_len); + + if (at == 0) + return; + + for (i = 0; i < deref->path_len; ++i) + { + hlsl_src_remove(&deref->path[i]); + if (i < deref->path_len - at) + hlsl_src_from_node(&deref->path[i], deref->path[i + at].node); + } + deref->path_len = deref->path_len - at; +} + void hlsl_cleanup_deref(struct hlsl_deref *deref) { unsigned int i; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index b7117f0f..5281412a 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1064,6 +1064,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *chain); bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); +void hlsl_strip_deref_suffix(struct hlsl_ctx *ctx, struct hlsl_deref *deref, unsigned int at); +void hlsl_strip_deref_prefix(struct hlsl_ctx *ctx, struct hlsl_deref *deref, unsigned int at);
void hlsl_cleanup_deref(struct hlsl_deref *deref); void hlsl_cleanup_semantic(struct hlsl_semantic *semantic); @@ -1206,6 +1208,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere
void hlsl_write_component_name(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_var *var, unsigned int index); +void hlsl_write_deref_path(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + const struct hlsl_deref *deref);
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 9344b193..239f686e 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1981,6 +1981,88 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; }
+/* Lower combined samples and sampler variables to synthesized separated textures and samplers. + * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ +static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + unsigned int i, k, path_len_to_sampler_array = 0; + struct hlsl_deref deref_to_sampler_array; + struct hlsl_ir_resource_load *sample; + struct hlsl_type *sampler_array_type; + struct vkd3d_string_buffer *name; + struct hlsl_ir_var *var; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + sample = hlsl_ir_resource_load(instr); + if (sample->load_type != HLSL_RESOURCE_SAMPLE || sample->sampler.var) + return false; + + /* Find the corresponding sampler array (or single sample component) in the path. */ + sampler_array_type = sample->resource.var->data_type; + for (i = 0; i <= sample->resource.path_len; ++i) + { + if (hlsl_type_is_resource(sampler_array_type)) + { + path_len_to_sampler_array = i; + break; + } + assert(i < sample->resource.path_len); + sampler_array_type = hlsl_get_element_type_from_path_index(ctx, sampler_array_type, sample->resource.path[i].node); + } + assert(hlsl_type_get_regset(sampler_array_type) == HLSL_REGSET_SAMPLERS); + + hlsl_copy_deref(ctx, &deref_to_sampler_array, &sample->resource); + hlsl_strip_deref_suffix(ctx, &deref_to_sampler_array, path_len_to_sampler_array); + + if (!(name = hlsl_get_string_buffer(ctx))) + return false; + vkd3d_string_buffer_printf(name, "<resource>"); + hlsl_write_deref_path(ctx, name, &deref_to_sampler_array); + + TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); + + if (!(var = hlsl_get_var(ctx->globals, name->buffer))) + { + struct hlsl_type *data_type = hlsl_new_texture_type(ctx, sample->sampling_dim, + ctx->builtin_types.vector[HLSL_TYPE_FLOAT][4 - 1], 0); + unsigned int array_dims = sample->resource.path_len - path_len_to_sampler_array; + + /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. */ + for (i = 0; i < array_dims; ++i) + { + struct hlsl_type *arr_type = sampler_array_type; + + for (k = 1; k < array_dims - i; ++k) + { + assert(arr_type->class == HLSL_CLASS_ARRAY); + arr_type = arr_type->e.array.type; + } + data_type = hlsl_new_array_type(ctx, data_type, hlsl_type_element_count(arr_type)); + } + + if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, data_type, &instr->loc))) + { + hlsl_release_string_buffer(ctx, name); + hlsl_cleanup_deref(&deref_to_sampler_array); + return false; + } + var->is_uniform = 1; + + list_add_before(&sample->resource.var->extern_entry, &var->extern_entry); + } + hlsl_release_string_buffer(ctx, name); + hlsl_cleanup_deref(&deref_to_sampler_array); + + hlsl_copy_deref(ctx, &sample->sampler, &sample->resource); + sample->resource.var = var; + hlsl_strip_deref_prefix(ctx, &sample->resource, path_len_to_sampler_array); + assert(hlsl_deref_get_type(ctx, &sample->resource)->base_type == HLSL_TYPE_TEXTURE); + assert(hlsl_deref_get_type(ctx, &sample->sampler)->base_type == HLSL_TYPE_SAMPLER); + + return true; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -3348,7 +3430,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) + if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) continue;
buffer = var1->buffer; @@ -3359,7 +3441,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) { unsigned int var1_reg_size, var2_reg_size;
- if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) + if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) continue;
if (var1 == var2 || var1->buffer != var2->buffer) @@ -3409,7 +3491,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) + if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) { if (var->is_param) var->buffer = ctx->params_buffer; @@ -3927,6 +4009,14 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } while (progress);
+ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + if (profile->major_version >= 4) + { + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + } + if (profile->major_version < 4) { hlsl_transform_ir(ctx, lower_division, body, NULL); @@ -3940,9 +4030,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_abs, body, NULL); }
- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); - hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); - /* TODO: move forward, remove when no longer needed */ hlsl_transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index a807a166..7e9e1cfb 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3225,7 +3225,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { var = extern_resources[i];
- string_offset = put_string(&buffer, var->name); + if (!strncmp(var->name, "<resource>", strlen("<resource>"))) + string_offset = put_string(&buffer, var->name + strlen("<resource>")); + else + string_offset = put_string(&buffer, var->name); set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); }
@@ -4973,11 +4976,9 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx,
case HLSL_RESOURCE_SAMPLE: case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - if (!load->sampler.var) - { - hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); - return; - } + /* Combined sample expressions were lowered. */ + assert(load->sampler.var); + write_sm4_sample(ctx, buffer, load); break;
diff --git a/tests/hlsl-combined-samplers.shader_test b/tests/hlsl-combined-samplers.shader_test index b9677749..d013c92d 100644 --- a/tests/hlsl-combined-samplers.shader_test +++ b/tests/hlsl-combined-samplers.shader_test @@ -31,7 +31,7 @@ size (1, 1) 4.0 4.0 4.0 1.0
-[pixel shader todo] +[pixel shader] sampler sam;
float4 main() : sv_target @@ -40,11 +40,11 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0, 0, 0, 1) +draw quad +probe all rgba (0, 0, 0, 1)
-[pixel shader todo] +[pixel shader] Texture2D tex; sampler sam;
@@ -55,11 +55,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad todo probe all rgba (10, 10, 10, 11)
-[pixel shader todo] +[pixel shader] Texture2D tex; sampler sam[2];
@@ -69,11 +69,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad todo probe all rgba (21, 21, 21, 11)
-[pixel shader todo] +[pixel shader] sampler sam0; sampler sam1; sampler sam2; @@ -85,11 +85,11 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (102, 102, 102, 111) +draw quad +probe all rgba (102, 102, 102, 111)
-[pixel shader todo] +[pixel shader] Texture2D tex[2][2]; sampler sam;
@@ -100,7 +100,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad todo probe all rgba (104, 104, 104, 111)
@@ -108,7 +108,7 @@ todo probe all rgba (104, 104, 104, 111) shader model >= 5.0
-[pixel shader todo] +[pixel shader] struct { Texture2D tex; @@ -121,5 +121,5 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (10, 10, 10, 11) +draw quad +probe all rgba (10, 10, 10, 11) diff --git a/tests/shader_runner.c b/tests/shader_runner.c index b51145c7..de2a2608 100644 --- a/tests/shader_runner.c +++ b/tests/shader_runner.c @@ -1208,6 +1208,8 @@ out:
START_TEST(shader_runner) { + fflush(stdout); + parse_args(argc, argv);
#if defined(VKD3D_CROSSTEST)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 239f686e..d066daf1 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3047,9 +3047,12 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) { for (i = 0; i < type->reg_size[k]; ++i) { - /* Samplers are only allocated until the last used one. */ + bool is_separated = !strncmp(var->name, "<resource>", strlen("<resource>")); + + /* Samplers (and textures separated from them) are only allocated until the last + * used one. */ if (var->objects_usage[k][i].used) - var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; + var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS || is_separated) ? i + 1 : type->reg_size[k]; } } }
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl.c:
return type;
}
+void hlsl_write_component_name(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
const struct hlsl_ir_var *var, unsigned int index)
Should this function return the string buffer? (Or its duplicated string?)
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/tpf.c:
for (i = 0; i < component_count; ++i)
{
struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, i);
struct hlsl_ir_var *new_var;
struct hlsl_ir_node *instr;
enum hlsl_regset regset;
if (!hlsl_type_is_resource(component_type))
continue;
regset = hlsl_type_get_regset(component_type);
if (offsets[regset] > var->regs[regset].bind_count)
continue;
if (var->objects_usage[regset][offsets[regset]].used)
See, this logic is complicated, and it makes me think I was right to say that this information should be tracked per-component.
Either way I feel like what we want is some helper that translates a component index to its register set + offset.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/tpf.c:
size_t i; int ret;
- if (ctx->profile->major_version >= 5)
Somewhat out of curiosity, do you have any indication of whether this should be done for 5.1? I tried to write a shader that put objects in structures, but native d3dcompiler_47 just crashed with it.
(This was probably talked about at some point, but I've just forgotten by now.)
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/tpf.c:
if (store->resource.var == var && store->resource.offset_regset == regset
&& hlsl_offset_from_deref_safe(ctx, &store->resource) == offsets[regset])
{
store->resource.var = new_var;
hlsl_src_remove(&store->resource.offset);
}
}
}
}
++offsets[regset];
}
for (i = 0; i <= HLSL_REGSET_LAST_OBJECT; ++i)
var->regs[i].allocated = false;
This feels fragile.
I've probably asked this already, but do we really need to create new hlsl_ir_vars? If this is just specific to RDEF logic, can we just add code to generate multiple RDEF entries from a single var?
Zebediah Figura (@zfigura) commented about tests/shader_runner.c:
START_TEST(shader_runner) {
- fflush(stdout);
This probably belongs in its own patch. Some rationale would be nice too.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
+/* Lower combined samples and sampler variables to synthesized separated textures and samplers.
- That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */
+static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{
- unsigned int i, k, path_len_to_sampler_array = 0;
- struct hlsl_deref deref_to_sampler_array;
- struct hlsl_ir_resource_load *sample;
- struct hlsl_type *sampler_array_type;
- struct vkd3d_string_buffer *name;
- struct hlsl_ir_var *var;
- if (instr->type != HLSL_IR_RESOURCE_LOAD)
return false;
- sample = hlsl_ir_resource_load(instr);
- if (sample->load_type != HLSL_RESOURCE_SAMPLE || sample->sampler.var)
return false;
There's actually multiple load types this can apply to (bias, grad, lod, proj at least). We don't implement any of the other intrinsics yet, but in the interest of not forgetting to change this code later, we may want to make this a proper switch.
Zebediah Figura (@zfigura) commented about libs/vkd3d-shader/hlsl_codegen.c:
- /* Find the corresponding sampler array (or single sample component) in the path. */
- sampler_array_type = sample->resource.var->data_type;
- for (i = 0; i <= sample->resource.path_len; ++i)
- {
if (hlsl_type_is_resource(sampler_array_type))
{
path_len_to_sampler_array = i;
break;
}
assert(i < sample->resource.path_len);
sampler_array_type = hlsl_get_element_type_from_path_index(ctx, sampler_array_type, sample->resource.path[i].node);
- }
- assert(hlsl_type_get_regset(sampler_array_type) == HLSL_REGSET_SAMPLERS);
- hlsl_copy_deref(ctx, &deref_to_sampler_array, &sample->resource);
- hlsl_strip_deref_suffix(ctx, &deref_to_sampler_array, path_len_to_sampler_array);
This seems very complicated, especially this deref manipulation.
I'm rather tempted to propose that we just leave off handling anything but top-level sm1 variables for an hlsl_fixme(). For that matter, do we even have anything that actually needs this combined-sampler logic?
From the subject of patch 5/6:
track_object_components_usage() must be called twice:
This kind of suggests that making it do two different things wasn't actually desirable after all.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
}
}
+/* Create a new extern variable for each object component within each extern variable. */ +static void promote_object_components_to_variables(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
I guess that with this pass the `hlsl_fixme()`'s in `write_sm4_resource_load()` and `write_sm4_resource_store()` can be promoted to `assert()`'s?
Also, but very minor: I don't feel the verb "promote" is really appropriate here. To me "promotion" usually means we're somehow playing with types, but no type is involved here. Maybe "split" would be more appropriate?
On Fri May 19 21:57:41 2023 +0000, Zebediah Figura wrote:
This feels fragile. I've probably asked this already, but do we really need to create new hlsl_ir_vars? If this is just specific to RDEF logic, can we just add code to generate multiple RDEF entries from a single var?
Agreed. It feels like you might need to change something in `sm4_register_from_deref()`, but it doesn't look like that complicated in comparison with doing this pass, which is not really intuitive.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
- assert(hlsl_type_get_regset(sampler_array_type) == HLSL_REGSET_SAMPLERS);
- hlsl_copy_deref(ctx, &deref_to_sampler_array, &sample->resource);
- hlsl_strip_deref_suffix(ctx, &deref_to_sampler_array, path_len_to_sampler_array);
- if (!(name = hlsl_get_string_buffer(ctx)))
return false;
- vkd3d_string_buffer_printf(name, "<resource>");
- hlsl_write_deref_path(ctx, name, &deref_to_sampler_array);
- TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer));
- if (!(var = hlsl_get_var(ctx->globals, name->buffer)))
- {
struct hlsl_type *data_type = hlsl_new_texture_type(ctx, sample->sampling_dim,
ctx->builtin_types.vector[HLSL_TYPE_FLOAT][4 - 1], 0);
You can use `hlsl_get_vector_type()`.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
- if (!(var = hlsl_get_var(ctx->globals, name->buffer)))
- {
struct hlsl_type *data_type = hlsl_new_texture_type(ctx, sample->sampling_dim,
ctx->builtin_types.vector[HLSL_TYPE_FLOAT][4 - 1], 0);
unsigned int array_dims = sample->resource.path_len - path_len_to_sampler_array;
/* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. */
for (i = 0; i < array_dims; ++i)
{
struct hlsl_type *arr_type = sampler_array_type;
for (k = 1; k < array_dims - i; ++k)
{
assert(arr_type->class == HLSL_CLASS_ARRAY);
arr_type = arr_type->e.array.type;
}
If I am not mistaken, it makes sense to add another `assert(arr_type->class == HLSL_CLASS_ARRAY);` after the loop.
Trying to run this shader: ```hlsl struct { Texture2D tex; sampler sam[2][2]; } foo;
float4 main() : sv_target { return 10 * foo.tex.Sample(foo.sam[0][0], float2(0, 0)) + tex2D(foo.sam[1][1], float2(0, 0)); } ``` triggers an assertion: ``` vkd3d:fixme:spirv_compiler_get_descriptor_binding Could not find descriptor binding for type 0x3, space 0, registers [3:3], shader type 0. shader_runner:422: Section [test], line 139: Test failed: Failed to create state, hr 0x80004005. shader_runner: ../vkd3d/libs/vkd3d/state.c:1937: unsafe_impl_from_ID3D12PipelineState: Assertion `iface->lpVtbl == &d3d12_pipeline_state_vtbl' failed. Annullato ```
This seems to be a missing feature in vkd3d, not a problem of vkd3d-shader, so it's not really related to this patch, but it seemed to be something worth mentioning.
On Fri May 19 21:57:43 2023 +0000, Zebediah Figura wrote:
This seems very complicated, especially this deref manipulation. I'm rather tempted to propose that we just leave off handling anything but top-level sm1 variables for an hlsl_fixme(). For that matter, do we even have anything that actually needs this combined-sampler logic?
It's true it's a bit intricate (not that I know of any better way to do the same thing), but I don't think we should drop it. I know we already discussed that, but to me whether a feature is used somewhere or not shouldn't be a criterion to decide whether to have that feature or not (it can be a reasonable criterion to decide on what to concentrate development). In theory our shader compiler should be able to compile any shader that is accepted by the native compiler.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
{ var = extern_resources[i];
string_offset = put_string(&buffer, var->name);
if (!strncmp(var->name, "<resource>", strlen("<resource>")))
string_offset = put_string(&buffer, var->name + strlen("<resource>"));
else
string_offset = put_string(&buffer, var->name);
I'm not sure I like this (specifically, emitting a variable name that it mangled or not depending on the name itself). I guess you're doing it to mimic native?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
{ for (i = 0; i < type->reg_size[k]; ++i) {
/* Samplers are only allocated until the last used one. */
bool is_separated = !strncmp(var->name, "<resource>", strlen("<resource>"));
Similarly to another comment, I don't really like the keep business information in a field like a name string. Can't this be encoded with a flag?
On Mon May 22 13:33:34 2023 +0000, Giovanni Mascellani wrote:
I'm not sure I like this (specifically, emitting a variable name that it mangled or not depending on the name itself). I guess you're doing it to mimic native?
No, this is never visible in native. I originally wrote this part, and the motivation was to avoid creating multiple variables with the same name. In order to do it differently we'd need to adjust hlsl_add_var() and hlsl_get_var(), and we may also want to adjust dump_var().
On Mon May 22 13:33:33 2023 +0000, Giovanni Mascellani wrote:
It's true it's a bit intricate (not that I know of any better way to do the same thing), but I don't think we should drop it. I know we already discussed that, but to me whether a feature is used somewhere or not shouldn't be a criterion to decide whether to have that feature or not (it can be a reasonable criterion to decide on what to concentrate development). In theory our shader compiler should be able to compile any shader that is accepted by the native compiler.
I understand that mindset, but I'm really not happy about maintaining complicated code that might never be used. It's the same reason we discourage implementing things in Wine that no application asks for.
Perhaps more saliently, after doing a bit of testing, it seems that native logic here gets stupidly complicated, and this code isn't quite correct as-is. I don't think it's really worth trying to fix the code just for that.
On Mon May 22 17:19:52 2023 +0000, Giovanni Mascellani wrote:
Trying to run this shader:
struct { Texture2D tex; sampler sam[2][2]; } foo; float4 main() : sv_target { return 10 * foo.tex.Sample(foo.sam[0][0], float2(0, 0)) + tex2D(foo.sam[1][1], float2(0, 0)); }
triggers an assertion:
vkd3d:fixme:spirv_compiler_get_descriptor_binding Could not find descriptor binding for type 0x3, space 0, registers [3:3], shader type 0. shader_runner:422: Section [test], line 139: Test failed: Failed to create state, hr 0x80004005. shader_runner: ../vkd3d/libs/vkd3d/state.c:1937: unsafe_impl_from_ID3D12PipelineState: Assertion `iface->lpVtbl == &d3d12_pipeline_state_vtbl' failed. Annullato
This seems to be a missing feature in vkd3d, not a problem of vkd3d-shader, so it's not really related to this patch, but it seemed to be something worth mentioning.
That just looks like your resources aren't all bound. In this case it seems to be complaining about sampler 3.
On Mon May 22 16:56:48 2023 +0000, Zebediah Figura wrote:
No, this is never visible in native. I originally wrote this part, and the motivation was to avoid creating multiple variables with the same name. In order to do it differently we'd need to adjust hlsl_add_var() and hlsl_get_var(), and we may also want to adjust dump_var().
Then I think I would be in favor to emit the `<resource>` as well.
On Mon May 22 17:17:45 2023 +0000, Zebediah Figura wrote:
I understand that mindset, but I'm really not happy about maintaining complicated code that might never be used. It's the same reason we discourage implementing things in Wine that no application asks for. Perhaps more saliently, after doing a bit of testing, it seems that native logic here gets stupidly complicated, and this code isn't quite correct as-is. I don't think it's really worth trying to fix the code just for that.
If there are counterexamples, then we're in a different league, sure.
On Mon May 22 17:19:52 2023 +0000, Zebediah Figura wrote:
That just looks like your resources aren't all bound. In this case it seems to be complaining about sampler 3.
Ah, yes, brain fart. Ignore this.
On Fri May 19 21:57:42 2023 +0000, Zebediah Figura wrote:
This probably belongs in its own patch. Some rationale would be nice too.
I forgot to delete that line before pushing. Also, it should actually be a ```c setvbuf(stdout, NULL, _IONBF, 0); ```
As we have have talked with Giovanni, the idea is to disable buffering so that output gets always written to stdout, even if the program encounters a segfault. Which is useful when debugging the cross-tests on Windows.
It probably deserves to be a separate MR.