Mainly promoting single object components to variables for SM 5.0's RDEF block and lowering combined samplers to separate sampler+texture objects for SM 4.
Following patches (including prepending uniform copies resource components within struct parameters) in: https://gitlab.winehq.org/fcasas/vkd3d/-/commits/master6c
-- v2: vkd3d-shader/hlsl: Don't allocate all texture registers for synthetic separated samplers. vkd3d-shader/hlsl: Lower combined samplers to separate sampler and texture objects for SM4. vkd3d-shader/hlsl: Separate tracking of sampler_dim and usage for object components. vkd3d-shader/hlsl: Introduce hlsl_new_synthetic_var_named(). vkd3d-shader/hlsl: Check is_uniform instead of HLSL_STORAGE_UNIFORM when validating object refs. tests: Add lowering combined samplers tests. vkd3d-shader/hlsl: Handle resource components individually for SM 5.0. vkd3d-shader/tpf: Introduce struct extern_resource. vkd3d-shader/hlsl: Allow derefs to provide the data_type.
From: Francisco Casas fcasas@codeweavers.com
After lowering the derefs path to a single offset node, there was no way of knowing the type of the referenced part of the variable. This little modification allows to avoid having to pass the data type everywhere and it is required for supporting instructions that reference objects components within struct types. --- libs/vkd3d-shader/hlsl.c | 4 ++- libs/vkd3d-shader/hlsl.h | 5 ++- libs/vkd3d-shader/hlsl_codegen.c | 3 +- libs/vkd3d-shader/tpf.c | 56 ++++++++++++++++---------------- 4 files changed, 37 insertions(+), 31 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 617aef309..b12d08b7c 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -524,7 +524,9 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de unsigned int i;
assert(deref); - assert(!deref->offset.node); + + if (deref->offset.node) + return deref->offset_type_backup;
type = deref->var->data_type; for (i = 0; i < deref->path_len; ++i) diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 6a4e314d0..dcd3c51c9 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -595,9 +595,12 @@ struct hlsl_deref * components, within the pertaining regset), from the start of the variable, of the part * referenced. * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- - * before writing the bytecode. */ + * before writing the bytecode. + * Since the type information cannot longer be retrieved from the offset alone, the type is + * stored in the offset_type_backup field. */ struct hlsl_src offset; enum hlsl_regset offset_regset; + struct hlsl_type *offset_type_backup; };
struct hlsl_ir_load diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 638dab6e1..fa5cda313 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -126,7 +126,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { - const struct hlsl_type *type; + struct hlsl_type *type; struct hlsl_ir_node *offset; struct hlsl_block block;
@@ -147,6 +147,7 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der }
deref->offset_regset = hlsl_type_get_regset(type); + deref->offset_type_backup = type;
if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) return; diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index eadad3b9d..9c86aa890 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3336,8 +3336,9 @@ struct sm4_instruction
static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, - const struct hlsl_deref *deref, const struct hlsl_type *data_type) + const struct hlsl_deref *deref) { + const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); const struct hlsl_ir_var *var = deref->var;
if (var->is_uniform) @@ -3474,11 +3475,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r }
static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, - const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) + const struct hlsl_deref *deref, unsigned int map_writemask) { unsigned int writemask;
- sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); + sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref); if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); } @@ -3982,10 +3983,11 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, }
static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) + const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, + const struct hlsl_ir_node *coords, const struct hlsl_ir_node *texel_offset, + enum hlsl_sampler_dim dim) { + const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, resource); bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); struct sm4_instruction instr; unsigned int dim_count; @@ -4019,7 +4021,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); }
- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask);
instr.src_count = 2;
@@ -4029,7 +4031,6 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) { - const struct hlsl_type *resource_type = load->resource.var->data_type; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node; const struct hlsl_deref *resource = &load->resource; @@ -4066,8 +4067,8 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); instr.src_count = 3;
if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) @@ -4224,7 +4225,7 @@ static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecod memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); @@ -4750,7 +4751,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx,
instr.opcode = VKD3D_SM4_OP_MOVC;
- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask);
memset(&value, 0xff, sizeof(value)); sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); @@ -4762,7 +4763,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, { instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); instr.src_count = 1; }
@@ -4786,8 +4787,7 @@ static void write_sm4_loop(struct hlsl_ctx *ctx, }
static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, + const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) { struct sm4_src_register *src; @@ -4817,10 +4817,10 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer } }
- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask);
src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; src->swizzle = swizzle; @@ -4867,8 +4867,8 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, switch (load->load_type) { case HLSL_RESOURCE_LOAD: - write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, - coords, texel_offset, load->sampling_dim); + write_sm4_ld(ctx, buffer, &load->node, &load->resource, coords, texel_offset, + load->sampling_dim); break;
case HLSL_RESOURCE_SAMPLE: @@ -4882,23 +4882,23 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, break;
case HLSL_RESOURCE_GATHER_RED: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); + write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(X, X, X, X), texel_offset); break;
case HLSL_RESOURCE_GATHER_GREEN: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); break;
case HLSL_RESOURCE_GATHER_BLUE: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); break;
case HLSL_RESOURCE_GATHER_ALPHA: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); + write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(W, W, W, W), texel_offset); break;
case HLSL_RESOURCE_SAMPLE_LOD: @@ -4937,7 +4937,7 @@ static void write_sm4_store(struct hlsl_ctx *ctx, memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); instr.dst_count = 1;
From: Francisco Casas fcasas@codeweavers.com
This struct is required for handling both whole-variable resources for SM < 5 and single-component resources for SM 5 in the same way, when writting the RDEF block and resource declarations within the shader. --- libs/vkd3d-shader/tpf.c | 136 +++++++++++++++++++++++++--------------- 1 file changed, 86 insertions(+), 50 deletions(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 9c86aa890..e4ccc7046 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -2962,27 +2962,48 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ } }
+struct extern_resource +{ + /* var is only not NULL if this resource is a whole variable, so it may be responsible for more + * than one component. */ + const struct hlsl_ir_var *var; + + char *name; + struct hlsl_type *data_type; + + enum hlsl_regset regset; + unsigned int id, bind_count; + + bool user_packed; +}; + static int sm4_compare_extern_resources(const void *a, const void *b) { - const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; - const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; - enum hlsl_regset aa_regset, bb_regset; + const struct extern_resource *aa = (const struct extern_resource *)a; + const struct extern_resource *bb = (const struct extern_resource *)b; + + if (aa->regset != bb->regset) + return aa->regset - bb->regset;
- aa_regset = hlsl_type_get_regset(aa->data_type); - bb_regset = hlsl_type_get_regset(bb->data_type); + return aa->id - bb->id; +}
- if (aa_regset != bb_regset) - return aa_regset - bb_regset; +static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +{ + unsigned int i;
- return aa->regs[aa_regset].id - bb->regs[bb_regset].id; + for (i = 0; i < count; ++i) + vkd3d_free(extern_resources[i].name); + vkd3d_free(extern_resources); }
-static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { - const struct hlsl_ir_var **extern_resources = NULL; + struct extern_resource *extern_resources = NULL; const struct hlsl_ir_var *var; enum hlsl_regset regset; size_t capacity = 0; + char *name;
*count = 0;
@@ -2997,11 +3018,28 @@ static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, sizeof(*extern_resources)))) { + sm4_free_extern_resources(extern_resources, *count); *count = 0; return NULL; }
- extern_resources[*count] = var; + if (!(name = hlsl_strdup(ctx, var->name))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + extern_resources[*count].var = var; + extern_resources[*count].name = name; + extern_resources[*count].data_type = var->data_type; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].bind_count = var->regs[regset].bind_count; + + extern_resources[*count].user_packed = var->reg_reservation.reg_type; + ++*count; }
@@ -3015,8 +3053,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) size_t cbuffers_offset, resources_offset, creator_offset, string_offset; size_t cbuffer_position, resource_position, creator_position; const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var;
@@ -3070,18 +3108,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
for (i = 0; i < extern_resources_count; ++i) { - enum hlsl_regset regset; + const struct extern_resource *resource = &extern_resources[i]; uint32_t flags = 0;
- var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); - - if (var->reg_reservation.reg_type) + if (resource->user_packed) flags |= D3D_SIF_USERPACKED;
put_u32(&buffer, 0); /* name */ - put_u32(&buffer, sm4_resource_type(var->data_type)); - if (regset == HLSL_REGSET_SAMPLERS) + put_u32(&buffer, sm4_resource_type(resource->data_type)); + if (resource->regset == HLSL_REGSET_SAMPLERS) { put_u32(&buffer, 0); put_u32(&buffer, 0); @@ -3089,15 +3124,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) } else { - unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource_format->dimx;
- put_u32(&buffer, sm4_resource_format(var->data_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); + put_u32(&buffer, sm4_resource_format(resource->data_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } - put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, var->regs[regset].bind_count); + put_u32(&buffer, resource->id); + put_u32(&buffer, resource->bind_count); put_u32(&buffer, flags); }
@@ -3123,9 +3158,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
for (i = 0; i < extern_resources_count; ++i) { - var = extern_resources[i]; + const struct extern_resource *resource = &extern_resources[i];
- string_offset = put_string(&buffer, var->name); + string_offset = put_string(&buffer, resource->name); set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); }
@@ -3231,7 +3266,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
add_section(dxbc, TAG_RDEF, &buffer);
- vkd3d_free(extern_resources); + sm4_free_extern_resources(extern_resources, extern_resources_count); }
static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) @@ -3663,14 +3698,16 @@ static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, write_sm4_instruction(buffer, &instr); }
-static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct extern_resource *resource) { - unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; struct sm4_instruction instr; + unsigned int i;
- for (i = 0; i < count; ++i) + assert(resource->regset == HLSL_REGSET_SAMPLERS); + + for (i = 0; i < resource->bind_count; ++i) { - if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) continue;
instr = (struct sm4_instruction) @@ -3678,7 +3715,7 @@ static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const s .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
.dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, - .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id + i}, + .dsts[0].reg.idx = {resource->id + i}, .dsts[0].reg.idx_count = 1, .dst_count = 1, }; @@ -3688,18 +3725,20 @@ static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const s }
static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_var *var, bool uav) + const struct extern_resource *resource, bool uav) { enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; - unsigned int i, count = var->data_type->reg_size[regset]; struct hlsl_type *component_type; struct sm4_instruction instr; + unsigned int i;
- component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + assert(resource->regset == regset);
- for (i = 0; i < count; ++i) + component_type = hlsl_type_get_component_type(ctx, resource->data_type, 0); + + for (i = 0; i < resource->bind_count; ++i) { - if (!var->objects_usage[regset][i].used) + if (resource->var && !resource->var->objects_usage[regset][i].used) continue;
instr = (struct sm4_instruction) @@ -3708,7 +3747,7 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b | (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT),
.dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.idx = {resource->id + i}, .dsts[0].reg.idx_count = 1, .dst_count = 1,
@@ -5045,8 +5084,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) { const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; unsigned int extern_resources_count, i; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var; @@ -5078,17 +5117,14 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
for (i = 0; i < extern_resources_count; ++i) { - enum hlsl_regset regset; + const struct extern_resource *resource = &extern_resources[i];
- var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); - - if (regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&buffer, var); - else if (regset == HLSL_REGSET_TEXTURES) - write_sm4_dcl_textures(ctx, &buffer, var, false); - else if (regset == HLSL_REGSET_UAVS) - write_sm4_dcl_textures(ctx, &buffer, var, true); + if (resource->regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&buffer, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(ctx, &buffer, resource, false); + else if (resource->regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(ctx, &buffer, resource, true); }
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) @@ -5111,7 +5147,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
add_section(dxbc, TAG_SHDR, &buffer);
- vkd3d_free(extern_resources); + sm4_free_extern_resources(extern_resources, extern_resources_count); }
int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 70 +++++++++++++++++++ libs/vkd3d-shader/hlsl.h | 4 ++ libs/vkd3d-shader/tpf.c | 142 ++++++++++++++++++++++++-------------- tests/cbuffer.shader_test | 6 +- 4 files changed, 166 insertions(+), 56 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index b12d08b7c..b27968d9e 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -430,6 +430,51 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl return type; }
+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, + enum hlsl_regset regset, unsigned int index) +{ + struct hlsl_type *next_type; + unsigned int offset = 0; + unsigned int idx; + + while (!type_is_single_component(type)) + { + next_type = type; + idx = traverse_path_from_component_index(ctx, &next_type, &index); + + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + if (regset == HLSL_REGSET_NUMERIC) + offset += idx; + break; + + case HLSL_CLASS_STRUCT: + offset += type->e.record.fields[idx].reg_offset[regset]; + break; + + case HLSL_CLASS_ARRAY: + if (regset == HLSL_REGSET_NUMERIC) + offset += idx * align(type->e.array.type->reg_size[regset], 4); + else + offset += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_OBJECT: + assert(idx == 0); + break; + + default: + vkd3d_unreachable(); + } + type = next_type; + } + + return offset; +} + static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len) { @@ -2050,6 +2095,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru } }
+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, + unsigned int index) +{ + struct hlsl_type *type = var->data_type, *current_type; + struct vkd3d_string_buffer *buffer; + unsigned int element_index; + + if (!(buffer = hlsl_get_string_buffer(ctx))) + return NULL; + + vkd3d_string_buffer_printf(buffer, "%s", var->name); + + while (!type_is_single_component(type)) + { + current_type = type; + element_index = traverse_path_from_component_index(ctx, &type, &index); + if (current_type->class == HLSL_CLASS_STRUCT) + vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); + else + vkd3d_string_buffer_printf(buffer, "[%u]", element_index); + } + + return buffer; +} + const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) { struct vkd3d_string_buffer *string; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index dcd3c51c9..81a68b820 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1049,6 +1049,8 @@ const char *debug_hlsl_writemask(unsigned int writemask); const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count);
struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type); +struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, + unsigned int index); struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
@@ -1179,6 +1181,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type); unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset); struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, unsigned int index); +unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, + enum hlsl_regset regset, unsigned int index); bool hlsl_type_is_row_major(const struct hlsl_type *type); unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index e4ccc7046..11cbf6db8 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -2999,6 +2999,7 @@ static void sm4_free_extern_resources(struct extern_resource *extern_resources,
static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { + bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; struct extern_resource *extern_resources = NULL; const struct hlsl_ir_var *var; enum hlsl_regset regset; @@ -3009,38 +3010,99 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!hlsl_type_is_resource(var->data_type)) - continue; - regset = hlsl_type_get_regset(var->data_type); - if (!var->regs[regset].allocated) - continue; - - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) + if (separate_components) { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; - } + unsigned int component_count = hlsl_type_component_count(var->data_type); + unsigned int k, regset_offset;
- if (!(name = hlsl_strdup(ctx, var->name))) - { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; + for (k = 0; k < component_count; ++k) + { + struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); + struct vkd3d_string_buffer *name_buffer; + + if (!hlsl_type_is_resource(component_type)) + continue; + + regset = hlsl_type_get_regset(component_type); + regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); + + if (regset_offset > var->regs[regset].bind_count) + continue; + + if (var->objects_usage[regset][regset_offset].used) + { + if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + if (!(name = hlsl_strdup(ctx, name_buffer->buffer))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + hlsl_release_string_buffer(ctx, name_buffer); + return NULL; + } + hlsl_release_string_buffer(ctx, name_buffer); + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + extern_resources[*count].var = NULL; + extern_resources[*count].name = name; + extern_resources[*count].data_type = component_type; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id + regset_offset; + extern_resources[*count].bind_count = 1; + + extern_resources[*count].user_packed = false; + + ++*count; + } + } } + else + { + if (!hlsl_type_is_resource(var->data_type)) + continue; + regset = hlsl_type_get_regset(var->data_type); + if (!var->regs[regset].allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name = hlsl_strdup(ctx, var->name))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + }
- extern_resources[*count].var = var; - extern_resources[*count].name = name; - extern_resources[*count].data_type = var->data_type; + extern_resources[*count].var = var; + extern_resources[*count].name = name; + extern_resources[*count].data_type = var->data_type;
- extern_resources[*count].regset = regset; - extern_resources[*count].id = var->regs[regset].id; - extern_resources[*count].bind_count = var->regs[regset].bind_count; + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].bind_count = var->regs[regset].bind_count;
- extern_resources[*count].user_packed = var->reg_reservation.reg_type; + extern_resources[*count].user_packed = var->reg_reservation.reg_type;
- ++*count; + ++*count; + } }
qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); @@ -4870,33 +4932,15 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer static void write_sm4_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) { - const struct hlsl_type *resource_type = load->resource.var->data_type; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node;
- if (!hlsl_type_is_resource(resource_type)) + if (load->sampler.var && !load->sampler.var->is_uniform) { - hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); return; }
- if (load->sampler.var) - { - const struct hlsl_type *sampler_type = load->sampler.var->data_type; - - if (!hlsl_type_is_resource(sampler_type)) - { - hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); - return; - } - - if (!load->sampler.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - } - if (!load->resource.var->is_uniform) { hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); @@ -4949,14 +4993,6 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, static void write_sm4_resource_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) { - const struct hlsl_type *resource_type = store->resource.var->data_type; - - if (!hlsl_type_is_resource(resource_type)) - { - hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); - return; - } - if (!store->resource.var->is_uniform) { hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); diff --git a/tests/cbuffer.shader_test b/tests/cbuffer.shader_test index 7e2a91dca..83397c189 100644 --- a/tests/cbuffer.shader_test +++ b/tests/cbuffer.shader_test @@ -694,7 +694,7 @@ shader model >= 5.0 size (1, 1) 0.0 0.0 0.0 0.5
-[pixel shader todo] +[pixel shader] struct apple { float2 a; @@ -718,5 +718,5 @@ uniform 0 float4 0.0 1.0 2.0 3.0 uniform 4 float4 4.0 5.0 6.0 7.0 uniform 8 float4 8.0 9.0 10.0 11.0 uniform 12 float4 12.0 13.0 14.0 15.0 -todo draw quad -todo probe all rgba (124.0, 135.0, 146.0, 150.5) +draw quad +probe all rgba (124.0, 135.0, 146.0, 150.5)
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/hlsl-combined-samplers.shader_test | 125 +++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 tests/hlsl-combined-samplers.shader_test
diff --git a/Makefile.am b/Makefile.am index e06d0eeb4..560c2c819 100644 --- a/Makefile.am +++ b/Makefile.am @@ -79,6 +79,7 @@ vkd3d_shader_tests = \ tests/hlsl-attributes.shader_test \ tests/hlsl-bool-cast.shader_test \ tests/hlsl-clamp.shader_test \ + tests/hlsl-combined-samplers.shader_test \ tests/hlsl-comma.shader_test \ tests/hlsl-cross.shader_test \ tests/hlsl-d3dcolor-to-ubyte4.shader_test \ diff --git a/tests/hlsl-combined-samplers.shader_test b/tests/hlsl-combined-samplers.shader_test new file mode 100644 index 000000000..b9677749a --- /dev/null +++ b/tests/hlsl-combined-samplers.shader_test @@ -0,0 +1,125 @@ +[sampler 0] +filter linear linear linear +address clamp clamp clamp + +[sampler 1] +filter linear linear linear +address clamp clamp clamp + +[sampler 2] +filter linear linear linear +address clamp clamp clamp + +[texture 0] +size (1, 1) +0.0 0.0 0.0 1.0 + +[texture 1] +size (1, 1) +1.0 1.0 1.0 1.0 + +[texture 2] +size (1, 1) +2.0 2.0 2.0 1.0 + +[texture 3] +size (1, 1) +3.0 3.0 3.0 1.0 + +[texture 4] +size (1, 1) +4.0 4.0 4.0 1.0 + + +[pixel shader todo] +sampler sam; + +float4 main() : sv_target +{ + return tex2D(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (0, 0, 0, 1) + + +[pixel shader todo] +Texture2D tex; +sampler sam; + +// Textures for new separated samplers are allocated before regular textures. +float4 main() : sv_target +{ + return 10 * tex.Sample(sam, float2(0, 0)) + tex2D(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (10, 10, 10, 11) + + +[pixel shader todo] +Texture2D tex; +sampler sam[2]; + +float4 main() : sv_target +{ + return 10 * tex.Sample(sam[0], float2(0, 0)) + tex2D(sam[1], float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (21, 21, 21, 11) + + +[pixel shader todo] +sampler sam0; +sampler sam1; +sampler sam2; + +float4 main() : sv_target +{ + return 100 * tex2D(sam1, float2(0, 0)) + 10 * tex2D(sam0, float2(0, 0)) + + tex2D(sam2, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (102, 102, 102, 111) + + +[pixel shader todo] +Texture2D tex[2][2]; +sampler sam; + +float4 main() : sv_target +{ + return 100 * tex[0][0].Load(int3(0, 0, 0)) + 10 * tex2D(sam, float2(0, 0)) + + tex[1][1].Sample(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (104, 104, 104, 111) + + +[require] +shader model >= 5.0 + + +[pixel shader todo] +struct +{ + Texture2D tex; + sampler sam; +} foo; + +float4 main() : sv_target +{ + return 10 * foo.tex.Sample(foo.sam, float2(0, 0)) + tex2D(foo.sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (10, 10, 10, 11)
From: Francisco Casas fcasas@codeweavers.com
We are using the hlsl_ir_var.is_uniform flag to indicate when an object is a uniform copy created from a variable with the HLSL_STORAGE_UNIFORM modifier.
We should be checking for this instead of the HLSL_STORAGE_UNIFORM flag which is also set to 1 for the original variables, and there should be no reason to use this flag instead of "is_uniform" after the uniform copies and combined/separated samplers are created. --- libs/vkd3d-shader/hlsl_codegen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index fa5cda313..3b75ed06e 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1680,7 +1680,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Loaded resource must have a single uniform source."); @@ -1695,7 +1695,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_
if (load->sampler.var) { - if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->sampler.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Resource load sampler must have a single uniform source."); @@ -1713,7 +1713,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!store->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Accessed resource must have a single uniform source.");
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 20 +++++++++++++------- libs/vkd3d-shader/hlsl.h | 2 ++ 2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index b27968d9e..5967d1b10 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1039,18 +1039,24 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem struct vkd3d_string_buffer *string; struct hlsl_ir_var *var; static LONG counter; - const char *name;
if (!(string = hlsl_get_string_buffer(ctx))) return NULL; vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); - if (!(name = hlsl_strdup(ctx, string->buffer))) - { - hlsl_release_string_buffer(ctx, string); - return NULL; - } - var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); + var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc); hlsl_release_string_buffer(ctx, string); + return var; +} + +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_var *var; + const char *name_copy; + + if (!(name_copy = hlsl_strdup(ctx, name))) + return NULL; + var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); if (var) list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); return var; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 81a68b820..50dee2822 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1150,6 +1150,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned in struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 50 ++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 18 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 3b75ed06e..319e922b2 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2890,7 +2890,7 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); }
-static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_resource_load *load; struct hlsl_ir_var *var; @@ -2902,15 +2902,16 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n
load = hlsl_ir_resource_load(instr); var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false;
if (regset == HLSL_REGSET_SAMPLERS) { enum hlsl_sampler_dim dim;
assert(!load->sampler.var); - if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) - return false;
dim = var->objects_usage[regset][index].sampler_dim; if (dim != load->sampling_dim) @@ -2928,25 +2929,37 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n return false; } } - var->objects_usage[regset][index].used = true; - var->objects_usage[regset][index].sampler_dim = load->sampling_dim; } - else - { - if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) - return false; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim;
- var->objects_usage[regset][index].used = true; - var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + return false; +}
- if (load->sampler.var) - { - var = load->sampler.var; - if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) - return false; +static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index;
- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; - } + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; }
return false; @@ -3942,6 +3955,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry }
hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); hlsl_transform_ir(ctx, track_object_components_usage, body, NULL);
/* TODO: move forward, remove when no longer needed */
From: Zebediah Figura zfigura@codeweavers.com
Co-authored-by: Francisco Casas fcasas@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 93 ++++++++++++++++++++++-- libs/vkd3d-shader/tpf.c | 12 +-- tests/hlsl-combined-samplers.shader_test | 24 +++--- 3 files changed, 104 insertions(+), 25 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 319e922b2..eb61b4bf5 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1982,6 +1982,83 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; }
+/* Lower combined samples and sampler variables to synthesized separated textures and samplers. + * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ +static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct vkd3d_string_buffer *name; + struct hlsl_ir_var *var; + unsigned int i; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + load = hlsl_ir_resource_load(instr); + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + case HLSL_RESOURCE_GATHER_RED: + case HLSL_RESOURCE_GATHER_GREEN: + case HLSL_RESOURCE_GATHER_BLUE: + case HLSL_RESOURCE_GATHER_ALPHA: + return false; + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + break; + } + if (load->sampler.var) + return false; + + if (!hlsl_type_is_resource(load->resource.var->data_type)) + { + hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs."); + return false; + } + + assert(hlsl_type_get_regset(load->resource.var->data_type) == HLSL_REGSET_SAMPLERS); + + if (!(name = hlsl_get_string_buffer(ctx))) + return false; + vkd3d_string_buffer_printf(name, "<resource>%s", load->resource.var->name); + + TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); + + if (!(var = hlsl_get_var(ctx->globals, name->buffer))) + { + struct hlsl_type *texture_array_type = hlsl_new_texture_type(ctx, load->sampling_dim, + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); + + /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. */ + struct hlsl_type *arr_type = load->resource.var->data_type; + for (i = 0; i < load->resource.path_len; ++i) + { + assert(arr_type->class == HLSL_CLASS_ARRAY); + texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); + arr_type = arr_type->e.array.type; + } + + if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc))) + { + hlsl_release_string_buffer(ctx, name); + return false; + } + var->is_uniform = 1; + + list_add_before(&load->resource.var->extern_entry, &var->extern_entry); + } + hlsl_release_string_buffer(ctx, name); + + hlsl_copy_deref(ctx, &load->sampler, &load->resource); + load->resource.var = var; + assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); + assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); + + return true; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -3362,7 +3439,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) + if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) continue;
buffer = var1->buffer; @@ -3373,7 +3450,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) { unsigned int var1_reg_size, var2_reg_size;
- if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) + if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) continue;
if (var1 == var2 || var1->buffer != var2->buffer) @@ -3423,7 +3500,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) + if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) { if (var->is_param) var->buffer = ctx->params_buffer; @@ -3941,6 +4018,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } while (progress);
+ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + if (profile->major_version < 4) { hlsl_transform_ir(ctx, lower_division, body, NULL); @@ -3954,10 +4037,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_abs, body, NULL); }
- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); - hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); - /* TODO: move forward, remove when no longer needed */ hlsl_transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 11cbf6db8..b974812c3 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3222,7 +3222,10 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { const struct extern_resource *resource = &extern_resources[i];
- string_offset = put_string(&buffer, resource->name); + if (!strncmp(resource->name, "<resource>", strlen("<resource>"))) + string_offset = put_string(&buffer, resource->name + strlen("<resource>")); + else + string_offset = put_string(&buffer, resource->name); set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); }
@@ -4956,11 +4959,8 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx,
case HLSL_RESOURCE_SAMPLE: case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - if (!load->sampler.var) - { - hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); - return; - } + /* Combined sample expressions were lowered. */ + assert(load->sampler.var); write_sm4_sample(ctx, buffer, load); break;
diff --git a/tests/hlsl-combined-samplers.shader_test b/tests/hlsl-combined-samplers.shader_test index b9677749a..cf13e705b 100644 --- a/tests/hlsl-combined-samplers.shader_test +++ b/tests/hlsl-combined-samplers.shader_test @@ -31,7 +31,7 @@ size (1, 1) 4.0 4.0 4.0 1.0
-[pixel shader todo] +[pixel shader] sampler sam;
float4 main() : sv_target @@ -40,11 +40,11 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0, 0, 0, 1) +draw quad +probe all rgba (0, 0, 0, 1)
-[pixel shader todo] +[pixel shader] Texture2D tex; sampler sam;
@@ -55,11 +55,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad todo probe all rgba (10, 10, 10, 11)
-[pixel shader todo] +[pixel shader] Texture2D tex; sampler sam[2];
@@ -69,11 +69,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad todo probe all rgba (21, 21, 21, 11)
-[pixel shader todo] +[pixel shader] sampler sam0; sampler sam1; sampler sam2; @@ -85,11 +85,11 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (102, 102, 102, 111) +draw quad +probe all rgba (102, 102, 102, 111)
-[pixel shader todo] +[pixel shader] Texture2D tex[2][2]; sampler sam;
@@ -100,7 +100,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad todo probe all rgba (104, 104, 104, 111)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index eb61b4bf5..311580fc2 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3056,9 +3056,12 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) { for (i = 0; i < type->reg_size[k]; ++i) { - /* Samplers are only allocated until the last used one. */ + bool is_separated = !strncmp(var->name, "<resource>", strlen("<resource>")); + + /* Samplers (and textures separated from them) are only allocated until the last + * used one. */ if (var->objects_usage[k][i].used) - var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; + var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS || is_separated) ? i + 1 : type->reg_size[k]; } } }
On Wed May 31 05:48:04 2023 +0000, Francisco Casas wrote:
changed this line in [version 2 of the diff](/wine/vkd3d/-/merge_requests/209/diffs?diff_id=49551&start_sha=78e71c4bc794f5606fc1137d99ce63ce3e9865c5#c6c9a40d72060550079c91bb6dd5741748169928_434_433)
I see. I renamed the function to hlsl_component_to_string() and made it return the string buffer in the same style as hlsl_type_to_string(), hlsl_deref_path_to_string(), and hlsl_modifiers_to_string().
On Wed May 31 05:48:07 2023 +0000, Francisco Casas wrote:
changed this line in [version 2 of the diff](/wine/vkd3d/-/merge_requests/209/diffs?diff_id=49551&start_sha=78e71c4bc794f5606fc1137d99ce63ce3e9865c5#3cf804f245af47d51595ff932bf817c50967eea2_1999_1996)
I added a switch. I am not sure if you meant it this way though.
On Wed May 31 05:49:51 2023 +0000, Zebediah Figura wrote:
From the subject of patch 5/6:
track_object_components_usage() must be called twice:
This kind of suggests that making it do two different things wasn't actually desirable after all.
I added a patch to split the pass into two: track_object_components_sampler_dim() and track_object_components_usage().
On Wed May 31 05:48:05 2023 +0000, Francisco Casas wrote:
changed this line in [version 2 of the diff](/wine/vkd3d/-/merge_requests/209/diffs?diff_id=49551&start_sha=78e71c4bc794f5606fc1137d99ce63ce3e9865c5#b9a696b7e2f161c9052ff1159d7d8cb2907cdeae_5224_5195)
I don't have any indication. For now I will leave it just for 5.0.
Probably we want to do it just for 5.0 since we are thinking on implementing 5.1 resource arrays as a single component anyways and, as you say, when objects are contained within structs the native compiler crashes, even on the simplest cases:
``` struct { Texture2D tex; float4 a; } apple;
float4 main() : sv_target { return apple.tex.Load(int3(0, 0, 0)) + apple.a; }
```
I am inclined to enable the same restrictions for objects within structs that we apply for SM < 5 for SM 5.1.
On Wed May 31 05:48:05 2023 +0000, Francisco Casas wrote:
changed this line in [version 2 of the diff](/wine/vkd3d/-/merge_requests/209/diffs?diff_id=49551&start_sha=78e71c4bc794f5606fc1137d99ce63ce3e9865c5#b9a696b7e2f161c9052ff1159d7d8cb2907cdeae_2552_2518)
I added a `hlsl_type_get_component_offset()` as a helper.
I guess that with this pass the `hlsl_fixme()`'s in `write_sm4_resource_load()` and `write_sm4_resource_store()` can be promoted to `assert()`'s?
Yes, at least for the `hlsl_type_is_resource()` checks. The `var->is_uniform` checks should remain in place, in case there are still unhandled cases where resources can be components of parameters).
Also, but very minor: I don't feel the verb "promote" is really appropriate here. To me "promotion" usually means we're somehow playing with types, but no type is involved here. Maybe "split" would be more appropriate?
I see. In the new version the function no longer exists though.
On Wed May 31 05:48:06 2023 +0000, Francisco Casas wrote:
changed this line in [version 2 of the diff](/wine/vkd3d/-/merge_requests/209/diffs?diff_id=49551&start_sha=78e71c4bc794f5606fc1137d99ce63ce3e9865c5#b9a696b7e2f161c9052ff1159d7d8cb2907cdeae_2614_2518)
I agree, I rewrote the whole thing around sm4_get_extern_resources().
This required the following 3 patches: ``` vkd3d-shader/hlsl: Handle resource components individually for SM 5.0. vkd3d-shader/tpf: Introduce struct extern_resource. vkd3d-shader/hlsl: Allow derefs to provide the data_type. ```
On Wed May 31 05:48:10 2023 +0000, Francisco Casas wrote:
changed this line in [version 2 of the diff](/wine/vkd3d/-/merge_requests/209/diffs?diff_id=49551&start_sha=78e71c4bc794f5606fc1137d99ce63ce3e9865c5#b9a696b7e2f161c9052ff1159d7d8cb2907cdeae_3231_3228)
So, we can resolve this comment and https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/209#note_33393 as well, right?
I'm rather tempted to propose that we just leave off handling anything but top-level sm1 variables for an hlsl_fixme(). For that matter, do we even have anything that actually needs this combined-sampler logic?
No, not that I remember. Okay, I simplified it so that it only supports samplers and sample arrays (it emits a fixme for samplers within structs), now it is closer to the original patch. There is a test that will remain as TODO.
Perhaps more saliently, after doing a bit of testing, it seems that native logic here gets stupidly complicated, and this code isn't quite correct as-is. I don't think it's really worth trying to fix the code just for that.
I agree that the native logic gets complicated (and the native compiler even emits its own fixmes in many cases), also, it is hard to know what it does support and what it doesn't. But I am curious about what is not correct of the code.
On Wed May 31 05:49:53 2023 +0000, Francisco Casas wrote:
So, we can resolve this comment and https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/209#note_33393 as well, right?
Hmm, WRT https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/209#note_33393 I still see exactly the same line. Am I missing anything? Well, in the end I can live with it, it's technically not wrong, but I think it's brittle. It's easy to forget about which conventions are used and break things later. While a flag in a `struct` is much more explicit, and harder to inadvertently remove.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_codegen.c:
static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) {
- const struct hlsl_type *type;
- struct hlsl_type *type;
Maybe we should just decide that we always use `const hlsl_type *` everywhere, just like with the shader location?
In another MR, of course.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl.h:
* components, within the pertaining regset), from the start of the variable, of the part * referenced. * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 --
* before writing the bytecode. */
* before writing the bytecode.
* Since the type information cannot longer be retrieved from the offset alone, the type is
struct hlsl_src offset; enum hlsl_regset offset_regset;* stored in the offset_type_backup field. */
- struct hlsl_type *offset_type_backup;
It's fine for me to have this field, but once we decide to have it it would probably make sense to keep it for the whole life of the `struct hlsl_deref`, not just when storing the offset. I don't know if it can be useful before that, but the information it conveys make sense at any time, so by artificially restricting when it is available just makes our life harder. Also, let's just call it `type` or `data_type`.
I couldn't try this since it does not apply any more, but one thing that I noticed is that 1D case should actually produce 2D resource declaration, and use (x, 0.5) as a coordinate.
Or maybe this could be done by functions directly.
On Wed May 31 05:49:53 2023 +0000, Francisco Casas wrote:
I'm rather tempted to propose that we just leave off handling anything
but top-level sm1 variables for an hlsl_fixme(). For that matter, do we even have anything that actually needs this combined-sampler logic? No, not that I remember. Okay, I simplified it so that it only supports samplers and sample arrays (it emits a fixme for samplers within structs), now it is closer to the original patch. There is a test that will remain as TODO.
Perhaps more saliently, after doing a bit of testing, it seems that
native logic here gets stupidly complicated, and this code isn't quite correct as-is. I don't think it's really worth trying to fix the code just for that. I agree that the native logic gets complicated (and the native compiler even emits its own fixmes in many cases), also, it is hard to know what it does support and what it doesn't. But I am curious about what is not correct of the code.
I don't remember exactly; unfortunately I didn't write down, but I was able to recreate this example
``` struct apple { struct { sampler s[2]; float4 pos; } s[2]; } a;
float4 main(float4 f : texcoord) : sv_target { return tex2D(a.s[1].s[1], a.s[1].pos); } ```
which I believe yielded different RDEF information between native and (the old version of) this patch set.
I feel like there might have been an example that also yielded different allocation, but I might be misremembering.
Then I think I would be in favor to emit the `<resource>` as well.
I think I may have misinterpreted this comment, I assumed you were saying that you preferred the code as I wrote it? Or are you saying we should write out "<resource>" into the table instead of stripping it?
If the latter—to clarify, by "not visible" I mean the "<resource>" part is never visible, that's an invention I added, basically working around code that I wasn't sure at the time could handle multiple variables with the same name (well, definitely can't, in some respects). Native does indeed have multiple variables with the same name in the RDEF table.
I don't love the <resource> hack, though I still don't hate it either, and I can agree with the idea of just storing that as an extra part of the key.
--
Actually, what just occurred to me: along the same lines as some of my suggestions on this merge request, I'm not even sure we need to synthesize another variable. We could just reuse *this* variable (and set some flag telling RA that we need to allocate both sampler and texture variables for it?)
That does mean we need special logic around sm4_register_from_deref() [which could potentially be a simple as passing a register set to that function?], which is a bit unfortunate but maybe ultimately not that unfortunate? What do you think?
On Thu Jun 1 14:18:57 2023 +0000, Giovanni Mascellani wrote:
Maybe we should just decide that we always use `const hlsl_type *` everywhere, just like with the shader location? In another MR, of course.
The only reason we don't do that is the bytecode_offset field. Which is a bit awkward.
On Fri Jun 2 22:38:25 2023 +0000, Nikolay Sivov wrote:
I couldn't try this since it does not apply any more, but one thing that I noticed is that 1D case should actually produce 2D resource declaration, and use (x, 0.5) as a coordinate.
I see, albeit I am inclined to implement this as a new pass as in https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/219#note_34533
In this scheme, SM1 should keep the 1D resource loads, and just pass any value to `src0.y` when writing `texld` or `texldp`. One thing would remain, which is to allow SM1 samplers to be used both in tex1D() and tex2D() without calling them inconsistent.... Native SM4 just explodes:
```hlsl sampler sam; float4 main() : sv_target { return tex1D(sam, 0.0) + tex2D(sam, float2(0, 1)); } ``` ``` internal error: compilation aborted unexpectedly
compilation failed; no code produced ```
On Fri Jun 2 21:30:41 2023 +0000, Zebediah Figura wrote:
The only reason we don't do that is the bytecode_offset field. Which is a bit awkward.
Ouch...
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
- enum hlsl_regset regset;
- unsigned int id, bind_count;
- bool user_packed;
+};
static int sm4_compare_extern_resources(const void *a, const void *b) {
- const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a;
- const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b;
- enum hlsl_regset aa_regset, bb_regset;
- const struct extern_resource *aa = (const struct extern_resource *)a;
- const struct extern_resource *bb = (const struct extern_resource *)b;
- if (aa->regset != bb->regset)
return aa->regset - bb->regset;
Given that you're touching this, you could also start using `vkd3d_u32_compare()` and the style used, for example, by `compare_param_hlsl_types()`.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
if (!(name = hlsl_strdup(ctx, var->name)))
{
sm4_free_extern_resources(extern_resources, *count);
*count = 0;
return NULL;
}
extern_resources[*count].var = var;
extern_resources[*count].name = name;
extern_resources[*count].data_type = var->data_type;
extern_resources[*count].regset = regset;
extern_resources[*count].id = var->regs[regset].id;
extern_resources[*count].bind_count = var->regs[regset].bind_count;
extern_resources[*count].user_packed = var->reg_reservation.reg_type;
That's probably not required by the language, but maybe adding a `!!` here can make the conversion more explicit and the programmer's intention easier to understand.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
if (var->objects_usage[regset][regset_offset].used)
{
if (!(name_buffer = hlsl_component_to_string(ctx, var, k)))
{
sm4_free_extern_resources(extern_resources, *count);
*count = 0;
return NULL;
}
if (!(name = hlsl_strdup(ctx, name_buffer->buffer)))
{
sm4_free_extern_resources(extern_resources, *count);
*count = 0;
hlsl_release_string_buffer(ctx, name_buffer);
return NULL;
}
hlsl_release_string_buffer(ctx, name_buffer);
This pattern is rather common. I doubt the performance impact is significant, but mostly with the goal of simplifying error management I wonder if we should introduce some function that resets the string buffer and returns its previous buffer, so that we can avoid duplicating a string just to free the original immediately after.
Not necessarily for this MR, of course.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/tpf.c:
}
if (!(name = hlsl_strdup(ctx, name_buffer->buffer)))
{
sm4_free_extern_resources(extern_resources, *count);
*count = 0;
hlsl_release_string_buffer(ctx, name_buffer);
return NULL;
}
hlsl_release_string_buffer(ctx, name_buffer);
if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1,
sizeof(*extern_resources))))
{
sm4_free_extern_resources(extern_resources, *count);
*count = 0;
return NULL;
You're leaking `name` here. I would suggest resizing the array before allocating the name.
On Fri Jun 2 21:25:53 2023 +0000, Zebediah Figura wrote:
Then I think I would be in favor to emit the `<resource>` as well.
I think I may have misinterpreted this comment, I assumed you were saying that you preferred the code as I wrote it? Or are you saying we should write out "<resource>" into the table instead of stripping it? If the latter—to clarify, by "not visible" I mean the "<resource>" part is never visible, that's an invention I added, basically working around code that I wasn't sure at the time could handle multiple variables with the same name (well, definitely can't, in some respects). Native does indeed have multiple variables with the same name in the RDEF table. I don't love the <resource> hack, though I still don't hate it either, and I can agree with the idea of just storing that as an extra part of the key. -- Actually, what just occurred to me: along the same lines as some of my suggestions on this merge request, I'm not even sure we need to synthesize another variable. We could just reuse *this* variable (and set some flag telling RA that we need to allocate both sampler and texture variables for it?) That does mean we need special logic around sm4_register_from_deref() [which could potentially be a simple as passing a register set to that function?], which is a bit unfortunate but maybe ultimately not that unfortunate? What do you think?
Ok, I think I understand. I think I'd prefer if we kept two names for each variable, one for internal indexing and the other to be printed out in RDEF, rather than having that special exception if `tpf.c`, but I think I can tolerate it. I might become more vocal about it, though, if in the future the practice of prepending a variable name with a `<tag>` were to become more frequent.
I'd still ask for adding some kind of `is_separeted_resource` flag to `hlsl_ir_var` rather than parsing the variable name in 9/9.
I feel like there might have been an example that also yielded different allocation, but I might be misremembering.
Note that after this series, I have a couple of patches that take care of implementing some sorting rules so that register allocation matches native. e.g. multi-register externs are allocated before (i.e. with lower register indexes) than single-register externs.
Ok, I think I understand. I think I'd prefer if we kept two names for each variable, one for internal indexing and the other to be printed out in RDEF, rather than having that special exception if `tpf.c`, but I think I can tolerate it. I might become more vocal about it, though, if in the future the practice of prepending a variable name with a `<tag>` were to become more frequent.
I like the idea of keeping two names for each variable. I would call them `var->name` and `var->display_name`.
I'd still ask for adding some kind of `is_separeted_resource` flag to `hlsl_ir_var` rather than parsing the variable name in 9/9.
I agree with this too.
On Mon Jun 5 16:50:08 2023 +0000, Francisco Casas wrote:
Ok, I think I understand. I think I'd prefer if we kept two names for
each variable, one for internal indexing and the other to be printed out in RDEF, rather than having that special exception if `tpf.c`, but I think I can tolerate it. I might become more vocal about it, though, if in the future the practice of prepending a variable name with a `<tag>` were to become more frequent. I like the idea of keeping two names for each variable. I would call them `var->name` and `var->display_name`.
I'd still ask for adding some kind of `is_separeted_resource` flag to
`hlsl_ir_var` rather than parsing the variable name in 9/9. I agree with this too.
I'd rather see just a name plus a flag, than an external and internal name.
Honestly the more I think about my suggestion the more I like it; it seems like what the register set logic was built for. I was envisioning that sm1 bool/int/float would work that way as well, although I guess duplicating the variable there is an option too.
Honestly the more I think about my suggestion the more I like it; it seems like what the register set logic was built for. I was envisioning that sm1 bool/int/float would work that way as well, although I guess duplicating the variable there is an option too.
I have been giving it some thought of the idea of having the same variable for both the sampler part and the resource part of a lowered combined sample, and I don't think I like it very much.
There are a couple of problems we may have:
1. Seems that the allocation order of the texture part and the sampler part is not bonded together in the native compiler:
Consider: ```hlsl Texture2D tex0; sampler sam0;
sampler comb;
Texture2D tex1; sampler sam1;
float4 main() : sv_target { return tex2D(comb, float2(0, 0)) + tex0.Sample(sam0, float2(0, 0)) + tex1.Sample(sam1, float2(0, 0)); } ``` we get the following bindings: ``` // sam0 sampler NA NA s0 1 // comb sampler NA NA s1 1 // sam1 sampler NA NA s2 1 // comb texture float4 2d t0 1 // tex0 texture float4 2d t1 1 // tex1 texture float4 2d t2 1 ``` the texture part of comb is allocated before `tex0`, but the sampler part of comb is allocated after `sam0` (respecting declaration order), which shows that the texture parts of lowered combined samplers have to be moved first in the allocation order. I have a continuation patch that handle this in an easy way, just sorting the new created texture variable forward.
If we keep the same variable however, we may need to sort variables between calls to allocate_objects() for different regsets, to ensure this behavior.
2. The `var->objects_usage[]` array is created on hlsl_new_var() from the `type->regsize[]`. So we would either have to allocate memory for it once we discover that `var` is used as a combined sampler or we would have to preemptively set `type->regsize[HLSL_REGSET_TEXTURES]` the same as `type->regsize[HLSL_REGSET_SAMPLERS]` for all samplers in SM4.
3. We are currently assuming that all resources belong to a single regset, so we would have to add additional checks in many places where hlsl_type_get_regset() is called, to know if we are interested in the texture or sampler part.