Mainly promoting single object components to variables for SM 5.0's RDEF block and lowering combined samplers to separate sampler+texture objects for SM 4.
The following patches (including prepending uniform copies of resource components within struct parameters) are in: https://gitlab.winehq.org/fcasas/vkd3d/-/commits/master6c
-- v3: vkd3d-shader/hlsl: Don't allocate all texture registers for synthetic separated samplers. vkd3d-shader/hlsl: Lower combined samplers to separate sampler and texture objects for SM4. vkd3d-shader/hlsl: Separate tracking of sampler_dim and usage for object components. vkd3d-shader/hlsl: Introduce hlsl_new_synthetic_var_named(). vkd3d-shader/hlsl: Check is_uniform instead of HLSL_STORAGE_UNIFORM when validating object refs. tests: Add lowering combined samplers tests. vkd3d-shader/hlsl: Handle resource components individually for SM 5.0. vkd3d-shader/tpf: Introduce struct extern_resource. vkd3d-shader/hlsl: Allow derefs to provide the data_type.
From: Francisco Casas fcasas@codeweavers.com
After lowering a deref's path to a single offset node, there was no way of knowing the type of the referenced part of the variable. This small change avoids having to pass the data type everywhere, and it is required for supporting instructions that reference object components within struct types. --- libs/vkd3d-shader/hlsl.c | 4 ++- libs/vkd3d-shader/hlsl.h | 5 ++- libs/vkd3d-shader/hlsl_codegen.c | 3 +- libs/vkd3d-shader/tpf.c | 56 ++++++++++++++++---------------- 4 files changed, 37 insertions(+), 31 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 617aef309..cd0143209 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -524,7 +524,9 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de unsigned int i;
assert(deref); - assert(!deref->offset.node); + + if (deref->offset.node) + return deref->data_type;
type = deref->var->data_type; for (i = 0; i < deref->path_len; ++i) diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 6a4e314d0..41a2b3f71 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -595,9 +595,12 @@ struct hlsl_deref * components, within the pertaining regset), from the start of the variable, of the part * referenced. * The path is lowered to this single offset -- whose value may vary between SM1 and SM4 -- - * before writing the bytecode. */ + * before writing the bytecode. + * Since the type information cannot longer be retrieved from the offset alone, the type is + * stored in the data_type field. */ struct hlsl_src offset; enum hlsl_regset offset_regset; + struct hlsl_type *data_type; };
struct hlsl_ir_load diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 638dab6e1..18a7595e1 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -126,7 +126,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_node *instr) { - const struct hlsl_type *type; + struct hlsl_type *type; struct hlsl_ir_node *offset; struct hlsl_block block;
@@ -147,6 +147,7 @@ static void replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der }
deref->offset_regset = hlsl_type_get_regset(type); + deref->data_type = type;
if (!(offset = new_offset_instr_from_deref(ctx, &block, deref, &instr->loc))) return; diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index eadad3b9d..9c86aa890 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3336,8 +3336,9 @@ struct sm4_instruction
static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg, unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type, - const struct hlsl_deref *deref, const struct hlsl_type *data_type) + const struct hlsl_deref *deref) { + const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); const struct hlsl_ir_var *var = deref->var;
if (var->is_uniform) @@ -3474,11 +3475,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r }
static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src, - const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask) + const struct hlsl_deref *deref, unsigned int map_writemask) { unsigned int writemask;
- sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type); + sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref); if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); } @@ -3982,10 +3983,11 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, }
static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) + const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, + const struct hlsl_ir_node *coords, const struct hlsl_ir_node *texel_offset, + enum hlsl_sampler_dim dim) { + const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, resource); bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); struct sm4_instruction instr; unsigned int dim_count; @@ -4019,7 +4021,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); }
- sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask);
instr.src_count = 2;
@@ -4029,7 +4031,6 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) { - const struct hlsl_type *resource_type = load->resource.var->data_type; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node; const struct hlsl_deref *resource = &load->resource; @@ -4066,8 +4067,8 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); - sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask); - sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); instr.src_count = 3;
if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) @@ -4224,7 +4225,7 @@ static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecod memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;
- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); @@ -4750,7 +4751,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx,
instr.opcode = VKD3D_SM4_OP_MOVC;
- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask);
memset(&value, 0xff, sizeof(value)); sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); @@ -4762,7 +4763,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, { instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); instr.src_count = 1; }
@@ -4786,8 +4787,7 @@ static void write_sm4_loop(struct hlsl_ctx *ctx, }
static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_deref *sampler, + const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) { struct sm4_src_register *src; @@ -4817,10 +4817,10 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer } }
- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask); + sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask);
src = &instr.srcs[instr.src_count++]; - sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_deref(ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; src->swizzle = swizzle; @@ -4867,8 +4867,8 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, switch (load->load_type) { case HLSL_RESOURCE_LOAD: - write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, - coords, texel_offset, load->sampling_dim); + write_sm4_ld(ctx, buffer, &load->node, &load->resource, coords, texel_offset, + load->sampling_dim); break;
case HLSL_RESOURCE_SAMPLE: @@ -4882,23 +4882,23 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, break;
case HLSL_RESOURCE_GATHER_RED: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset); + write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(X, X, X, X), texel_offset); break;
case HLSL_RESOURCE_GATHER_GREEN: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); break;
case HLSL_RESOURCE_GATHER_BLUE: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); break;
case HLSL_RESOURCE_GATHER_ALPHA: - write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource, - &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset); + write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(W, W, W, W), texel_offset); break;
case HLSL_RESOURCE_SAMPLE_LOD: @@ -4937,7 +4937,7 @@ static void write_sm4_store(struct hlsl_ctx *ctx, memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_MOV;
- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type); + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); instr.dst_count = 1;
From: Francisco Casas fcasas@codeweavers.com
This struct is required for handling both whole-variable resources for SM < 5 and single-component resources for SM 5 in the same way when writing the RDEF block and resource declarations within the shader. --- libs/vkd3d-shader/tpf.c | 137 +++++++++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 50 deletions(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 9c86aa890..e8e0b9a31 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -2962,27 +2962,49 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ } }
+struct extern_resource +{ + /* var is only not NULL if this resource is a whole variable, so it may be responsible for more + * than one component. */ + const struct hlsl_ir_var *var; + + char *name; + struct hlsl_type *data_type; + + enum hlsl_regset regset; + unsigned int id, bind_count; + + bool user_packed; +}; + static int sm4_compare_extern_resources(const void *a, const void *b) { - const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a; - const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b; - enum hlsl_regset aa_regset, bb_regset; + const struct extern_resource *aa = (const struct extern_resource *)a; + const struct extern_resource *bb = (const struct extern_resource *)b; + int r; + + if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) + return r;
- aa_regset = hlsl_type_get_regset(aa->data_type); - bb_regset = hlsl_type_get_regset(bb->data_type); + return vkd3d_u32_compare(aa->id, bb->id); +}
- if (aa_regset != bb_regset) - return aa_regset - bb_regset; +static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +{ + unsigned int i;
- return aa->regs[aa_regset].id - bb->regs[bb_regset].id; + for (i = 0; i < count; ++i) + vkd3d_free(extern_resources[i].name); + vkd3d_free(extern_resources); }
-static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) +static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { - const struct hlsl_ir_var **extern_resources = NULL; + struct extern_resource *extern_resources = NULL; const struct hlsl_ir_var *var; enum hlsl_regset regset; size_t capacity = 0; + char *name;
*count = 0;
@@ -2997,11 +3019,28 @@ static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, sizeof(*extern_resources)))) { + sm4_free_extern_resources(extern_resources, *count); *count = 0; return NULL; }
- extern_resources[*count] = var; + if (!(name = hlsl_strdup(ctx, var->name))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + extern_resources[*count].var = var; + extern_resources[*count].name = name; + extern_resources[*count].data_type = var->data_type; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].bind_count = var->regs[regset].bind_count; + + extern_resources[*count].user_packed = !!var->reg_reservation.reg_type; + ++*count; }
@@ -3015,8 +3054,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) size_t cbuffers_offset, resources_offset, creator_offset, string_offset; size_t cbuffer_position, resource_position, creator_position; const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var;
@@ -3070,18 +3109,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
for (i = 0; i < extern_resources_count; ++i) { - enum hlsl_regset regset; + const struct extern_resource *resource = &extern_resources[i]; uint32_t flags = 0;
- var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); - - if (var->reg_reservation.reg_type) + if (resource->user_packed) flags |= D3D_SIF_USERPACKED;
put_u32(&buffer, 0); /* name */ - put_u32(&buffer, sm4_resource_type(var->data_type)); - if (regset == HLSL_REGSET_SAMPLERS) + put_u32(&buffer, sm4_resource_type(resource->data_type)); + if (resource->regset == HLSL_REGSET_SAMPLERS) { put_u32(&buffer, 0); put_u32(&buffer, 0); @@ -3089,15 +3125,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) } else { - unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource_format->dimx;
- put_u32(&buffer, sm4_resource_format(var->data_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); + put_u32(&buffer, sm4_resource_format(resource->data_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } - put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, var->regs[regset].bind_count); + put_u32(&buffer, resource->id); + put_u32(&buffer, resource->bind_count); put_u32(&buffer, flags); }
@@ -3123,9 +3159,9 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
for (i = 0; i < extern_resources_count; ++i) { - var = extern_resources[i]; + const struct extern_resource *resource = &extern_resources[i];
- string_offset = put_string(&buffer, var->name); + string_offset = put_string(&buffer, resource->name); set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); }
@@ -3231,7 +3267,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
add_section(dxbc, TAG_RDEF, &buffer);
- vkd3d_free(extern_resources); + sm4_free_extern_resources(extern_resources, extern_resources_count); }
static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) @@ -3663,14 +3699,16 @@ static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, write_sm4_instruction(buffer, &instr); }
-static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct extern_resource *resource) { - unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; struct sm4_instruction instr; + unsigned int i;
- for (i = 0; i < count; ++i) + assert(resource->regset == HLSL_REGSET_SAMPLERS); + + for (i = 0; i < resource->bind_count; ++i) { - if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) continue;
instr = (struct sm4_instruction) @@ -3678,7 +3716,7 @@ static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const s .opcode = VKD3D_SM4_OP_DCL_SAMPLER,
.dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, - .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id + i}, + .dsts[0].reg.idx = {resource->id + i}, .dsts[0].reg.idx_count = 1, .dst_count = 1, }; @@ -3688,18 +3726,20 @@ static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const s }
static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_var *var, bool uav) + const struct extern_resource *resource, bool uav) { enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; - unsigned int i, count = var->data_type->reg_size[regset]; struct hlsl_type *component_type; struct sm4_instruction instr; + unsigned int i;
- component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + assert(resource->regset == regset);
- for (i = 0; i < count; ++i) + component_type = hlsl_type_get_component_type(ctx, resource->data_type, 0); + + for (i = 0; i < resource->bind_count; ++i) { - if (!var->objects_usage[regset][i].used) + if (resource->var && !resource->var->objects_usage[regset][i].used) continue;
instr = (struct sm4_instruction) @@ -3708,7 +3748,7 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b | (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT),
.dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.idx = {resource->id + i}, .dsts[0].reg.idx_count = 1, .dst_count = 1,
@@ -5045,8 +5085,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc) { const struct hlsl_profile_info *profile = ctx->profile; - const struct hlsl_ir_var **extern_resources; struct vkd3d_bytecode_buffer buffer = {0}; + struct extern_resource *extern_resources; unsigned int extern_resources_count, i; const struct hlsl_buffer *cbuffer; const struct hlsl_ir_var *var; @@ -5078,17 +5118,14 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
for (i = 0; i < extern_resources_count; ++i) { - enum hlsl_regset regset; + const struct extern_resource *resource = &extern_resources[i];
- var = extern_resources[i]; - regset = hlsl_type_get_regset(var->data_type); - - if (regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&buffer, var); - else if (regset == HLSL_REGSET_TEXTURES) - write_sm4_dcl_textures(ctx, &buffer, var, false); - else if (regset == HLSL_REGSET_UAVS) - write_sm4_dcl_textures(ctx, &buffer, var, true); + if (resource->regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&buffer, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(ctx, &buffer, resource, false); + else if (resource->regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(ctx, &buffer, resource, true); }
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) @@ -5111,7 +5148,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
add_section(dxbc, TAG_SHDR, &buffer);
- vkd3d_free(extern_resources); + sm4_free_extern_resources(extern_resources, extern_resources_count); }
int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 70 +++++++++++++++++++ libs/vkd3d-shader/hlsl.h | 4 ++ libs/vkd3d-shader/tpf.c | 143 ++++++++++++++++++++++++-------------- tests/cbuffer.shader_test | 6 +- 4 files changed, 167 insertions(+), 56 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index cd0143209..6ae6a8f77 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -430,6 +430,51 @@ struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl return type; }
+unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, + enum hlsl_regset regset, unsigned int index) +{ + struct hlsl_type *next_type; + unsigned int offset = 0; + unsigned int idx; + + while (!type_is_single_component(type)) + { + next_type = type; + idx = traverse_path_from_component_index(ctx, &next_type, &index); + + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + if (regset == HLSL_REGSET_NUMERIC) + offset += idx; + break; + + case HLSL_CLASS_STRUCT: + offset += type->e.record.fields[idx].reg_offset[regset]; + break; + + case HLSL_CLASS_ARRAY: + if (regset == HLSL_REGSET_NUMERIC) + offset += idx * align(type->e.array.type->reg_size[regset], 4); + else + offset += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_OBJECT: + assert(idx == 0); + break; + + default: + vkd3d_unreachable(); + } + type = next_type; + } + + return offset; +} + static bool init_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_ir_var *var, unsigned int path_len) { @@ -2050,6 +2095,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru } }
+struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, + unsigned int index) +{ + struct hlsl_type *type = var->data_type, *current_type; + struct vkd3d_string_buffer *buffer; + unsigned int element_index; + + if (!(buffer = hlsl_get_string_buffer(ctx))) + return NULL; + + vkd3d_string_buffer_printf(buffer, "%s", var->name); + + while (!type_is_single_component(type)) + { + current_type = type; + element_index = traverse_path_from_component_index(ctx, &type, &index); + if (current_type->class == HLSL_CLASS_STRUCT) + vkd3d_string_buffer_printf(buffer, ".%s", current_type->e.record.fields[element_index].name); + else + vkd3d_string_buffer_printf(buffer, "[%u]", element_index); + } + + return buffer; +} + const char *debug_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) { struct vkd3d_string_buffer *string; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 41a2b3f71..3162bca3b 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1049,6 +1049,8 @@ const char *debug_hlsl_writemask(unsigned int writemask); const char *debug_hlsl_swizzle(unsigned int swizzle, unsigned int count);
struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type); +struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, + unsigned int index); struct vkd3d_string_buffer *hlsl_modifiers_to_string(struct hlsl_ctx *ctx, unsigned int modifiers); const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
@@ -1179,6 +1181,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type); unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type, enum hlsl_regset regset); struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type, unsigned int index); +unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type, + enum hlsl_regset regset, unsigned int index); bool hlsl_type_is_row_major(const struct hlsl_type *type); unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index e8e0b9a31..21270b832 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3000,6 +3000,7 @@ static void sm4_free_extern_resources(struct extern_resource *extern_resources,
static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { + bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; struct extern_resource *extern_resources = NULL; const struct hlsl_ir_var *var; enum hlsl_regset regset; @@ -3010,38 +3011,100 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!hlsl_type_is_resource(var->data_type)) - continue; - regset = hlsl_type_get_regset(var->data_type); - if (!var->regs[regset].allocated) - continue; - - if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, - sizeof(*extern_resources)))) + if (separate_components) { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; - } + unsigned int component_count = hlsl_type_component_count(var->data_type); + unsigned int k, regset_offset;
- if (!(name = hlsl_strdup(ctx, var->name))) - { - sm4_free_extern_resources(extern_resources, *count); - *count = 0; - return NULL; + for (k = 0; k < component_count; ++k) + { + struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k); + struct vkd3d_string_buffer *name_buffer; + + if (!hlsl_type_is_resource(component_type)) + continue; + + regset = hlsl_type_get_regset(component_type); + regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); + + if (regset_offset > var->regs[regset].bind_count) + continue; + + if (var->objects_usage[regset][regset_offset].used) + { + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name_buffer = hlsl_component_to_string(ctx, var, k))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + if (!(name = hlsl_strdup(ctx, name_buffer->buffer))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + hlsl_release_string_buffer(ctx, name_buffer); + return NULL; + } + hlsl_release_string_buffer(ctx, name_buffer); + + + extern_resources[*count].var = NULL; + extern_resources[*count].name = name; + extern_resources[*count].data_type = component_type; + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id + regset_offset; + extern_resources[*count].bind_count = 1; + + extern_resources[*count].user_packed = false; + + ++*count; + } + } } + else + { + if (!hlsl_type_is_resource(var->data_type)) + continue; + regset = hlsl_type_get_regset(var->data_type); + if (!var->regs[regset].allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name = 
hlsl_strdup(ctx, var->name))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + }
- extern_resources[*count].var = var; - extern_resources[*count].name = name; - extern_resources[*count].data_type = var->data_type; + extern_resources[*count].var = var; + extern_resources[*count].name = name; + extern_resources[*count].data_type = var->data_type;
- extern_resources[*count].regset = regset; - extern_resources[*count].id = var->regs[regset].id; - extern_resources[*count].bind_count = var->regs[regset].bind_count; + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].bind_count = var->regs[regset].bind_count;
- extern_resources[*count].user_packed = !!var->reg_reservation.reg_type; + extern_resources[*count].user_packed = !!var->reg_reservation.reg_type;
- ++*count; + ++*count; + } }
qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); @@ -4871,33 +4934,15 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer static void write_sm4_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) { - const struct hlsl_type *resource_type = load->resource.var->data_type; const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node;
- if (!hlsl_type_is_resource(resource_type)) + if (load->sampler.var && !load->sampler.var->is_uniform) { - hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); + hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); return; }
- if (load->sampler.var) - { - const struct hlsl_type *sampler_type = load->sampler.var->data_type; - - if (!hlsl_type_is_resource(sampler_type)) - { - hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); - return; - } - - if (!load->sampler.var->is_uniform) - { - hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); - return; - } - } - if (!load->resource.var->is_uniform) { hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); @@ -4950,14 +4995,6 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, static void write_sm4_resource_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) { - const struct hlsl_type *resource_type = store->resource.var->data_type; - - if (!hlsl_type_is_resource(resource_type)) - { - hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); - return; - } - if (!store->resource.var->is_uniform) { hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); diff --git a/tests/cbuffer.shader_test b/tests/cbuffer.shader_test index 7e2a91dca..83397c189 100644 --- a/tests/cbuffer.shader_test +++ b/tests/cbuffer.shader_test @@ -694,7 +694,7 @@ shader model >= 5.0 size (1, 1) 0.0 0.0 0.0 0.5
-[pixel shader todo] +[pixel shader] struct apple { float2 a; @@ -718,5 +718,5 @@ uniform 0 float4 0.0 1.0 2.0 3.0 uniform 4 float4 4.0 5.0 6.0 7.0 uniform 8 float4 8.0 9.0 10.0 11.0 uniform 12 float4 12.0 13.0 14.0 15.0 -todo draw quad -todo probe all rgba (124.0, 135.0, 146.0, 150.5) +draw quad +probe all rgba (124.0, 135.0, 146.0, 150.5)
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/hlsl-combined-samplers.shader_test | 125 +++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 tests/hlsl-combined-samplers.shader_test
diff --git a/Makefile.am b/Makefile.am index e06d0eeb4..560c2c819 100644 --- a/Makefile.am +++ b/Makefile.am @@ -79,6 +79,7 @@ vkd3d_shader_tests = \ tests/hlsl-attributes.shader_test \ tests/hlsl-bool-cast.shader_test \ tests/hlsl-clamp.shader_test \ + tests/hlsl-combined-samplers.shader_test \ tests/hlsl-comma.shader_test \ tests/hlsl-cross.shader_test \ tests/hlsl-d3dcolor-to-ubyte4.shader_test \ diff --git a/tests/hlsl-combined-samplers.shader_test b/tests/hlsl-combined-samplers.shader_test new file mode 100644 index 000000000..b9677749a --- /dev/null +++ b/tests/hlsl-combined-samplers.shader_test @@ -0,0 +1,125 @@ +[sampler 0] +filter linear linear linear +address clamp clamp clamp + +[sampler 1] +filter linear linear linear +address clamp clamp clamp + +[sampler 2] +filter linear linear linear +address clamp clamp clamp + +[texture 0] +size (1, 1) +0.0 0.0 0.0 1.0 + +[texture 1] +size (1, 1) +1.0 1.0 1.0 1.0 + +[texture 2] +size (1, 1) +2.0 2.0 2.0 1.0 + +[texture 3] +size (1, 1) +3.0 3.0 3.0 1.0 + +[texture 4] +size (1, 1) +4.0 4.0 4.0 1.0 + + +[pixel shader todo] +sampler sam; + +float4 main() : sv_target +{ + return tex2D(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (0, 0, 0, 1) + + +[pixel shader todo] +Texture2D tex; +sampler sam; + +// Textures for new separated samplers are allocated before regular textures. 
+float4 main() : sv_target +{ + return 10 * tex.Sample(sam, float2(0, 0)) + tex2D(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (10, 10, 10, 11) + + +[pixel shader todo] +Texture2D tex; +sampler sam[2]; + +float4 main() : sv_target +{ + return 10 * tex.Sample(sam[0], float2(0, 0)) + tex2D(sam[1], float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (21, 21, 21, 11) + + +[pixel shader todo] +sampler sam0; +sampler sam1; +sampler sam2; + +float4 main() : sv_target +{ + return 100 * tex2D(sam1, float2(0, 0)) + 10 * tex2D(sam0, float2(0, 0)) + + tex2D(sam2, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (102, 102, 102, 111) + + +[pixel shader todo] +Texture2D tex[2][2]; +sampler sam; + +float4 main() : sv_target +{ + return 100 * tex[0][0].Load(int3(0, 0, 0)) + 10 * tex2D(sam, float2(0, 0)) + + tex[1][1].Sample(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (104, 104, 104, 111) + + +[require] +shader model >= 5.0 + + +[pixel shader todo] +struct +{ + Texture2D tex; + sampler sam; +} foo; + +float4 main() : sv_target +{ + return 10 * foo.tex.Sample(foo.sam, float2(0, 0)) + tex2D(foo.sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (10, 10, 10, 11)
From: Francisco Casas fcasas@codeweavers.com
We are using the hlsl_ir_var.is_uniform flag to indicate when an object is a uniform copy created from a variable with the HLSL_STORAGE_UNIFORM modifier.
We should be checking for this instead of the HLSL_STORAGE_UNIFORM flag which is also set to 1 for the original variables, and there should be no reason to use this flag instead of "is_uniform" after the uniform copies and combined/separated samplers are created. --- libs/vkd3d-shader/hlsl_codegen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 18a7595e1..5de8388f7 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1680,7 +1680,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
- if (!(load->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Loaded resource must have a single uniform source."); @@ -1695,7 +1695,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_
if (load->sampler.var) { - if (!(load->sampler.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!load->sampler.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Resource load sampler must have a single uniform source."); @@ -1713,7 +1713,7 @@ static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ { struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
- if (!(store->resource.var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (!store->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, "Accessed resource must have a single uniform source.");
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 20 +++++++++++++------- libs/vkd3d-shader/hlsl.h | 2 ++ 2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 6ae6a8f77..fa28e27bb 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1039,18 +1039,24 @@ struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *tem struct vkd3d_string_buffer *string; struct hlsl_ir_var *var; static LONG counter; - const char *name;
if (!(string = hlsl_get_string_buffer(ctx))) return NULL; vkd3d_string_buffer_printf(string, "<%s-%u>", template, InterlockedIncrement(&counter)); - if (!(name = hlsl_strdup(ctx, string->buffer))) - { - hlsl_release_string_buffer(ctx, string); - return NULL; - } - var = hlsl_new_var(ctx, name, type, loc, NULL, 0, NULL); + var = hlsl_new_synthetic_var_named(ctx, string->buffer, type, loc); hlsl_release_string_buffer(ctx, string); + return var; +} + +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_var *var; + const char *name_copy; + + if (!(name_copy = hlsl_strdup(ctx, name))) + return NULL; + var = hlsl_new_var(ctx, name_copy, type, loc, NULL, 0, NULL); if (var) list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); return var; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 3162bca3b..1c5c536cd 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1150,6 +1150,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned in struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); +struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, + struct hlsl_type *type, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 50 ++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 18 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5de8388f7..5630f58d6 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2890,7 +2890,7 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); }
-static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_resource_load *load; struct hlsl_ir_var *var; @@ -2902,15 +2902,16 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n
load = hlsl_ir_resource_load(instr); var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false;
if (regset == HLSL_REGSET_SAMPLERS) { enum hlsl_sampler_dim dim;
assert(!load->sampler.var); - if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) - return false;
dim = var->objects_usage[regset][index].sampler_dim; if (dim != load->sampling_dim) @@ -2928,25 +2929,37 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n return false; } } - var->objects_usage[regset][index].used = true; - var->objects_usage[regset][index].sampler_dim = load->sampling_dim; } - else - { - if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) - return false; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim;
- var->objects_usage[regset][index].used = true; - var->objects_usage[regset][index].sampler_dim = load->sampling_dim; + return false; +}
- if (load->sampler.var) - { - var = load->sampler.var; - if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) - return false; +static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index;
- var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; - } + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; }
return false; @@ -3942,6 +3955,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry }
hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); hlsl_transform_ir(ctx, track_object_components_usage, body, NULL);
/* TODO: move forward, remove when no longer needed */
From: Zebediah Figura zfigura@codeweavers.com
Co-authored-by: Francisco Casas fcasas@codeweavers.com --- libs/vkd3d-shader/hlsl.h | 1 + libs/vkd3d-shader/hlsl_codegen.c | 94 ++++++++++++++++++++++-- libs/vkd3d-shader/tpf.c | 18 +++-- tests/hlsl-combined-samplers.shader_test | 24 +++--- 4 files changed, 111 insertions(+), 26 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 1c5c536cd..799d039d0 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -417,6 +417,7 @@ struct hlsl_ir_var uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; uint32_t is_param : 1; + uint32_t is_separated_resource : 1; };
/* Sized array of variables representing a function's parameters. */ diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5630f58d6..32c89422f 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1982,6 +1982,84 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; }
+/* Lower combined samples and sampler variables to synthesized separated textures and samplers. + * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ +static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct vkd3d_string_buffer *name; + struct hlsl_ir_var *var; + unsigned int i; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + load = hlsl_ir_resource_load(instr); + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: + case HLSL_RESOURCE_GATHER_RED: + case HLSL_RESOURCE_GATHER_GREEN: + case HLSL_RESOURCE_GATHER_BLUE: + case HLSL_RESOURCE_GATHER_ALPHA: + return false; + + case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + break; + } + if (load->sampler.var) + return false; + + if (!hlsl_type_is_resource(load->resource.var->data_type)) + { + hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs."); + return false; + } + + assert(hlsl_type_get_regset(load->resource.var->data_type) == HLSL_REGSET_SAMPLERS); + + if (!(name = hlsl_get_string_buffer(ctx))) + return false; + vkd3d_string_buffer_printf(name, "<resource>%s", load->resource.var->name); + + TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer)); + + if (!(var = hlsl_get_var(ctx->globals, name->buffer))) + { + struct hlsl_type *texture_array_type = hlsl_new_texture_type(ctx, load->sampling_dim, + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); + + /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. 
*/ + struct hlsl_type *arr_type = load->resource.var->data_type; + for (i = 0; i < load->resource.path_len; ++i) + { + assert(arr_type->class == HLSL_CLASS_ARRAY); + texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); + arr_type = arr_type->e.array.type; + } + + if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc))) + { + hlsl_release_string_buffer(ctx, name); + return false; + } + var->is_uniform = 1; + var->is_separated_resource = true; + + list_add_before(&load->resource.var->extern_entry, &var->extern_entry); + } + hlsl_release_string_buffer(ctx, name); + + hlsl_copy_deref(ctx, &load->sampler, &load->resource); + load->resource.var = var; + assert(hlsl_deref_get_type(ctx, &load->resource)->base_type == HLSL_TYPE_TEXTURE); + assert(hlsl_deref_get_type(ctx, &load->sampler)->base_type == HLSL_TYPE_SAMPLER); + + return true; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -3362,7 +3440,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var1->is_uniform || var1->data_type->class == HLSL_CLASS_OBJECT) + if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type)) continue;
buffer = var1->buffer; @@ -3373,7 +3451,7 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) { unsigned int var1_reg_size, var2_reg_size;
- if (!var2->is_uniform || var2->data_type->class == HLSL_CLASS_OBJECT) + if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type)) continue;
if (var1 == var2 || var1->buffer != var2->buffer) @@ -3423,7 +3501,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx)
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (var->is_uniform && var->data_type->class != HLSL_CLASS_OBJECT) + if (var->is_uniform && !hlsl_type_is_resource(var->data_type)) { if (var->is_param) var->buffer = ctx->params_buffer; @@ -3941,6 +4019,12 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } while (progress);
+ hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); + if (profile->major_version < 4) { hlsl_transform_ir(ctx, lower_division, body, NULL); @@ -3954,10 +4038,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_abs, body, NULL); }
- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); - hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); - /* TODO: move forward, remove when no longer needed */ hlsl_transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); while (hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL)); diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 21270b832..cf867c353 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -2998,6 +2998,13 @@ static void sm4_free_extern_resources(struct extern_resource *extern_resources, vkd3d_free(extern_resources); }
+static const char *string_skip_tag(const char *string) +{ + if (!strncmp(string, "<resource>", strlen("<resource>"))) + return string + strlen("<resource>"); + return string; +} + static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) { bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; @@ -3046,7 +3053,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un *count = 0; return NULL; } - if (!(name = hlsl_strdup(ctx, name_buffer->buffer))) + if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer)))) { sm4_free_extern_resources(extern_resources, *count); *count = 0; @@ -3086,7 +3093,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un return NULL; }
- if (!(name = hlsl_strdup(ctx, var->name))) + if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name)))) { sm4_free_extern_resources(extern_resources, *count); *count = 0; @@ -4958,11 +4965,8 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx,
case HLSL_RESOURCE_SAMPLE: case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - if (!load->sampler.var) - { - hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression."); - return; - } + /* Combined sample expressions were lowered. */ + assert(load->sampler.var); write_sm4_sample(ctx, buffer, load); break;
diff --git a/tests/hlsl-combined-samplers.shader_test b/tests/hlsl-combined-samplers.shader_test index b9677749a..cf13e705b 100644 --- a/tests/hlsl-combined-samplers.shader_test +++ b/tests/hlsl-combined-samplers.shader_test @@ -31,7 +31,7 @@ size (1, 1) 4.0 4.0 4.0 1.0
-[pixel shader todo] +[pixel shader] sampler sam;
float4 main() : sv_target @@ -40,11 +40,11 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (0, 0, 0, 1) +draw quad +probe all rgba (0, 0, 0, 1)
-[pixel shader todo] +[pixel shader] Texture2D tex; sampler sam;
@@ -55,11 +55,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad todo probe all rgba (10, 10, 10, 11)
-[pixel shader todo] +[pixel shader] Texture2D tex; sampler sam[2];
@@ -69,11 +69,11 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad todo probe all rgba (21, 21, 21, 11)
-[pixel shader todo] +[pixel shader] sampler sam0; sampler sam1; sampler sam2; @@ -85,11 +85,11 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (102, 102, 102, 111) +draw quad +probe all rgba (102, 102, 102, 111)
-[pixel shader todo] +[pixel shader] Texture2D tex[2][2]; sampler sam;
@@ -100,7 +100,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad todo probe all rgba (104, 104, 104, 111)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 32c89422f..8700d5cb6 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3057,9 +3057,12 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) { for (i = 0; i < type->reg_size[k]; ++i) { - /* Samplers are only allocated until the last used one. */ + bool is_separated = var->is_separated_resource; + + /* Samplers (and textures separated from them) are only allocated until the last + * used one. */ if (var->objects_usage[k][i].used) - var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; + var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS || is_separated) ? i + 1 : type->reg_size[k]; } } }
On Wed Jun 7 01:34:53 2023 +0000, Francisco Casas wrote:
changed this line in [version 3 of the diff](/wine/vkd3d/-/merge_requests/209/diffs?diff_id=50514&start_sha=95acdde6345364e271e102410a6a3512aaba5dd6#3cf804f245af47d51595ff932bf817c50967eea2_3059_3060)
I added the `is_separated_resource` flag. However, it is still necessary to remove the `<resource>` from the variable name when writing the RDEF. I put this logic in string_skip_tag() which is called in sm4_get_extern_resources().
It's fine for me to have this field, but once we decide to have it, it would probably make sense to keep it for the whole life of the `struct hlsl_deref`, not just when storing the offset. I don't know if it can be useful before that, but the information it conveys makes sense at any time, so artificially restricting when it is available just makes our life harder.
I implemented a patch with this idea https://gitlab.winehq.org/fcasas/vkd3d/-/commit/4f2e06da2ce02ba6ed803cb793d1... , but I think it is controversial enough that it is better not to include it in this MR.
Also, let's just call it `type` or `data_type`.
Done!
On Wed Jun 7 01:35:06 2023 +0000, Francisco Casas wrote:
I added the `is_separated_resource` flag. However, it is still necessary to remove the `<resource>` from the variable name when writing the RDEF. I put this logic in string_skip_tag() which is called in sm4_get_extern_resources().
I still prefer the idea of having a public and an internal name, but this is better than it was before.
This merge request was approved by Giovanni Mascellani.
- Seems that the allocation order of the texture part and the sampler part is not bonded together in the native compiler:
That doesn't seem like a blocker, though. We already do allocation in multiple passes; we could do a separate pass for combined samplers where necessary.
- The var->objects_usage[] array is created on hlsl_new_var() from the type->regsize[]. So we would either have to allocate memory for it once we discover that var is used as a combined sampler or we would have to preemptively set type->regsize[HLSL_REGSET_TEXTURES] the same as type->regsize[HLSL_REGSET_SAMPLERS] for all samplers in SM4.
Or we mark internally that the variable is used as a combined sampler before RA (i.e. the same time when we're running lower_combined_samples, probably) and then set objects_usage based on that.
- We are currently assuming that all resources belong to a single regset, so we would have to add additional checks in many places where hlsl_type_get_regset() is called, to know if we are interested in the texture or sampler part.
See, and this is where I have to once again state that hlsl_type_get_regset() has always seemed like a fundamentally broken function to me. It never had a clear boolean answer for structs, and I'm not convinced it can have a clear boolean answer for individual variables either.
I'm not married to this idea of reusing variables, but I'm still not quite convinced it's the wrong idea either.
Seems that the allocation order of the texture part and the sampler part is not bonded together in the native compiler:
That doesn't seem like a blocker, though. We already do allocation in multiple passes; we could do a separate pass for combined samplers where necessary.
Yep, that is a reasonable solution.
The var->objects_usage[] array is created on hlsl_new_var() from the type->regsize[]. So we would either have to allocate memory for it once we discover that var is used as a combined sampler or we would have to preemptively set type->regsize[HLSL_REGSET_TEXTURES] the same as type->regsize[HLSL_REGSET_SAMPLERS] for all samplers in SM4.
Or we mark internally that the variable is used as a combined sampler before RA (i.e. the same time when we're running lower_combined_samples, probably) and then set objects_usage based on that.
A problem is that we need to use var->objects_usage in track_object_components_sampler_dim() first, so that we know the sampler_dim of the new texture to be generated.
But I have an alternate solution: we could add a "requires_separate_texture" field to the anonymous struct in objects_usage[][], storing the requirement for the texture resource allocation in objects_usage[HLSL_REGSET_SAMPLERS][·].
See, and this is where I have to once again state that hlsl_type_get_regset() has always seemed like a fundamentally broken function to me. It never had a clear boolean answer for structs, and I'm not convinced it can have a clear boolean answer for individual variables either.
I agree in part now. IMO, we should get rid of hlsl_type_get_regset() except for when it is used on a particular deref (maybe turning it into something like hlsl_deref_get_regset(), or extending deref->offset_regset to the whole lifetime of the deref).
Currently we are using it for two things:
1. As "the regset of the type of a value used by an instruction", which I think is the correct use. In theory, a deref shall never point to a struct after we have split copies.
All these uses after the deref is lowered into a single offset can be replaced by deref->offset_regset. The uses before the lowering would still require using the implementation of hlsl_type_get_regset(), but on the type reached by the deref's path. We would have the guarantee that it is not a struct (unless we are doing something wrong).
2. When we iterate over extern variables to either allocate registers or write the CTAB and RDEF sections. We sometimes use hlsl_type_is_resource() to ensure that they belong to a single regset.
In all these cases we should assume that each variable doesn't necessarily belong to a single regset and not call the function. Instead, iterate over all regsets and check individually if the variable is allocated, or needs to be.
---
The last change in particular is big so I would prefer to leave these things for part 4 of this series and upstream this MR as it is. If you think we should introduce them right away, that's fine, I hope I don't make these new patches too controversial though.
I'm not married to this idea of reusing variables, but I'm still not quite convinced it's the wrong idea either.
I'm not entirely sure what your positive reason for reusing variables is, though. I find Francisco's current solution reasonable. I had a few comments on some specific details, but the general architecture makes sense. I'd say that native behavior makes it rather natural to think of an SM1-3 sampler as, secretly, an SM4 sampler plus a resource, so the transformation Francisco is introducing doesn't look hacky. OTOH using the same variable object for two different things doesn't feel very natural. I'm sure it can be made to work, but I see little value in reconsidering previous design choices for what doesn't really look like an enhancement in itself. Could you please explain what you like about your proposal?
I'm not entirely sure what your positive reason for reusing variables is, though. I find Francisco's current solution reasonable. I had a few comments on some specific details, but the general architecture makes sense. I'd say that native behavior makes it rather natural to think of an SM1-3 sampler as, secretly, an SM4 sampler plus a resource, so the transformation Francisco is introducing doesn't look hacky. OTOH using the same variable object for two different things doesn't feel very natural. I'm sure it can be made to work, but I see little value in reconsidering previous design choices for what doesn't really look like an enhancement in itself. Could you please explain what you like about your proposal?
Without having tried it, it just seems simple and like it makes sense. The old code isn't complex either (well, until it starts needing to handle struct/array types) but it seems like it should be even simpler to do it this way. There's no need to add synthetic variables (and hence no internal flags to mark them as different from the other variables).
It also lines up with my preconceived notion of how sm3 int/bool constants would work. Again, they don't actually have to work that way, but I feel like it makes a lot of sense.