Mainly comprises support for allocating arrays of resources, and loading from them, for both SM1 and SM4.
-- v7: vkd3d-shader/hlsl: Support resource arrays when writing SM4. vkd3d-shader/hlsl: Write resource loads in SM1. vkd3d-shader/hlsl: Write sampler declarations in SM1. vkd3d-shader/hlsl: Track objects sampling dimension. vkd3d-shader/hlsl: Track object components usage and allocate registers accordingly. tests: Test objects as parameters. vkd3d-shader/hlsl: Skip object components when creating input/output copies. vkd3d-shader/hlsl: Add fixme for uniform copies for objects within structs. vkd3d-shader/hlsl: Support multiple-register variables in object regsets. tests: Add additional texture array register reservation tests.
From: Francisco Casas fcasas@codeweavers.com
--- tests/register-reservations.shader_test | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+)
diff --git a/tests/register-reservations.shader_test b/tests/register-reservations.shader_test index f0287d00..b60e980b 100644 --- a/tests/register-reservations.shader_test +++ b/tests/register-reservations.shader_test @@ -23,6 +23,38 @@ size (1, 1) 4.0 4.0 4.0 99.0
+% If a single component in a texture array is used, all registers are reserved. +[pixel shader todo] +Texture2D partially_used[2][2]; +Texture2D tex; + +float4 main() : sv_target +{ + return 10 * tex.Load(int3(0, 0, 0)) + partially_used[0][1].Load(int3(0, 0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (41.0, 41.0, 41.0, 1089.0) + + +% If no component in a texture array is used, and it doesn't have a register reservation, no +% register is reserved. +[pixel shader] +Texture2D unused[4]; +Texture2D tex; + +float4 main() : sv_target +{ + return tex.Load(int3(0, 0, 0)); +} + +[test] +draw quad +probe all rgba (0.0, 0.0, 0.0, 99.0) + + +% Register reservations force to reserve all the resource registers. Even if unused. [pixel shader] Texture2D unused : register(t0); Texture2D tex;
From: Francisco Casas fcasas@codeweavers.com
Variables that contain more than one object (arrays or structs) require the allocation of contiguous registers in the respective object register spaces. --- libs/vkd3d-shader/d3dbc.c | 14 ++-- libs/vkd3d-shader/hlsl.c | 8 ++- libs/vkd3d-shader/hlsl.h | 11 +++- libs/vkd3d-shader/hlsl_codegen.c | 85 ++++++++++++++++++------- libs/vkd3d-shader/tpf.c | 17 ++++- tests/register-reservations.shader_test | 2 +- 6 files changed, 95 insertions(+), 42 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index aa45dc2d..8bb86214 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1337,19 +1337,15 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe if (!var->semantic.name && var->regs[regset].allocated) { put_u32(buffer, 0); /* name */ - if (var->data_type->class == HLSL_CLASS_OBJECT - && (var->data_type->base_type == HLSL_TYPE_SAMPLER - || var->data_type->base_type == HLSL_TYPE_TEXTURE)) + if (regset == HLSL_REGSET_NUMERIC) { - assert(regset == HLSL_REGSET_SAMPLERS); - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); - put_u32(buffer, 1); + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); + put_u32(buffer, var->data_type->reg_size[regset] / 4); } else { - assert(regset == HLSL_REGSET_NUMERIC); - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); - put_u32(buffer, var->data_type->reg_size[regset] / 4); + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); + put_u32(buffer, var->regs[regset].bind_count); } put_u32(buffer, 0); /* type */ put_u32(buffer, 0); /* FIXME: default value */ diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 19656431..9fca74b7 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -166,6 +166,9 @@ static unsigned int get_array_size(const struct hlsl_type *type)
bool hlsl_type_is_resource(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_is_resource(type->e.array.type); + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) @@ -186,6 +189,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) if (type->class <= HLSL_CLASS_LAST_NUMERIC) return HLSL_REGSET_NUMERIC;
+ if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_get_regset(type->e.array.type); + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) @@ -203,8 +209,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) vkd3d_unreachable(); } } - else if (type->class == HLSL_CLASS_ARRAY) - return hlsl_type_get_regset(type->e.array.type);
vkd3d_unreachable(); } diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 2e9d3f5e..5f4f5735 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -230,16 +230,21 @@ struct hlsl_struct_field size_t name_bytecode_offset; };
-/* Information of the register allocated for an instruction node or variable. +/* Information of the register(s) allocated for an instruction node or variable. * These values are initialized at the end of hlsl_emit_bytecode(), after the compilation passes, * just before writing the bytecode. - * For numeric registers, a writemask can be provided to indicate the reservation of only some of the - * 4 components. * The type of register (register class) is implied from its use, so it is not stored in this * struct. */ struct hlsl_reg { + /* Index of the first register allocated. */ uint32_t id; + /* Number of registers to be allocated. + * Unlike the variable's type's regsize, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. */ + uint32_t bind_count; + /* For numeric registers, a writemask can be provided to indicate the reservation of only some + * of the 4 components. */ unsigned int writemask; /* Whether the register has been allocated. 
*/ bool allocated; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 42f8ab3b..517f27d6 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2400,8 +2400,10 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) { var->regs[regset].allocated = true; var->regs[regset].id = var->reg_reservation.reg_index; - TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.reg_type, - var->reg_reservation.reg_index); + var->regs[regset].bind_count = var->data_type->reg_size[regset]; + TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, + var->reg_reservation.reg_index, var->reg_reservation.reg_type, + var->reg_reservation.reg_index + var->regs[regset].bind_count); } } } @@ -2627,6 +2629,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness * liveness->regs[component_idx + i].last_read = last_read; } ret.id = component_idx / 4; + ret.bind_count = 1; ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); ret.allocated = true; liveness->reg_count = max(liveness->reg_count, ret.id + 1); @@ -2664,6 +2667,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liv for (i = 0; i < reg_size; ++i) liveness->regs[component_idx + i].last_read = last_read; ret.id = component_idx / 4; + ret.bind_count = align(reg_size, 4) / 4; ret.allocated = true; liveness->reg_count = max(liveness->reg_count, ret.id + align(reg_size, 4)); return ret; @@ -2957,6 +2961,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { var->regs[HLSL_REGSET_NUMERIC].allocated = true; var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; + var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 
'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); @@ -3135,6 +3140,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) }
buffer->reg.id = buffer->reservation.reg_index; + buffer->reg.bind_count = 1; buffer->reg.allocated = true; TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); } @@ -3144,6 +3150,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) ++index;
buffer->reg.id = index; + buffer->reg.bind_count = 1; buffer->reg.allocated = true; TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++index; @@ -3165,13 +3172,17 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum uint32_t index) { const struct hlsl_ir_var *var; + unsigned int start, count;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { if (!var->regs[regset].allocated) continue;
- if (index == var->regs[regset].id) + start = var->regs[regset].id; + count = var->regs[regset].bind_count; + + if (start <= index && index < start + count) return var; } return NULL; @@ -3182,7 +3193,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) char regset_name = get_regset_name(regset); struct hlsl_ir_var *var; uint32_t min_index = 0; - uint32_t index;
if (regset == HLSL_REGSET_UAVS) { @@ -3194,19 +3204,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } }
- index = min_index; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->last_read || !var->data_type->reg_size[regset]) + unsigned int count = var->regs[regset].bind_count; + + if (count == 0) continue;
if (var->regs[regset].allocated) { - const struct hlsl_ir_var *reserved_object; - unsigned int index = var->regs[regset].id; - - reserved_object = get_allocated_object(ctx, regset, index); + const struct hlsl_ir_var *reserved_object, *last_reported = NULL; + unsigned int index, i;
if (var->regs[regset].id < min_index) { @@ -3214,28 +3222,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", var->regs[regset].id, min_index - 1); + continue; } - else if (reserved_object && reserved_object != var) + + for (i = 0; i < count; ++i) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple objects bound to %c%u.", regset_name, index); - hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Object '%s' is already bound to %c%u.", reserved_object->name, - regset_name, index); - } + index = var->regs[regset].id + i;
- var->regs[regset].id = var->reg_reservation.reg_index; - var->regs[regset].allocated = true; - TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); + reserved_object = get_allocated_object(ctx, regset, index); + if (reserved_object && reserved_object != var && reserved_object != last_reported) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "Multiple variables bound to %c%u.", regset_name, index); + hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, + "Variable '%s' is already bound to %c%u.", reserved_object->name, + regset_name, index); + last_reported = reserved_object; + } + } } else { - while (get_allocated_object(ctx, regset, index)) + unsigned int index = min_index; + unsigned int available = 0; + + while (available < count) + { + if (get_allocated_object(ctx, regset, index)) + available = 0; + else + ++available; ++index; + } + index -= count;
var->regs[regset].id = index; var->regs[regset].allocated = true; - TRACE("Allocated object to %c%u.\n", regset_name, index); + TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, + index + count); ++index; } } @@ -3462,7 +3486,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry { var = entry_func->parameters.vars[i];
- if (var->data_type->class == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { prepend_uniform_copy(ctx, &body->instrs, var); } @@ -3557,6 +3581,19 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry rb_for_each_entry(&ctx->functions, dump_function, ctx);
allocate_register_reservations(ctx); + + /* For now, request all the registers for each variable, as long as it is used. */ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int k; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + if (!var->regs[k].allocated) + var->regs[k].bind_count = var->last_read ? var->data_type->reg_size[k] : 0; + } + } + allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) { diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 3debe952..71edced3 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -2609,6 +2609,9 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_type(type->e.array.type); + switch (type->base_type) { case HLSL_TYPE_SAMPLER: @@ -2624,6 +2627,9 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type)
static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_format(type->e.array.type); + switch (type->e.resource_format->base_type) { case HLSL_TYPE_DOUBLE: @@ -2648,6 +2654,9 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type
static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return sm4_rdef_resource_dimension(type->e.array.type); + switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: @@ -3051,7 +3060,9 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (var->is_uniform) { - if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE) + enum hlsl_regset regset = hlsl_type_get_regset(data_type); + + if (regset == HLSL_REGSET_TEXTURES) { reg->type = VKD3D_SM4_RT_RESOURCE; reg->dim = VKD3D_SM4_DIMENSION_VEC4; @@ -3061,7 +3072,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } - else if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV) + else if (regset == HLSL_REGSET_UAVS) { reg->type = VKD3D_SM5_RT_UAV; reg->dim = VKD3D_SM4_DIMENSION_VEC4; @@ -3071,7 +3082,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } - else if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER) + else if (regset == HLSL_REGSET_SAMPLERS) { reg->type = VKD3D_SM4_RT_SAMPLER; reg->dim = VKD3D_SM4_DIMENSION_NONE; diff --git a/tests/register-reservations.shader_test b/tests/register-reservations.shader_test index b60e980b..16aee594 100644 --- a/tests/register-reservations.shader_test +++ b/tests/register-reservations.shader_test @@ -80,4 +80,4 @@ float4 main() : sv_target
[test] draw quad -todo probe all rgba (4.0, 4.0, 4.0, 99.0) +probe all rgba (4.0, 4.0, 4.0, 99.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 517f27d6..a2bfa228 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3449,6 +3449,25 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } }
+static bool type_has_object_components(struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_OBJECT) + return true; + if (type->class == HLSL_CLASS_ARRAY) + return type_has_object_components(type->e.array.type); + if (type->class == HLSL_CLASS_STRUCT) + { + unsigned int i; + + for (i = 0; i < type->e.record.field_count; ++i) + { + if (type_has_object_components(type->e.record.fields[i].type)) + return true; + } + } + return false; +} + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { @@ -3492,6 +3511,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } else { + if (type_has_object_components(var->data_type)) + hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); + if (var->data_type->class != HLSL_CLASS_STRUCT && !var->semantic.name) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Parameter \"%s\" is missing a semantic.", var->name);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index a2bfa228..e44e92df 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -315,6 +315,9 @@ static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *field_load; struct hlsl_ir_constant *c;
+ if (hlsl_type_is_resource(field->type)) + continue; + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; list_add_after(&lhs->node.entry, &c->node.entry); @@ -410,6 +413,9 @@ static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *field_load; struct hlsl_ir_constant *c;
+ if (hlsl_type_is_resource(field->type)) + continue; + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; list_add_tail(instrs, &c->node.entry);
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/object-parameters.shader_test | 183 ++++++++++++++++++++++++++++ tests/shader_runner_d3d12.c | 4 +- 3 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 tests/object-parameters.shader_test
diff --git a/Makefile.am b/Makefile.am index b9fd06ee..d7f7e85e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -140,6 +140,7 @@ vkd3d_shader_tests = \ tests/nointerpolation.shader_test \ tests/object-field-offsets.shader_test \ tests/object-references.shader_test \ + tests/object-parameters.shader_test \ tests/pow.shader_test \ tests/preproc-if.shader_test \ tests/preproc-ifdef.shader_test \ diff --git a/tests/object-parameters.shader_test b/tests/object-parameters.shader_test new file mode 100644 index 00000000..2ae1762c --- /dev/null +++ b/tests/object-parameters.shader_test @@ -0,0 +1,183 @@ +[pixel shader fail todo] +Texture2D tex0; + +float4 main(out Texture2D tex : TEXTURE) : sv_target +{ + return float4(1, 2, 3, 4); +} + + +[pixel shader fail todo] +struct params +{ + Texture2D tex : TEXTURE; +}; + +float4 main(inout params x) : sv_target +{ + return float4(1, 2, 3, 4); +} + + +[require] +shader model >= 5.0 + + +[pixel shader todo] +uniform float global; + +struct apple +{ + Texture2D tex; + float4 pos : sv_position; +}; + +float4 main(struct apple input, uniform float param) : sv_target +{ + return input.tex.Load(int3(0, 0, 0)) + global + param + input.pos; +} + + +[texture 0] +size (2, 2) +1.0 1.0 1.0 1.0 0.0 0.0 0.0 1.0 +1.0 1.0 1.0 1.0 0.0 0.0 0.0 1.0 + +[texture 1] +size (2, 2) +2.0 2.0 2.0 1.0 0.0 0.0 0.0 1.0 +2.0 2.0 2.0 1.0 0.0 0.0 0.0 1.0 + +[texture 2] +size (2, 2) +3.0 3.0 3.0 1.0 0.0 0.0 0.0 1.0 +3.0 3.0 3.0 1.0 0.0 0.0 0.0 1.0 + +[texture 3] +size (2, 2) +4.0 4.0 4.0 1.0 0.0 0.0 0.0 1.0 +4.0 4.0 4.0 1.0 0.0 0.0 0.0 1.0 + +[texture 4] +size (2, 2) +5.0 5.0 5.0 1.0 0.0 0.0 0.0 1.0 +5.0 5.0 5.0 1.0 0.0 0.0 0.0 1.0 + +[texture 5] +size (2, 2) +6.0 6.0 6.0 1.0 0.0 0.0 0.0 1.0 +6.0 6.0 6.0 1.0 0.0 0.0 0.0 1.0 + +[sampler 0] +filter linear linear linear +address clamp clamp clamp + +[sampler 1] +filter linear linear linear +address clamp clamp clamp + +[sampler 2] +filter linear linear linear +address clamp clamp clamp + +[sampler 3] +filter point 
point point +address clamp clamp clamp + +[sampler 4] +filter linear linear linear +address clamp clamp clamp + + +[pixel shader todo] +struct apple +{ + Texture2D unused; // must reserve t1 + Texture2D tex[3]; // must reserve t2-t4 + Texture2D lone; // must reserve t5 + float4 pos : sv_position; +}; + +Texture2D tex0; // must reserve t0 +sampler sam; + +float4 main(struct apple input) : sv_target +{ + return 100 * input.tex[1].Sample(sam, float2(0, 0)) + + 10 * tex0.Sample(sam, float2(0, 0)) + + input.lone.Sample(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (416.0, 416.0, 416.0, 111.0) + + +[pixel shader todo] +Texture2D tex; + +struct apple +{ + sampler unused0; // must reserve s0 + sampler sam[3]; // must reserve to s1-s2 + sampler unused1; // doesn't reserve +}; + +float4 main(struct apple input, sampler samp) : sv_target +{ + // samp must reserve s3 + + return float4(tex.Sample(samp, float2(0.3, 0.3)).xy, + tex.Sample(input.sam[1], float2(0.5, 0.5)).xy); +} + +[test] +todo draw quad +todo probe all rgba (1.0, 1.0, 0.5, 0.5) + + +[sampler 0] +filter linear linear linear +address clamp clamp clamp + +[sampler 1] +filter linear linear linear +address clamp clamp clamp + +[sampler 2] +filter linear linear linear +address clamp clamp clamp + +[sampler 3] +filter linear linear linear +address clamp clamp clamp + +[sampler 4] +filter point point point +address clamp clamp clamp + + +[pixel shader todo] +Texture2D tex; +sampler sam0; // must reserve s3 + +struct apple +{ + sampler unused0; // must reserve s0 + sampler sam[3]; // must reserve s1-s2 + sampler unused1; // doesn't reserve +}; + +float4 main(struct apple input, sampler samp) : sv_target +{ + // samp must reserve s4 + + return float4(tex.Sample(sam0, float2(0.5, 0.5)).x, tex.Sample(samp, float2(0.3, 0.3)).x, + tex.Sample(input.sam[1], float2(0.5, 0.5)).xw); +} + + +[test] +todo draw quad +todo probe all rgba (0.5, 1.0, 0.5, 1.0) + diff --git a/tests/shader_runner_d3d12.c 
b/tests/shader_runner_d3d12.c index 7e9fed25..dfc9fbf4 100644 --- a/tests/shader_runner_d3d12.c +++ b/tests/shader_runner_d3d12.c @@ -167,8 +167,8 @@ static ID3D12RootSignature *d3d12_runner_create_root_signature(struct d3d12_shad ID3D12GraphicsCommandList *command_list, unsigned int *uniform_index) { D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {0}; - D3D12_ROOT_PARAMETER root_params[5], *root_param; - D3D12_STATIC_SAMPLER_DESC static_samplers[1]; + D3D12_ROOT_PARAMETER root_params[8], *root_param; + D3D12_STATIC_SAMPLER_DESC static_samplers[5]; ID3D12RootSignature *root_signature; HRESULT hr; size_t i;
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 22 ++++++ libs/vkd3d-shader/hlsl.h | 7 ++ libs/vkd3d-shader/hlsl_codegen.c | 127 ++++++++++++++++++++++++++++--- 3 files changed, 145 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 9fca74b7..c2dbf4c3 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -112,8 +112,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name)
void hlsl_free_var(struct hlsl_ir_var *decl) { + unsigned int k; + vkd3d_free((void *)decl->name); hlsl_cleanup_semantic(&decl->semantic); + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); vkd3d_free(decl); }
@@ -942,6 +946,7 @@ struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct const struct hlsl_reg_reservation *reg_reservation) { struct hlsl_ir_var *var; + unsigned int k;
if (!(var = hlsl_alloc(ctx, sizeof(*var)))) return NULL; @@ -954,6 +959,23 @@ struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct var->storage_modifiers = modifiers; if (reg_reservation) var->reg_reservation = *reg_reservation; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + unsigned int i, obj_count = type->reg_size[k]; + + if (obj_count == 0) + continue; + + if (!(var->objects_usage[k] = hlsl_alloc(ctx, sizeof(*var->objects_usage[0]) * obj_count))) + { + for (i = 0; i < k; ++i) + vkd3d_free(var->objects_usage[i]); + vkd3d_free(var); + return NULL; + } + } + return var; }
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 5f4f5735..b41fc4ea 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -396,6 +396,11 @@ struct hlsl_ir_var * and the buffer_offset instead. */ struct hlsl_reg regs[HLSL_REGSET_LAST + 1];
+ struct + { + bool used; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; @@ -1160,6 +1165,8 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *start, unsigned int *count); +bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index); bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset); unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index e44e92df..01ffd9ae 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2706,6 +2706,71 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); }
+static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + + if (regset == HLSL_REGSET_SAMPLERS) + { + assert(!load->sampler.var); + + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + } + else + { + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + + if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; + } + } + + return false; +} + +static void calculate_resource_register_counts(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var; + struct hlsl_type *type; + unsigned int i, k; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + type = var->data_type; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + for (i = 0; i < type->reg_size[k]; ++i) + { + /* Samplers are only allocated until the last used one. */ + if (var->objects_usage[k][i].used) + var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; + } + } + } +} + static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) { if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) @@ -3346,6 +3411,55 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return true; }
+bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index) +{ + struct hlsl_type *type = deref->var->data_type; + unsigned int i; + + assert(regset <= HLSL_REGSET_LAST_OBJECT); + + *index = 0; + + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + + assert(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return false; + + /* We should always have generated a cast to UINT. */ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->base_type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value[0].u; + + switch (type->class) + { + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + return false; + + *index += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_STRUCT: + *index += type->e.record.fields[idx].reg_offset[regset]; + break; + + default: + vkd3d_unreachable(); + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + + assert(type->reg_size[regset] == 1); + return true; +} + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; @@ -3594,6 +3708,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry }
transform_ir(ctx, validate_static_object_references, body, NULL); + transform_ir(ctx, track_object_components_usage, body, NULL);
/* TODO: move forward, remove when no longer needed */ transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); @@ -3610,17 +3725,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
allocate_register_reservations(ctx);
- /* For now, request all the registers for each variable, as long as it is used. */ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int k; - - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - { - if (!var->regs[k].allocated) - var->regs[k].bind_count = var->last_read ? var->data_type->reg_size[k] : 0; - } - } + calculate_resource_register_counts(ctx);
allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 4 ++++ libs/vkd3d-shader/hlsl.h | 4 ++++ libs/vkd3d-shader/hlsl.y | 1 + libs/vkd3d-shader/hlsl_codegen.c | 21 ++++++++++++++++++++- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + tests/sampler.shader_test | 9 +++++++++ 6 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index c2dbf4c3..37914e9f 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1346,6 +1346,9 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, hlsl_src_from_node(&load->coords, params->coords); hlsl_src_from_node(&load->texel_offset, params->texel_offset); hlsl_src_from_node(&load->lod, params->lod); + load->sampling_dim = params->sampling_dim; + if (load->sampling_dim == HLSL_SAMPLER_DIM_GENERIC) + load->sampling_dim = hlsl_deref_get_type(ctx, &load->resource)->sampler_dim; return load; }
@@ -1625,6 +1628,7 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, clone_src(map, &dst->coords, &src->coords); clone_src(map, &dst->lod, &src->lod); clone_src(map, &dst->texel_offset, &src->texel_offset); + dst->sampling_dim = src->sampling_dim; return &dst->node; }
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index b41fc4ea..f99aa6e2 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -399,6 +399,8 @@ struct hlsl_ir_var struct { bool used; + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1];
uint32_t is_input_semantic : 1; @@ -612,6 +614,7 @@ struct hlsl_ir_resource_load enum hlsl_resource_load_type load_type; struct hlsl_deref resource, sampler; struct hlsl_src coords, lod, texel_offset; + enum hlsl_sampler_dim sampling_dim; };
struct hlsl_ir_resource_store @@ -810,6 +813,7 @@ struct hlsl_resource_load_params enum hlsl_resource_load_type type; struct hlsl_ir_node *resource, *sampler; struct hlsl_ir_node *coords, *lod, *texel_offset; + enum hlsl_sampler_dim sampling_dim; };
static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index d516f3b5..f41fc787 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3306,6 +3306,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
load_params.coords = coords; load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + load_params.sampling_dim = dim;
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 01ffd9ae..8e6fd6fc 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2722,12 +2722,30 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n
if (regset == HLSL_REGSET_SAMPLERS) { - assert(!load->sampler.var); + enum hlsl_sampler_dim dim;
+ assert(!load->sampler.var); if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) return false;
+ dim = var->objects_usage[regset][index].sampler_dim; + if (dim != load->sampling_dim) + { + if (dim == HLSL_SAMPLER_DIM_GENERIC) + { + var->objects_usage[regset][index].first_sampler_dim_loc = instr->loc; + } + else + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, + "Inconsistent generic sampler usage dimension."); + hlsl_note(ctx, &var->objects_usage[regset][index].first_sampler_dim_loc, + VKD3D_SHADER_LOG_ERROR, "First use is here."); + return false; + } + } var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim; } else { @@ -2735,6 +2753,7 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n return false;
var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim;
if (load->sampler.var) { diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index c06cecb6..5ffa8f18 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -125,6 +125,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT = 5023, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, diff --git a/tests/sampler.shader_test b/tests/sampler.shader_test index 2cb70a0b..ecb540da 100644 --- a/tests/sampler.shader_test +++ b/tests/sampler.shader_test @@ -45,3 +45,12 @@ float4 main() : sv_target [test] draw quad probe all rgba (0.25, 0, 0.25, 0) + + +[pixel shader fail] +sampler s; + +float4 main() : sv_target +{ + return tex2D(s, float2(0.0, 0.0)) + tex3D(s, float3(0.0, 0.0, 0.0)); +}
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/d3dbc.c | 76 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 8bb86214..1488b146 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1628,6 +1628,81 @@ static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ } }
+static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, res_type = 0; + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + switch (sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_1D; + break; + + case HLSL_SAMPLER_DIM_2D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D; + break; + + case HLSL_SAMPLER_DIM_CUBE: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE; + break; + + case HLSL_SAMPLER_DIM_3D: + res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D; + break; + + default: + vkd3d_unreachable(); + break; + } + + token = (1u << 31); + token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; + put_u32(buffer, token); + + reg.type = D3DSPR_SAMPLER; + reg.writemask = VKD3DSP_WRITEMASK_ALL; + reg.reg = reg_id; + + write_sm1_dst_register(buffer, &reg); +} + +static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + enum hlsl_sampler_dim sampler_dim; + unsigned int i, count, reg_id; + struct hlsl_ir_var *var; + + if (ctx->profile->major_version < 2) + return; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + + count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; + + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + { + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; + assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); + + reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); + } + } + } +} + static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { @@ -1952,6 +2027,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, 
struct hlsl_ir_function_decl *entry_fun
write_sm1_constant_defs(ctx, &buffer); write_sm1_semantic_dcls(ctx, &buffer); + write_sm1_sampler_dcls(ctx, &buffer); write_sm1_instructions(ctx, &buffer, entry_func);
put_u32(&buffer, D3DSIO_END);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/d3dbc.c | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 1488b146..d85fbf1f 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1888,6 +1888,42 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b write_sm1_instruction(ctx, buffer, &sm1_instr); }
+static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + struct hlsl_ir_node *coords = load->coords.node; + unsigned int sampler_offset, reg_id; + struct sm1_instruction sm1_instr; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); + reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; + + sm1_instr = (struct sm1_instruction) + { + .opcode = D3DSIO_TEX, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = coords->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .srcs[1].type = D3DSPR_SAMPLER, + .srcs[1].reg = reg_id, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .src_count = 2, + }; + + assert(instr->reg.allocated); + + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { @@ -2002,6 +2038,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b write_sm1_load(ctx, buffer, instr); break;
+ case HLSL_IR_RESOURCE_LOAD: + write_sm1_resource_load(ctx, buffer, instr); + break; + case HLSL_IR_STORE: write_sm1_store(ctx, buffer, instr); break;
From: Francisco Casas fcasas@codeweavers.com
The new fixmes can be triggered in the presence of object components within structs (for SM5).
In shaders such as this one:
struct apple { Texture2D tex : TEX; float4 color : COLOR; };
float4 main(struct apple input) : sv_target { return input.tex.Load(int3(1, 2, 3)); }
Or this one:
struct { Texture2D tex; float4 color; } s;
float4 main() : sv_target { return s.tex.Load(int3(1, 2, 3)); } --- libs/vkd3d-shader/tpf.c | 127 +++++++++++++++--------- tests/object-references.shader_test | 6 +- tests/register-reservations.shader_test | 6 +- tests/uav.shader_test | 4 +- 4 files changed, 87 insertions(+), 56 deletions(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 71edced3..a547db94 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -2809,13 +2809,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) } else { + unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + put_u32(&buffer, sm4_resource_format(var->data_type)); put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, 1); /* bind count */ + put_u32(&buffer, var->regs[regset].bind_count); put_u32(&buffer, flags); }
@@ -3069,6 +3071,8 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_TEXTURES); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -3079,6 +3083,8 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_UAVS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -3089,6 +3095,8 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -3351,44 +3359,67 @@ static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, write_sm4_instruction(buffer, &instr); }
-static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) { - const struct sm4_instruction instr = + unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; + struct sm4_instruction instr; + + for (i = 0; i < count; ++i) { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue;
- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, - .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - write_sm4_instruction(buffer, &instr); + instr = (struct sm4_instruction) + { + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + + .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, + .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id + i}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + + write_sm4_instruction(buffer, &instr); + } }
-static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool uav) { - bool uav = (var->data_type->base_type == HLSL_TYPE_UAV); - struct sm4_instruction instr = + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + unsigned int i, count = var->data_type->reg_size[regset]; + struct hlsl_type *component_type; + struct sm4_instruction instr; + + component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + + for (i = 0; i < count; ++i) { - .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) - | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT), + if (!var->objects_usage[regset][i].used) + continue;
- .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {uav ? var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, + instr = (struct sm4_instruction) + { + .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) + | (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT),
- .idx[0] = sm4_resource_format(var->data_type) * 0x1111, - .idx_count = 1, - }; + .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, + .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1,
- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) - { - instr.opcode |= var->data_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } + .idx[0] = sm4_resource_format(component_type) * 0x1111, + .idx_count = 1, + };
- write_sm4_instruction(buffer, &instr); + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + write_sm4_instruction(buffer, &instr); + } }
static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) @@ -3668,9 +3699,9 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *texel_offset) + const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) { - bool uav = (resource_type->base_type == HLSL_TYPE_UAV); + bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); struct sm4_instruction instr; unsigned int dim_count;
@@ -3696,7 +3727,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf { /* Mipmap level is in the last component in the IR, but needs to be in the W * component in the instruction. */ - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); + dim_count = hlsl_sampler_dim_count(dim); if (dim_count == 1) instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); if (dim_count == 2) @@ -4485,10 +4516,9 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node;
- if (resource_type->class != HLSL_CLASS_OBJECT) + if (!hlsl_type_is_resource(resource_type)) { - assert(resource_type->class == HLSL_CLASS_ARRAY || resource_type->class == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable."); + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); return; }
@@ -4496,14 +4526,11 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, { const struct hlsl_type *sampler_type = load->sampler.var->data_type;
- if (sampler_type->class != HLSL_CLASS_OBJECT) + if (!hlsl_type_is_resource(sampler_type)) { - assert(sampler_type->class == HLSL_CLASS_ARRAY || sampler_type->class == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable."); + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); return; } - assert(sampler_type->base_type == HLSL_TYPE_SAMPLER); - assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC);
if (!load->sampler.var->is_uniform) { @@ -4522,7 +4549,7 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, { case HLSL_RESOURCE_LOAD: write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, - coords, texel_offset); + coords, texel_offset, load->sampling_dim); break;
case HLSL_RESOURCE_SAMPLE: @@ -4566,10 +4593,9 @@ static void write_sm4_resource_store(struct hlsl_ctx *ctx, { const struct hlsl_type *resource_type = store->resource.var->data_type;
- if (resource_type->class != HLSL_CLASS_OBJECT) + if (!hlsl_type_is_resource(resource_type)) { - assert(resource_type->class == HLSL_CLASS_ARRAY || resource_type->class == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &store->node.loc, "Resource being a component of another variable."); + hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); return; }
@@ -4733,12 +4759,17 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
for (i = 0; i < extern_resources_count; ++i) { + enum hlsl_regset regset; + var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type);
- if (var->data_type->base_type == HLSL_TYPE_SAMPLER) - write_sm4_dcl_sampler(&buffer, var); - else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV) - write_sm4_dcl_texture(&buffer, var); + if (regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&buffer, var); + else if (regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(ctx, &buffer, var, false); + else if (regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(ctx, &buffer, var, true); }
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) diff --git a/tests/object-references.shader_test b/tests/object-references.shader_test index bc74ccd4..5ed4dcd6 100644 --- a/tests/object-references.shader_test +++ b/tests/object-references.shader_test @@ -92,7 +92,7 @@ size (1, 1) size (1, 1) 3.0 3.0 3.0 1.0
-[pixel shader todo] +[pixel shader] Texture2D tex[3];
struct foo { @@ -111,8 +111,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (312, 312, 312, 111) +draw quad +probe all rgba (312, 312, 312, 111)
[pixel shader] diff --git a/tests/register-reservations.shader_test b/tests/register-reservations.shader_test index 16aee594..72f68c1e 100644 --- a/tests/register-reservations.shader_test +++ b/tests/register-reservations.shader_test @@ -24,7 +24,7 @@ size (1, 1)
% If a single component in a texture array is used, all registers are reserved. -[pixel shader todo] +[pixel shader] Texture2D partially_used[2][2]; Texture2D tex;
@@ -34,8 +34,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (41.0, 41.0, 41.0, 1089.0) +draw quad +probe all rgba (41.0, 41.0, 41.0, 1089.0)
% If no component in a texture array is used, and it doesn't have a register reservation, no diff --git a/tests/uav.shader_test b/tests/uav.shader_test index f0eade4d..9740575c 100644 --- a/tests/uav.shader_test +++ b/tests/uav.shader_test @@ -147,7 +147,7 @@ size (1, 1)
0.5 0.6 0.7 0.8
-[pixel shader todo] +[pixel shader] RWTexture2D<float4> u[2] : register(u2);
float4 main() : sv_target1 @@ -158,6 +158,6 @@ float4 main() : sv_target1 }
[test] -todo draw quad +draw quad probe uav 2 (0, 0) rgba (1.1, 1.2, 1.3, 1.4) probe uav 3 (0, 0) rgba (2.1, 2.2, 2.3, 2.4)