This series mainly comprises support for allocating arrays of resources, and loading from them, for both SM1 and SM4.
-- v3: vkd3d-shader/hlsl: Support resource arrays when writing SM4. vkd3d-shader/hlsl: Write resource loads in SM1. vkd3d-shader/hlsl: Write sampler declarations in SM1. vkd3d-shader/hlsl: Track objects sampling dimension. vkd3d-shader/hlsl: Track object components usage and allocate registers accordingly. tests: Test objects as parameters. vkd3d-shader/hlsl: Skip object components when creating input/output copies. vkd3d-shader/hlsl: Add fixme for uniform copies for objects within structs. vkd3d-shader/hlsl: Support multiple-register variables in object regsets.
From: Francisco Casas fcasas@codeweavers.com
--- tests/register-reservations.shader_test | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+)
diff --git a/tests/register-reservations.shader_test b/tests/register-reservations.shader_test index f0287d00..b60e980b 100644 --- a/tests/register-reservations.shader_test +++ b/tests/register-reservations.shader_test @@ -23,6 +23,38 @@ size (1, 1) 4.0 4.0 4.0 99.0
+% If a single component in a texture array is used, all registers are reserved. +[pixel shader todo] +Texture2D partially_used[2][2]; +Texture2D tex; + +float4 main() : sv_target +{ + return 10 * tex.Load(int3(0, 0, 0)) + partially_used[0][1].Load(int3(0, 0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (41.0, 41.0, 41.0, 1089.0) + + +% If no component in a texture array is used, and it doesn't have a register reservation, no +% register is reserved. +[pixel shader] +Texture2D unused[4]; +Texture2D tex; + +float4 main() : sv_target +{ + return tex.Load(int3(0, 0, 0)); +} + +[test] +draw quad +probe all rgba (0.0, 0.0, 0.0, 99.0) + + +% Register reservations force to reserve all the resource registers. Even if unused. [pixel shader] Texture2D unused : register(t0); Texture2D tex;
From: Francisco Casas fcasas@codeweavers.com
Variables that contain more than one object (arrays or structs) require the allocation of contiguous registers in the respective object register spaces. --- libs/vkd3d-shader/hlsl.c | 8 ++- libs/vkd3d-shader/hlsl.h | 11 +++- libs/vkd3d-shader/hlsl_codegen.c | 85 ++++++++++++++++++------- libs/vkd3d-shader/hlsl_sm1.c | 14 ++-- libs/vkd3d-shader/hlsl_sm4.c | 17 ++++- tests/register-reservations.shader_test | 2 +- 6 files changed, 95 insertions(+), 42 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 869638a6..35b83f8b 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -166,6 +166,9 @@ static unsigned int get_array_size(const struct hlsl_type *type)
bool hlsl_type_is_resource(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_is_resource(type->e.array.type); + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) @@ -186,6 +189,9 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) if (type->class <= HLSL_CLASS_LAST_NUMERIC) return HLSL_REGSET_NUMERIC;
+ if (type->class == HLSL_CLASS_ARRAY) + return hlsl_type_get_regset(type->e.array.type); + if (type->class == HLSL_CLASS_OBJECT) { switch (type->base_type) @@ -203,8 +209,6 @@ enum hlsl_regset hlsl_type_get_regset(const struct hlsl_type *type) vkd3d_unreachable(); } } - else if (type->class == HLSL_CLASS_ARRAY) - return hlsl_type_get_regset(type->e.array.type);
vkd3d_unreachable(); } diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index b38fba1c..03bfefb3 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -228,16 +228,21 @@ struct hlsl_struct_field size_t name_bytecode_offset; };
-/* Information of the register allocated for an instruction node or variable. +/* Information of the register(s) allocated for an instruction node or variable. * These values are initialized at the end of hlsl_emit_bytecode(), after the compilation passes, * just before writing the bytecode. - * For numeric registers, a writemask can be provided to indicate the reservation of only some of the - * 4 components. * The type of register (register class) is implied from its use, so it is not stored in this * struct. */ struct hlsl_reg { + /* Index of the first register allocated. */ uint32_t id; + /* Number of registers to be allocated. + * Unlike the variable's type's regsize, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. */ + uint32_t count; + /* For numeric registers, a writemask can be provided to indicate the reservation of only some + * of the 4 components. */ unsigned int writemask; /* Whether the register has been allocated. 
*/ bool allocated; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index c8130a3f..7556393f 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2399,8 +2399,10 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) { var->regs[regset].allocated = true; var->regs[regset].id = var->reg_reservation.reg_index; - TRACE("Allocated reserved %s to %c%u.\n", var->name, var->reg_reservation.reg_type, - var->reg_reservation.reg_index); + var->regs[regset].count = var->data_type->reg_size[regset]; + TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, + var->reg_reservation.reg_index, var->reg_reservation.reg_type, + var->reg_reservation.reg_index + var->regs[regset].count); } } } @@ -2626,6 +2628,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct liveness * liveness->regs[component_idx + i].last_read = last_read; } ret.id = component_idx / 4; + ret.count = 1; ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); ret.allocated = true; liveness->reg_count = max(liveness->reg_count, ret.id + 1); @@ -2663,6 +2666,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct liveness *liv for (i = 0; i < reg_size; ++i) liveness->regs[component_idx + i].last_read = last_read; ret.id = component_idx / 4; + ret.count = align(reg_size, 4) / 4; ret.allocated = true; liveness->reg_count = max(liveness->reg_count, ret.id + align(reg_size, 4)); return ret; @@ -2957,6 +2961,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var { var->regs[HLSL_REGSET_NUMERIC].allocated = true; var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; + var->regs[HLSL_REGSET_NUMERIC].count = 1; var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 
'o' : 'v', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); @@ -3135,6 +3140,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) }
buffer->reg.id = buffer->reservation.reg_index; + buffer->reg.count = 1; buffer->reg.allocated = true; TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); } @@ -3144,6 +3150,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) ++index;
buffer->reg.id = index; + buffer->reg.count = 1; buffer->reg.allocated = true; TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++index; @@ -3165,13 +3172,17 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum uint32_t index) { const struct hlsl_ir_var *var; + unsigned int start, count;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry) { if (!var->regs[regset].allocated) continue;
- if (index == var->regs[regset].id) + start = var->regs[regset].id; + count = var->regs[regset].count; + + if (start <= index && index < start + count) return var; } return NULL; @@ -3182,7 +3193,6 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) char regset_name = get_regset_name(regset); struct hlsl_ir_var *var; uint32_t min_index = 0; - uint32_t index;
if (regset == HLSL_REGSET_UAVS) { @@ -3194,19 +3204,17 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } }
- index = min_index; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { - if (!var->last_read || !var->data_type->reg_size[regset]) + unsigned int count = var->regs[regset].count; + + if (count == 0) continue;
if (var->regs[regset].allocated) { - const struct hlsl_ir_var *reserved_object; - unsigned int index = var->regs[regset].id; - - reserved_object = get_allocated_object(ctx, regset, index); + const struct hlsl_ir_var *reserved_object, *last_reported = NULL; + unsigned int index, i;
if (var->regs[regset].id < min_index) { @@ -3214,28 +3222,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", var->regs[regset].id, min_index - 1); + continue; } - else if (reserved_object && reserved_object != var) + + for (i = 0; i < count; ++i) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple objects bound to %c%u.", regset_name, index); - hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Object '%s' is already bound to %c%u.", reserved_object->name, - regset_name, index); - } + index = var->regs[regset].id + i;
- var->regs[regset].id = var->reg_reservation.reg_index; - var->regs[regset].allocated = true; - TRACE("Allocated reserved %s to %c%u.\n", var->name, regset_name, var->regs[regset].id); + reserved_object = get_allocated_object(ctx, regset, index); + if (reserved_object && reserved_object != var && reserved_object != last_reported) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "Multiple variables bound to %c%u.", regset_name, index); + hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, + "Variable '%s' is already bound to %c%u.", reserved_object->name, + regset_name, index); + last_reported = reserved_object; + } + } } else { - while (get_allocated_object(ctx, regset, index)) + unsigned int index = min_index; + unsigned int available = 0; + + while (available < count) + { + if (get_allocated_object(ctx, regset, index)) + available = 0; + else + ++available; ++index; + } + index -= count;
var->regs[regset].id = index; var->regs[regset].allocated = true; - TRACE("Allocated object to %c%u.\n", regset_name, index); + TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, + index + count); ++index; } } @@ -3462,7 +3486,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry { var = entry_func->parameters.vars[i];
- if (var->data_type->class == HLSL_CLASS_OBJECT || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) { prepend_uniform_copy(ctx, &body->instrs, var); } @@ -3557,6 +3581,19 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry rb_for_each_entry(&ctx->functions, dump_function, ctx);
allocate_register_reservations(ctx); + + /* For now, request all the registers for each variable, as long as it is used. */ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int k; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + if (!var->regs[k].allocated) + var->regs[k].count = var->last_read ? var->data_type->reg_size[k] : 0; + } + } + allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) { diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index be665981..69b4e5de 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -360,19 +360,15 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe if (!var->semantic.name && var->regs[regset].allocated) { put_u32(buffer, 0); /* name */ - if (var->data_type->class == HLSL_CLASS_OBJECT - && (var->data_type->base_type == HLSL_TYPE_SAMPLER - || var->data_type->base_type == HLSL_TYPE_TEXTURE)) + if (regset == HLSL_REGSET_NUMERIC) { - assert(regset == HLSL_REGSET_SAMPLERS); - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); - put_u32(buffer, 1); + put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); + put_u32(buffer, var->data_type->reg_size[regset] / 4); } else { - assert(regset == HLSL_REGSET_NUMERIC); - put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[regset].id)); - put_u32(buffer, var->data_type->reg_size[regset] / 4); + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[regset].id)); + put_u32(buffer, 1); } put_u32(buffer, 0); /* type */ put_u32(buffer, 0); /* FIXME: default value */ diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 8be848c5..973fe0d0 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -427,6 +427,9 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_type(type->e.array.type); + switch (type->base_type) { case HLSL_TYPE_SAMPLER: @@ -442,6 +445,9 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type)
static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return sm4_resource_format(type->e.array.type); + switch (type->e.resource_format->base_type) { case HLSL_TYPE_DOUBLE: @@ -466,6 +472,9 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type
static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) { + if (type->class == HLSL_CLASS_ARRAY) + return sm4_rdef_resource_dimension(type->e.array.type); + switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: @@ -869,7 +878,9 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r
if (var->is_uniform) { - if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE) + enum hlsl_regset regset = hlsl_type_get_regset(data_type); + + if (regset == HLSL_REGSET_TEXTURES) { reg->type = VKD3D_SM4_RT_RESOURCE; reg->dim = VKD3D_SM4_DIMENSION_VEC4; @@ -879,7 +890,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } - else if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV) + else if (regset == HLSL_REGSET_UAVS) { reg->type = VKD3D_SM5_RT_UAV; reg->dim = VKD3D_SM4_DIMENSION_VEC4; @@ -889,7 +900,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } - else if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER) + else if (regset == HLSL_REGSET_SAMPLERS) { reg->type = VKD3D_SM4_RT_SAMPLER; reg->dim = VKD3D_SM4_DIMENSION_NONE; diff --git a/tests/register-reservations.shader_test b/tests/register-reservations.shader_test index b60e980b..16aee594 100644 --- a/tests/register-reservations.shader_test +++ b/tests/register-reservations.shader_test @@ -80,4 +80,4 @@ float4 main() : sv_target
[test] draw quad -todo probe all rgba (4.0, 4.0, 4.0, 99.0) +probe all rgba (4.0, 4.0, 4.0, 99.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 7556393f..fcce0322 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3449,6 +3449,25 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } }
+static bool type_has_object_components(struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_OBJECT) + return true; + if (type->class == HLSL_CLASS_ARRAY) + return type_has_object_components(type->e.array.type); + if (type->class == HLSL_CLASS_STRUCT) + { + unsigned int i; + + for (i = 0; i < type->e.record.field_count; ++i) + { + if (type_has_object_components(type->e.record.fields[i].type)) + return true; + } + } + return false; +} + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { @@ -3492,6 +3511,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } else { + if (type_has_object_components(var->data_type)) + hlsl_fixme(ctx, &var->loc, "Prepend uniform copies for object components within structs."); + if (var->data_type->class != HLSL_CLASS_STRUCT && !var->semantic.name) hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Parameter "%s" is missing a semantic.", var->name);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index fcce0322..5f84c2af 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -315,6 +315,9 @@ static void prepend_input_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *field_load; struct hlsl_ir_constant *c;
+ if (hlsl_type_is_resource(field->type)) + continue; + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; list_add_after(&lhs->node.entry, &c->node.entry); @@ -410,6 +413,9 @@ static void append_output_struct_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *field_load; struct hlsl_ir_constant *c;
+ if (hlsl_type_is_resource(field->type)) + continue; + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; list_add_tail(instrs, &c->node.entry);
From: Francisco Casas fcasas@codeweavers.com
--- Makefile.am | 1 + tests/object-parameters.shader_test | 183 ++++++++++++++++++++++++++++ tests/shader_runner_d3d12.c | 4 +- 3 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 tests/object-parameters.shader_test
diff --git a/Makefile.am b/Makefile.am index bff65a85..d67895b2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -135,6 +135,7 @@ vkd3d_shader_tests = \ tests/nointerpolation.shader_test \ tests/object-field-offsets.shader_test \ tests/object-references.shader_test \ + tests/object-parameters.shader_test \ tests/pow.shader_test \ tests/preproc-if.shader_test \ tests/preproc-ifdef.shader_test \ diff --git a/tests/object-parameters.shader_test b/tests/object-parameters.shader_test new file mode 100644 index 00000000..60dde5d3 --- /dev/null +++ b/tests/object-parameters.shader_test @@ -0,0 +1,183 @@ +[pixel shader fail todo] +Texture2D tex0; + +float4 main(out Texture2D tex : TEXTURE) : sv_target +{ + return float4(1, 2, 3, 4); +} + + +[pixel shader fail todo] +struct params +{ + Texture2D tex : TEXTURE; +}; + +float4 main(inout params x) : sv_target +{ + return float4(1, 2, 3, 4); +} + + +[require] +shader model >= 5.0 + + +[pixel shader todo] +uniform float global; + +struct apple +{ + Texture2D tex; + float4 pos : sv_position; +}; + +float4 main(struct apple input, uniform float param) : sv_target +{ + return input.tex.Load(int3(0, 0, 0)) + global + param + input.pos; +} + + +[texture 0] +size (2, 2) +1.0 1.0 1.0 1.0 0.0 0.0 0.0 1.0 +1.0 1.0 1.0 1.0 0.0 0.0 0.0 1.0 + +[texture 1] +size (2, 2) +2.0 2.0 2.0 1.0 0.0 0.0 0.0 1.0 +2.0 2.0 2.0 1.0 0.0 0.0 0.0 1.0 + +[texture 2] +size (2, 2) +3.0 3.0 3.0 1.0 0.0 0.0 0.0 1.0 +3.0 3.0 3.0 1.0 0.0 0.0 0.0 1.0 + +[texture 3] +size (2, 2) +4.0 4.0 4.0 1.0 0.0 0.0 0.0 1.0 +4.0 4.0 4.0 1.0 0.0 0.0 0.0 1.0 + +[texture 4] +size (2, 2) +5.0 5.0 5.0 1.0 0.0 0.0 0.0 1.0 +5.0 5.0 5.0 1.0 0.0 0.0 0.0 1.0 + +[texture 5] +size (2, 2) +6.0 6.0 6.0 1.0 0.0 0.0 0.0 1.0 +6.0 6.0 6.0 1.0 0.0 0.0 0.0 1.0 + +[sampler 0] +filter linear linear linear +address clamp clamp clamp + +[sampler 1] +filter linear linear linear +address clamp clamp clamp + +[sampler 2] +filter linear linear linear +address clamp clamp clamp + +[sampler 3] +filter point 
point point +address clamp clamp clamp + +[sampler 4] +filter linear linear linear +address clamp clamp clamp + + +[pixel shader todo] +struct apple +{ + Texture2D unused; // must reserve t1 + Texture2D tex[3]; // must reserve t2-t4 + Texture2D lone; // must reserve t5 + float4 pos : sv_position; +}; + +Texture2D tex0; // must reserve t0 +sampler sam; + +float4 main(struct apple input) : sv_target +{ + return 100 * input.tex[1].Sample(sam, float2(0, 0)) + + 10 * tex0.Sample(sam, float2(0, 0)) + + input.lone.Sample(sam, float2(0, 0)); +} + +[test] +todo draw quad +todo probe all rgba (416.0, 416.0, 416.0, 111.0) + + +[pixel shader todo] +Texture2D tex; + +struct apple +{ + sampler unused0; // must reserve s0 + sampler sam[3]; // must reserve to s1-s2 + sampler unused1; // doesn't reserve +}; + +float4 main(struct apple input, sampler samp) : sv_target +{ + // samp must reserve s3 + + return float4(tex.Sample(samp, float2(0.22, 0.22)).xy, + tex.Sample(input.sam[1], float2(0.5, 0.5)).xy); +} + +[test] +todo draw quad +todo probe all rgba (1.0, 1.0, 0.5, 0.5) + + +[sampler 0] +filter linear linear linear +address clamp clamp clamp + +[sampler 1] +filter linear linear linear +address clamp clamp clamp + +[sampler 2] +filter linear linear linear +address clamp clamp clamp + +[sampler 3] +filter linear linear linear +address clamp clamp clamp + +[sampler 4] +filter point point point +address clamp clamp clamp + + +[pixel shader todo] +Texture2D tex; +sampler sam0; // must reserve s3 + +struct apple +{ + sampler unused0; // must reserve s0 + sampler sam[3]; // must reserve s1-s2 + sampler unused1; // doesn't reserve +}; + +float4 main(struct apple input, sampler samp) : sv_target +{ + // samp must reserve s4 + + return float4(tex.Sample(sam0, float2(0.5, 0.5)).x, tex.Sample(samp, float2(0.22, 0.22)).x, + tex.Sample(input.sam[1], float2(0.5, 0.5)).xw); +} + + +[test] +todo draw quad +todo probe all rgba (0.5, 1.0, 0.5, 1.0) + diff --git a/tests/shader_runner_d3d12.c 
b/tests/shader_runner_d3d12.c index 54d1c51a..969c4f49 100644 --- a/tests/shader_runner_d3d12.c +++ b/tests/shader_runner_d3d12.c @@ -167,8 +167,8 @@ static ID3D12RootSignature *d3d12_runner_create_root_signature(struct d3d12_shad ID3D12GraphicsCommandList *command_list, unsigned int *uniform_index) { D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {0}; - D3D12_ROOT_PARAMETER root_params[5], *root_param; - D3D12_STATIC_SAMPLER_DESC static_samplers[1]; + D3D12_ROOT_PARAMETER root_params[8], *root_param; + D3D12_STATIC_SAMPLER_DESC static_samplers[5]; ID3D12RootSignature *root_signature; HRESULT hr; size_t i;
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 22 ++++ libs/vkd3d-shader/hlsl.h | 7 ++ libs/vkd3d-shader/hlsl_codegen.c | 127 +++++++++++++++++++++-- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 4 files changed, 146 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 35b83f8b..4852ad7e 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -112,8 +112,12 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name)
void hlsl_free_var(struct hlsl_ir_var *decl) { + unsigned int k; + vkd3d_free((void *)decl->name); hlsl_cleanup_semantic(&decl->semantic); + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); vkd3d_free(decl); }
@@ -942,6 +946,7 @@ struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct const struct hlsl_reg_reservation *reg_reservation) { struct hlsl_ir_var *var; + unsigned int k;
if (!(var = hlsl_alloc(ctx, sizeof(*var)))) return NULL; @@ -954,6 +959,23 @@ struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct var->storage_modifiers = modifiers; if (reg_reservation) var->reg_reservation = *reg_reservation; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + unsigned int i, obj_count = type->reg_size[k]; + + if (obj_count == 0) + continue; + + if (!(var->objects_usage[k] = hlsl_alloc(ctx, sizeof(*var->objects_usage[0]) * obj_count))) + { + for (i = 0; i < k; ++i) + vkd3d_free(var->objects_usage[i]); + vkd3d_free(var); + return NULL; + } + } + return var; }
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 03bfefb3..53c1231a 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -394,6 +394,11 @@ struct hlsl_ir_var * and the buffer_offset instead. */ struct hlsl_reg regs[HLSL_REGSET_LAST + 1];
+ struct + { + bool used; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; + uint32_t is_input_semantic : 1; uint32_t is_output_semantic : 1; uint32_t is_uniform : 1; @@ -1154,6 +1159,8 @@ unsigned int hlsl_swizzle_from_writemask(unsigned int writemask); struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *start, unsigned int *count); +bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index); bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset); unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5f84c2af..7d9d498a 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2705,6 +2705,71 @@ static const char *debug_register(char class, struct hlsl_reg reg, const struct return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask)); }
+static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_resource_load *load; + struct hlsl_ir_var *var; + enum hlsl_regset regset; + unsigned int index; + + if (instr->type != HLSL_IR_RESOURCE_LOAD) + return false; + + load = hlsl_ir_resource_load(instr); + var = load->resource.var; + regset = hlsl_type_get_regset(hlsl_deref_get_type(ctx, &load->resource)); + + if (regset == HLSL_REGSET_SAMPLERS) + { + assert(!load->sampler.var); + + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + } + else + { + if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) + return false; + + var->objects_usage[regset][index].used = true; + + if (load->sampler.var) + { + var = load->sampler.var; + if (!hlsl_regset_index_from_deref(ctx, &load->sampler, HLSL_REGSET_SAMPLERS, &index)) + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; + } + } + + return false; +} + +static void calculate_resource_register_counts(struct hlsl_ctx *ctx) +{ + struct hlsl_ir_var *var; + struct hlsl_type *type; + unsigned int i, k; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + type = var->data_type; + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { + for (i = 0; i < type->reg_size[k]; ++i) + { + /* Samplers are only allocated until the last used one. */ + if (var->objects_usage[k][i].used) + var->regs[k].count = (k == HLSL_REGSET_SAMPLERS) ? i + 1 : type->reg_size[k]; + } + } + } +} + static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct liveness *liveness) { if (var->is_input_semantic || var->is_output_semantic || var->is_uniform) @@ -3346,6 +3411,55 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl return true; }
+bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + enum hlsl_regset regset, unsigned int *index) +{ + struct hlsl_type *type = deref->var->data_type; + unsigned int i; + + assert(regset <= HLSL_REGSET_LAST_OBJECT); + + *index = 0; + + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + + assert(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return false; + + /* We should always have generated a cast to UINT. */ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->base_type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value[0].u; + + switch (type->class) + { + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + return false; + + *index += idx * type->e.array.type->reg_size[regset]; + break; + + case HLSL_CLASS_STRUCT: + *index += type->e.record.fields[idx].reg_offset[regset]; + break; + + default: + break; + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + + assert(type->reg_size[regset] == 1); + return true; +} + bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset) { struct hlsl_ir_node *offset_node = deref->offset.node; @@ -3594,6 +3708,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry }
transform_ir(ctx, validate_static_object_references, body, NULL); + transform_ir(ctx, track_object_components_usage, body, NULL);
/* TODO: move forward, remove when no longer needed */ transform_ir(ctx, transform_deref_paths_into_offsets, body, NULL); @@ -3610,17 +3725,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
allocate_register_reservations(ctx);
- /* For now, request all the registers for each variable, as long as it is used. */ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int k; - - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - { - if (!var->regs[k].allocated) - var->regs[k].count = var->last_read ? var->data_type->reg_size[k] : 0; - } - } + calculate_resource_register_counts(ctx);
allocate_temp_registers(ctx, entry_func); if (profile->major_version < 4) diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index e635a70b..1d73005b 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -125,6 +125,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT = 5023, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, + VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX = 5026,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 4 ++++ libs/vkd3d-shader/hlsl.h | 4 ++++ libs/vkd3d-shader/hlsl.y | 1 + libs/vkd3d-shader/hlsl_codegen.c | 21 ++++++++++++++++++++- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 5 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 4852ad7e..ca6938ce 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1343,6 +1343,9 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, hlsl_src_from_node(&load->coords, params->coords); hlsl_src_from_node(&load->texel_offset, params->texel_offset); hlsl_src_from_node(&load->lod, params->lod); + load->sampling_dim = params->sampling_dim; + if (load->sampling_dim == HLSL_SAMPLER_DIM_GENERIC) + load->sampling_dim = hlsl_deref_get_type(ctx, &load->resource)->sampler_dim; return load; }
@@ -1616,6 +1619,7 @@ static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx, clone_src(map, &dst->coords, &src->coords); clone_src(map, &dst->lod, &src->lod); clone_src(map, &dst->texel_offset, &src->texel_offset); + dst->sampling_dim = src->sampling_dim; return &dst->node; }
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 53c1231a..616ee06c 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -397,6 +397,8 @@ struct hlsl_ir_var struct { bool used; + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1];
uint32_t is_input_semantic : 1; @@ -608,6 +610,7 @@ struct hlsl_ir_resource_load enum hlsl_resource_load_type load_type; struct hlsl_deref resource, sampler; struct hlsl_src coords, lod, texel_offset; + enum hlsl_sampler_dim sampling_dim; };
struct hlsl_ir_resource_store @@ -806,6 +809,7 @@ struct hlsl_resource_load_params enum hlsl_resource_load_type type; struct hlsl_ir_node *resource, *sampler; struct hlsl_ir_node *coords, *lod, *texel_offset; + enum hlsl_sampler_dim sampling_dim; };
static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 0ddae6ee..4816ad9b 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -3200,6 +3200,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *
load_params.coords = coords; load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + load_params.sampling_dim = dim;
if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 7d9d498a..dac50445 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2721,12 +2721,30 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n
if (regset == HLSL_REGSET_SAMPLERS) { - assert(!load->sampler.var); + enum hlsl_sampler_dim dim;
+ assert(!load->sampler.var); if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index)) return false;
+ dim = var->objects_usage[regset][index].sampler_dim; + if (dim != load->sampling_dim) + { + if (dim == HLSL_SAMPLER_DIM_GENERIC) + { + var->objects_usage[regset][index].first_sampler_dim_loc = instr->loc; + } + else + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER, + "Inconsistent generic sampler usage dimension."); + hlsl_note(ctx, &var->objects_usage[regset][index].first_sampler_dim_loc, + VKD3D_SHADER_LOG_ERROR, "First use is here."); + return false; + } + } var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim; } else { @@ -2734,6 +2752,7 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n return false;
var->objects_usage[regset][index].used = true; + var->objects_usage[regset][index].sampler_dim = load->sampling_dim;
if (load->sampler.var) { diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 1d73005b..76d9e379 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -126,6 +126,7 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, VKD3D_SHADER_ERROR_HLSL_NONCONSTANT_INDEX = 5026, + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5027,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
From: Francisco Casas fcasas@codeweavers.com
--- include/vkd3d_d3d9types.h | 7 ++-- libs/vkd3d-shader/hlsl_sm1.c | 76 ++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 3 deletions(-)
diff --git a/include/vkd3d_d3d9types.h b/include/vkd3d_d3d9types.h index 75d04614..c85c3bea 100644 --- a/include/vkd3d_d3d9types.h +++ b/include/vkd3d_d3d9types.h @@ -29,9 +29,10 @@
#define D3DSI_INSTLENGTH_SHIFT 24
-#define D3DSP_DCL_USAGE_SHIFT 0 -#define D3DSP_DCL_USAGEINDEX_SHIFT 16 -#define D3DSP_DSTMOD_SHIFT 20 +#define D3DSP_DCL_USAGE_SHIFT 0 +#define D3DSP_DCL_USAGEINDEX_SHIFT 16 +#define D3DSP_DSTMOD_SHIFT 20 +#define D3DSP_DCL_RESOURCETYPE_SHIFT 27
#define D3DSP_SRCMOD_SHIFT 24
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 69b4e5de..2e85d71d 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -651,6 +651,81 @@ static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ } }
+static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) +{ + struct sm1_dst_register reg = {0}; + uint32_t token, res_type = 0; + + token = D3DSIO_DCL; + if (ctx->profile->major_version > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + + switch (sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + res_type = 1; + break; + + case HLSL_SAMPLER_DIM_2D: + res_type = 2; + break; + + case HLSL_SAMPLER_DIM_CUBE: + res_type = 3; + break; + + case HLSL_SAMPLER_DIM_3D: + res_type = 4; + break; + + default: + vkd3d_unreachable(); + break; + } + + token = (1u << 31); + token |= res_type << D3DSP_DCL_RESOURCETYPE_SHIFT; + put_u32(buffer, token); + + reg.type = D3DSPR_SAMPLER; + reg.writemask = VKD3DSP_WRITEMASK_ALL; + reg.reg = reg_id; + + write_sm1_dst_register(buffer, &reg); +} + +static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +{ + enum hlsl_sampler_dim sampler_dim; + unsigned int i, count, reg_id; + struct hlsl_ir_var *var; + + if (ctx->profile->major_version < 2) + return; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + + count = var->regs[HLSL_REGSET_SAMPLERS].count; + + for (i = 0; i < count; ++i) + { + if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + { + sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim; + assert(sampler_dim != HLSL_SAMPLER_DIM_GENERIC); + + reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; + write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); + } + } + } +} + static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { @@ -967,6 +1042,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
write_sm1_constant_defs(ctx, &buffer); write_sm1_semantic_dcls(ctx, &buffer); + write_sm1_sampler_dcls(ctx, &buffer); write_sm1_instructions(ctx, &buffer, entry_func);
put_u32(&buffer, D3DSIO_END);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/hlsl_sm1.c | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_sm1.c b/libs/vkd3d-shader/hlsl_sm1.c index 2e85d71d..481b39fb 100644 --- a/libs/vkd3d-shader/hlsl_sm1.c +++ b/libs/vkd3d-shader/hlsl_sm1.c @@ -903,6 +903,42 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b write_sm1_instruction(ctx, buffer, &sm1_instr); }
+static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + struct hlsl_ir_node *coords = load->coords.node; + unsigned int sampler_offset, reg_id; + struct sm1_instruction sm1_instr; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); + reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; + + sm1_instr = (struct sm1_instruction) + { + .opcode = D3DSIO_TEX, + + .dst.type = D3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].reg = coords->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .srcs[1].type = D3DSPR_SAMPLER, + .srcs[1].reg = reg_id, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + + .src_count = 2, + }; + + assert(instr->reg.allocated); + + write_sm1_instruction(ctx, buffer, &sm1_instr); +} + static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { @@ -1017,6 +1053,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b write_sm1_load(ctx, buffer, instr); break;
+ case HLSL_IR_RESOURCE_LOAD: + write_sm1_resource_load(ctx, buffer, instr); + break; + case HLSL_IR_STORE: write_sm1_store(ctx, buffer, instr); break;
From: Francisco Casas fcasas@codeweavers.com
The new fixmes can be triggered in the presence of object components within structs (for SM5).
In shaders such as this one:
struct apple { Texture2D tex : TEX; float4 color : COLOR; };
float4 main(struct apple input) : sv_target { return input.tex.Load(int3(1, 2, 3)); }
Or this one:
struct { Texture2D tex; float4 color; } s;
float4 main() : sv_target { return s.tex.Load(int3(1, 2, 3)); } --- libs/vkd3d-shader/hlsl_sm4.c | 127 +++++++++++++++--------- tests/object-references.shader_test | 6 +- tests/register-reservations.shader_test | 6 +- tests/uav.shader_test | 4 +- 4 files changed, 87 insertions(+), 56 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 973fe0d0..58f1cf23 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -627,13 +627,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) } else { + unsigned int dimx = hlsl_type_get_component_type(ctx, var->data_type, 0)->e.resource_format->dimx; + put_u32(&buffer, sm4_resource_format(var->data_type)); put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } put_u32(&buffer, var->regs[regset].id); - put_u32(&buffer, 1); /* bind count */ + put_u32(&buffer, var->regs[regset].count); /* bind count */ put_u32(&buffer, flags); }
@@ -887,6 +889,8 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_TEXTURES); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -897,6 +901,8 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_UAVS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -907,6 +913,8 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r if (swizzle_type) *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); + assert(deref->offset_regset == HLSL_REGSET_SAMPLERS); reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } @@ -1169,44 +1177,67 @@ static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, write_sm4_instruction(buffer, &instr); }
-static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_samplers(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) { - const struct sm4_instruction instr = + unsigned int i, count = var->data_type->reg_size[HLSL_REGSET_SAMPLERS]; + struct sm4_instruction instr; + + for (i = 0; i < count; ++i) { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + if (!var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue;
- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, - .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - write_sm4_instruction(buffer, &instr); + instr = (struct sm4_instruction) + { + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + + .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, + .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id + i}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + + write_sm4_instruction(buffer, &instr); + } }
-static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) +static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_var *var, bool uav) { - bool uav = (var->data_type->base_type == HLSL_TYPE_UAV); - struct sm4_instruction instr = + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + unsigned int i, count = var->data_type->reg_size[regset]; + struct hlsl_type *component_type; + struct sm4_instruction instr; + + component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); + + for (i = 0; i < count; ++i) { - .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) - | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT), + if (!var->objects_usage[regset][i].used) + continue;
- .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, - .dsts[0].reg.idx = {uav ? var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id}, - .dsts[0].reg.idx_count = 1, - .dst_count = 1, + instr = (struct sm4_instruction) + { + .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) + | (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT),
- .idx[0] = sm4_resource_format(var->data_type) * 0x1111, - .idx_count = 1, - }; + .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, + .dsts[0].reg.idx = {var->regs[regset].id + i}, + .dsts[0].reg.idx_count = 1, + .dst_count = 1,
- if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) - { - instr.opcode |= var->data_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } + .idx[0] = sm4_resource_format(component_type) * 0x1111, + .idx_count = 1, + };
- write_sm4_instruction(buffer, &instr); + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) + { + instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + + write_sm4_instruction(buffer, &instr); + } }
static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) @@ -1486,9 +1517,9 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *texel_offset) + const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) { - bool uav = (resource_type->base_type == HLSL_TYPE_UAV); + bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); struct sm4_instruction instr; unsigned int dim_count;
@@ -1514,7 +1545,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf { /* Mipmap level is in the last component in the IR, but needs to be in the W * component in the instruction. */ - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); + dim_count = hlsl_sampler_dim_count(dim); if (dim_count == 1) instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); if (dim_count == 2) @@ -2250,10 +2281,9 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node;
- if (resource_type->class != HLSL_CLASS_OBJECT) + if (!hlsl_type_is_resource(resource_type)) { - assert(resource_type->class == HLSL_CLASS_ARRAY || resource_type->class == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable."); + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); return; }
@@ -2261,14 +2291,11 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, { const struct hlsl_type *sampler_type = load->sampler.var->data_type;
- if (sampler_type->class != HLSL_CLASS_OBJECT) + if (!hlsl_type_is_resource(sampler_type)) { - assert(sampler_type->class == HLSL_CLASS_ARRAY || sampler_type->class == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable."); + hlsl_fixme(ctx, &load->node.loc, "Separate object fields as new variables."); return; } - assert(sampler_type->base_type == HLSL_TYPE_SAMPLER); - assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC);
if (!load->sampler.var->is_uniform) { @@ -2287,7 +2314,7 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, { case HLSL_RESOURCE_LOAD: write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource, - coords, texel_offset); + coords, texel_offset, load->sampling_dim); break;
case HLSL_RESOURCE_SAMPLE: @@ -2331,10 +2358,9 @@ static void write_sm4_resource_store(struct hlsl_ctx *ctx, { const struct hlsl_type *resource_type = store->resource.var->data_type;
- if (resource_type->class != HLSL_CLASS_OBJECT) + if (!hlsl_type_is_resource(resource_type)) { - assert(resource_type->class == HLSL_CLASS_ARRAY || resource_type->class == HLSL_CLASS_STRUCT); - hlsl_fixme(ctx, &store->node.loc, "Resource being a component of another variable."); + hlsl_fixme(ctx, &store->node.loc, "Separate object fields as new variables."); return; }
@@ -2498,12 +2524,17 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
for (i = 0; i < extern_resources_count; ++i) { + enum hlsl_regset regset; + var = extern_resources[i]; + regset = hlsl_type_get_regset(var->data_type);
- if (var->data_type->base_type == HLSL_TYPE_SAMPLER) - write_sm4_dcl_sampler(&buffer, var); - else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV) - write_sm4_dcl_texture(&buffer, var); + if (regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&buffer, var); + else if (regset == HLSL_REGSET_TEXTURES) + write_sm4_dcl_textures(ctx, &buffer, var, false); + else if (regset == HLSL_REGSET_UAVS) + write_sm4_dcl_textures(ctx, &buffer, var, true); }
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) diff --git a/tests/object-references.shader_test b/tests/object-references.shader_test index bc74ccd4..5ed4dcd6 100644 --- a/tests/object-references.shader_test +++ b/tests/object-references.shader_test @@ -92,7 +92,7 @@ size (1, 1) size (1, 1) 3.0 3.0 3.0 1.0
-[pixel shader todo] +[pixel shader] Texture2D tex[3];
struct foo { @@ -111,8 +111,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (312, 312, 312, 111) +draw quad +probe all rgba (312, 312, 312, 111)
[pixel shader] diff --git a/tests/register-reservations.shader_test b/tests/register-reservations.shader_test index 16aee594..72f68c1e 100644 --- a/tests/register-reservations.shader_test +++ b/tests/register-reservations.shader_test @@ -24,7 +24,7 @@ size (1, 1)
% If a single component in a texture array is used, all registers are reserved. -[pixel shader todo] +[pixel shader] Texture2D partially_used[2][2]; Texture2D tex;
@@ -34,8 +34,8 @@ float4 main() : sv_target }
[test] -todo draw quad -todo probe all rgba (41.0, 41.0, 41.0, 1089.0) +draw quad +probe all rgba (41.0, 41.0, 41.0, 1089.0)
% If no component in a texture array is used, and it doesn't have a register reservation, no diff --git a/tests/uav.shader_test b/tests/uav.shader_test index 0c690f8e..e7fdb8f4 100644 --- a/tests/uav.shader_test +++ b/tests/uav.shader_test @@ -133,7 +133,7 @@ size (1, 1)
0.5 0.6 0.7 0.8
-[pixel shader todo] +[pixel shader] RWTexture2D<float4> u[2] : register(u2);
float4 main() : sv_target1 @@ -144,6 +144,6 @@ float4 main() : sv_target1 }
[test] -todo draw quad +draw quad probe uav 2 (0, 0) rgba (1.1, 1.2, 1.3, 1.4) probe uav 3 (0, 0) rgba (2.1, 2.2, 2.3, 2.4)
On Fri Apr 21 22:00:40 2023 +0000, Francisco Casas wrote:
Isn't the register count the same thing as the register size here?
Actually, I'm not sure I see the difference in general? I thought there was a convention of "element" vs "register", but apart from that...? When I said count, I was referring to `hlsl_reg.count`, which is the number of registers that a given variable (or field) **requires** to be allocated, currently used by allocate_register_reservations() and allocate_objects(), and which may be different depending on how the variable's components are used across the shader. Another difference between `hlsl_reg.count` and the variable's type's `reg_size` is that the former is currently expressed in whole registers, not register components. This difference is only meaningful for numeric registers, where each register has 4 components, and while I am initializing `hlsl_reg.count` now, it is not used for numeric components so far. I updated the documentation to clarify.
Yeah, I don't think we want to ascribe "count" vs "size" to that distinction if we can avoid it. Something like "reserved" vs "used" would probably be better.
On Fri Apr 21 19:46:43 2023 +0000, Zebediah Figura wrote:
Okay, it is not as bad as I imagined. Perhaps I should do the cost
assessments when I am fresh in the morning and not when I am tired.
Still, there are 5 places where it would be better to iterate over a
regset: track_object_components_usage(), calculate_resource_register_counts(), write_sm1_sampler_dcls(), write_sm4_dcl_samplers(), and write_sm4_dcl_textures(); but per-component iteration is useful for promoting resource components into separate variables for SM 5.1. Hrm, I didn't quite think this through completely, I forgot that register allocation is still only done per-variable. Yeah, so it would require that for every time we're declaring a texture. If we need to track anything per-component, it still may be best to do it that way, though.
I probably didn't understand this the first time, I thought you were
referring to the register range that's allocated, which goes from 0 to the maximum used component (which can be stored in var->regs[HLSL_REGSET_NUMERIC].count).
If I understand correctly, you are saying that in SM1 there are cases
where a uniform variable (or field) can require the size of a bool or an int according to how it is used. I am not familiar with this behavior. Can you provide an example? Most of sm1 uses float uniforms, even where they're declared with a non-float type. SM 3.0 introduces the first flow control, and its flow control instructions—and *only* those instructions—take non-float types. Specifically "if" takes a bool type, and "loop" takes an integer type. Consider the following shader:
uniform struct { float f; bool b; int i; } a; float4 main() : sv_target { float x = a.f + a.i + a.b; if (a.b) x += 2; return x; }
Ultimately "a.b" is allocated to *both* the float and bool register sets. (You can get a similar effect for ints by declaring a loop that executes for i iterations, but I didn't bother with that.) This is true without the struct too, of course, but the struct shows that there are interesting consequences wrt allocation. Granted, I suppose we don't really need to do this tracking per-component—we can just do it the same way as we do textures...
That example alone doesn't seem to be triggering the allocation of both bool and float registers for `a.b` because of compiler optimizations[1], but I changed the shader a little to prevent them:
```hlsl sampler sam;
uniform struct { float f; bool b; int i; } a;
float4 main() : sv_target { float x = a.f + a.i + a.b; if (a.b) x += tex2D(sam, float2(0, 0)); return x; } ```
I get this register table: ``` // Name Reg Size // ------------ ----- ---- // a b0 2 // a c0 3 // sam s0 1 ```
Note that the offset of the bool `a.b` is the same as the offset of the float `a.b`.
So, we will need a function similar to `track_object_components_usage()` but checking for the usage of bool components, to determine the size of the variable in the REGSET_BOOL (?). Still, I don't think tracking per-component would be necessary.
---
[1] This implies that whether to expect bools in the input signature or not is totally a decision of the compiler.
Perhaps we can get away with just putting floats in the input signature and casting to float within the shader. That may not work if some applications hardcode the input signature, but also, it may be really impossible to mimic compiler optimizations 1:1. For instance, this slightly modified shader **doesn't** require bool registers:
```hlsl sampler sam;
uniform struct { float f; bool b; int i; } a;
bool k;
float4 main() : sv_target { float x = a.f + a.i + a.b; if (a.b || k) x += tex2D(sam, float2(0, 0)); return x; } ```
That example alone doesn't seem to be triggering the allocation of both bool and float registers for `a.b` because of compiler optimizations[1], but I changed the shader a little to prevent them:
Curious, it does for me (d3dcompiler_47 from winetricks). But yes, it probably is not critical to match native exactly.
So, we will need a function similar to `track_object_components_usage()` but checking for the usage of bool components, to determine the size of the variable in the REGSET_BOOL (?). Still, I don't think tracking per-component would be necessary.
We could potentially do it in the same function, even (and call it something that implies it's not specific to objects).