-- v17: tests/d3d12: Test multiple clip distance inputs in test_clip_distance(). tests/d3d12: Use five clip distances for the multiple test in test_clip_distance(). vkd3d-shader/ir: Transform clip/cull outputs and patch constants into arrays. vkd3d-shader/ir: Transform clip/cull inputs into an array. vkd3d-shader/spirv: Support no-op signature elements.
From: Conor McCarthy cmccarthy@codeweavers.com
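This allows signature normalisation to turn an element into a no-op (an element with a register count of zero) without having to compact the signature element array. --- libs/vkd3d-shader/spirv.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-)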
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index a3baeea75..5257679d6 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4876,7 +4876,7 @@ static unsigned int shader_signature_next_location(const struct shader_signature return max_row; }
-static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +static void spirv_compiler_emit_input(struct spirv_compiler *compiler, enum vkd3d_shader_register_type reg_type, unsigned int element_idx) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -4887,7 +4887,6 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, const struct vkd3d_spirv_builtin *builtin; enum vkd3d_shader_sysval_semantic sysval; uint32_t write_mask, reg_write_mask; - struct vkd3d_symbol *symbol = NULL; uint32_t val_id, input_id, var_id; uint32_t type_id, float_type_id; struct vkd3d_symbol reg_symbol; @@ -4900,6 +4899,10 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, ? &compiler->patch_constant_signature : &compiler->input_signature;
signature_element = &shader_signature->elements[element_idx]; + /* No-op element from normalisation. */ + if (!signature_element->register_count) + return; + sysval = signature_element->sysval_semantic; /* The Vulkan spec does not explicitly forbid passing varyings from the * TCS to the TES via builtins. However, Mesa doesn't seem to handle it @@ -4955,8 +4958,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, * being repeated in another (i.e. vcp/vocp), which should have been deleted. */ if (reg_type != VKD3DSPR_INPUT || !is_in_fork_or_join_phase(compiler)) FIXME("Duplicate input definition found.\n"); - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - return symbol->id; + return; }
if (builtin) @@ -5029,7 +5031,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, spirv_compiler_emit_store_reg(compiler, &dst_reg, signature_element->mask, val_id); }
- return input_id; + return; }
static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, @@ -5247,6 +5249,10 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature;
signature_element = &shader_signature->elements[element_idx]; + /* No-op element from normalisation. */ + if (!signature_element->register_count) + return; + sysval = signature_element->sysval_semantic; /* Don't use builtins for TCS -> TES varyings. See spirv_compiler_emit_input(). */ if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !is_patch_constant)
From: Conor McCarthy cmccarthy@codeweavers.com
Up to eight clip or cull distance values are supported. DXBC spreads these across up to two vector signature elements, which is an awkward representation, so converting them into a single array is the best fit for VSIR. SPIR-V also requires clip and cull distances to be arrays. --- libs/vkd3d-shader/ir.c | 445 +++++++++++++++++++++++ libs/vkd3d-shader/spirv.c | 2 +- libs/vkd3d-shader/vkd3d_shader_private.h | 2 + 3 files changed, 448 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 886344874..03fc2b043 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -18,6 +18,8 @@
#include "vkd3d_shader_private.h"
+static const unsigned int MAX_CLIP_OR_CULL_DISTANCE_COUNT = 8; + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) { program->shader_version = *version; @@ -1456,6 +1458,446 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse return VKD3D_OK; }
+struct clip_cull_normaliser_scan +{ + bool need_normalisation; + unsigned int offset; + unsigned int remap; +}; + +struct clip_cull_normaliser_signature +{ + struct shader_signature *s; + struct clip_cull_normaliser_scan scan[MAX_REG_OUTPUT]; +}; + +struct clip_cull_normaliser +{ + struct vkd3d_shader_parser *parser; + + struct vkd3d_shader_location location; + bool has_normalised_clip_cull; + enum vkd3d_result result; + + struct vkd3d_shader_instruction *instructions; + size_t instruction_capacity; + size_t instruction_count; + + struct clip_cull_normaliser_signature input_signature; + + unsigned int temp_count; +}; + +static unsigned int mask_find_free_array_slot(unsigned int used_mask, unsigned int array_size) +{ + unsigned int i, array_mask = (1u << array_size) - 1; + /* Find the lowest register with at least array_size free above. */ + for (i = 0; i < MAX_REG_OUTPUT; ++i) + { + if (!(used_mask & (array_mask << i))) + break; + } + return i; +} + +static void normaliser_signature_transform_clip_or_cull(struct clip_cull_normaliser_signature *signature, + enum vkd3d_shader_sysval_semantic target_sysval, struct clip_cull_normaliser *normaliser) +{ + unsigned int i, base, array_size, mask, reg_mask, location_mask; + struct vkd3d_shader_parser *parser = normaliser->parser; + struct shader_signature *s = signature->s; + struct signature_element *e; + + /* Up to two vec4 clip/cull elements are allowed. Merge these into an array, + * and track the location and array offset of the second one. */ + for (i = 0, array_size = 0, reg_mask = 0, location_mask = 0, base = 0; i < s->element_count; ++i) + { + e = &s->elements[i]; + + if (e->sysval_semantic != target_sysval) + { + mask = (1u << e->register_count) - 1; + /* Compile a map of used registers and locations. 
*/ + reg_mask |= mask << e->register_index; + location_mask |= mask << e->target_location; + continue; + } + + if (!array_size) + { + base = i; + array_size = vsir_write_mask_component_count(e->mask); + } + else + { + signature->scan[i].remap = base; + signature->scan[i].need_normalisation = true; + signature->scan[i].offset = array_size; + array_size += vsir_write_mask_component_count(e->mask); + /* Make no-op. */ + memset(e, 0, sizeof(*e)); + } + } + + if (!array_size) + return; + + if (array_size > MAX_CLIP_OR_CULL_DISTANCE_COUNT) + { + WARN("Invalid array size %u.\n", array_size); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Clip or cull array size %u exceeds the limit of 8.", array_size); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + signature->scan[base].need_normalisation = true; + e = &s->elements[base]; + + i = mask_find_free_array_slot(reg_mask, array_size); + /* Signature locations are not used for sysvals, so if it proves necessary, it should be + * ok to raise MAX_REG_OUTPUT to 40 and validate that non-sysval register indices are < 32. 
*/ + if (array_size > MAX_REG_OUTPUT - i) + { + FIXME("Too many registers; register index %u, array size %u.\n", i, array_size); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Clip or cull base register index %u + array size %u exceeds limit of 32 registers.", i, array_size); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + e->register_index = i; + e->target_location = mask_find_free_array_slot(location_mask, array_size); + e->register_count = array_size; + e->mask = 1; + e->used_mask = 1; + + normaliser->has_normalised_clip_cull = true; +} + +static void normaliser_signature_transform_clip_cull(struct clip_cull_normaliser_signature *signature, + struct clip_cull_normaliser *normaliser) +{ + normaliser_signature_transform_clip_or_cull(signature, VKD3D_SHADER_SV_CLIP_DISTANCE, normaliser); + normaliser_signature_transform_clip_or_cull(signature, VKD3D_SHADER_SV_CULL_DISTANCE, normaliser); +} + +static struct vkd3d_shader_instruction *clip_cull_normaliser_require_space(struct clip_cull_normaliser *normaliser, + size_t count) +{ + if (!vkd3d_array_reserve((void **)&normaliser->instructions, &normaliser->instruction_capacity, + normaliser->instruction_count + count, sizeof(*normaliser->instructions))) + { + ERR("Failed to allocate instructions.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + return &normaliser->instructions[normaliser->instruction_count]; +} + +static struct vkd3d_shader_dst_param *clip_cull_normaliser_emit_mov(struct clip_cull_normaliser *normaliser, + struct vkd3d_shader_src_param *src_param) +{ + struct vkd3d_shader_instruction *ins; + + if (!(ins = clip_cull_normaliser_require_space(normaliser, 1))) + return NULL; + vsir_instruction_init(ins, &normaliser->location, VKD3DSIH_MOV); + + ins->src = src_param; + ins->src_count = 1; + + if (!(ins->dst = vsir_program_get_dst_params(&normaliser->parser->program, 1))) + { + ERR("Failed to allocate instruction dst param.\n"); 
+ normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + ins->dst_count = 1; + + ++normaliser->instruction_count; + return ins->dst; +} + +static void clip_cull_normaliser_copy_instruction(struct clip_cull_normaliser *normaliser, + struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_instruction *dst_ins; + + if (!(dst_ins = clip_cull_normaliser_require_space(normaliser, 1))) + return; + *dst_ins = *ins; + ++normaliser->instruction_count; +} + +static bool clip_cull_src_normalisation_get_write_mask(struct vkd3d_shader_instruction *ins, + unsigned int src_idx, unsigned int *dst_write_mask) +{ + switch (ins->handler_idx) + { + case VKD3DSIH_DP2: + case VKD3DSIH_DP3: + case VKD3DSIH_DP4: + case VKD3DSIH_EVAL_CENTROID: + case VKD3DSIH_GATHER4: + case VKD3DSIH_GATHER4_C: + case VKD3DSIH_GATHER4_PO: + case VKD3DSIH_GATHER4_PO_C: + case VKD3DSIH_LD: + case VKD3DSIH_LD2DMS: + case VKD3DSIH_LD2DMS_S: + case VKD3DSIH_LD_RAW: + case VKD3DSIH_LD_RAW_S: + case VKD3DSIH_LD_STRUCTURED: + case VKD3DSIH_LD_STRUCTURED_S: + case VKD3DSIH_LD_UAV_TYPED: + case VKD3DSIH_LD_UAV_TYPED_S: + case VKD3DSIH_LOD: + case VKD3DSIH_RESINFO: + case VKD3DSIH_SAMPLE: + case VKD3DSIH_SAMPLE_B_CL_S: + case VKD3DSIH_SAMPLE_GRAD: + case VKD3DSIH_SAMPLE_POS: + return false; + case VKD3DSIH_ATOMIC_AND: + case VKD3DSIH_ATOMIC_CMP_STORE: + case VKD3DSIH_ATOMIC_IADD: + case VKD3DSIH_ATOMIC_IMAX: + case VKD3DSIH_ATOMIC_IMIN: + case VKD3DSIH_ATOMIC_OR: + case VKD3DSIH_ATOMIC_UMAX: + case VKD3DSIH_ATOMIC_UMIN: + case VKD3DSIH_ATOMIC_XOR: + case VKD3DSIH_EVAL_SAMPLE_INDEX: + case VKD3DSIH_IMM_ATOMIC_AND: + case VKD3DSIH_IMM_ATOMIC_CMP_EXCH: + case VKD3DSIH_IMM_ATOMIC_EXCH: + case VKD3DSIH_IMM_ATOMIC_IADD: + case VKD3DSIH_IMM_ATOMIC_IMAX: + case VKD3DSIH_IMM_ATOMIC_IMIN: + case VKD3DSIH_IMM_ATOMIC_OR: + case VKD3DSIH_IMM_ATOMIC_UMAX: + case VKD3DSIH_IMM_ATOMIC_UMIN: + case VKD3DSIH_IMM_ATOMIC_XOR: + *dst_write_mask = VKD3DSP_WRITEMASK_0; + return src_idx > 0; + case VKD3DSIH_BRANCH: + case 
VKD3DSIH_DISCARD: + case VKD3DSIH_RETP: + case VKD3DSIH_SWITCH_MONOLITHIC: + *dst_write_mask = VKD3DSP_WRITEMASK_0; + break; + case VKD3DSIH_SAMPLE_B: + case VKD3DSIH_SAMPLE_C: + case VKD3DSIH_SAMPLE_C_CL_S: + case VKD3DSIH_SAMPLE_C_LZ: + case VKD3DSIH_SAMPLE_C_LZ_S: + case VKD3DSIH_SAMPLE_CL_S: + case VKD3DSIH_SAMPLE_LOD: + case VKD3DSIH_SAMPLE_LOD_S: + *dst_write_mask = VKD3DSP_WRITEMASK_0; + return src_idx >= 3; + case VKD3DSIH_SAMPLE_GRAD_CL_S: + *dst_write_mask = VKD3DSP_WRITEMASK_0; + return src_idx >= 5; + case VKD3DSIH_STORE_RAW: + case VKD3DSIH_STORE_STRUCTURED: + return src_idx == ins->src_count - 1; + case VKD3DSIH_STORE_UAV_TYPED: + return src_idx > 0; + default: + break; + } + + return true; +} + +static void shader_instruction_src_param_clip_cull_normalise(struct vkd3d_shader_instruction *ins, + unsigned int src_idx, unsigned int write_mask, struct clip_cull_normaliser *normaliser) +{ + unsigned int i, element_idx, component_idx, array_offset, handled_mask, mask_bit; + struct vkd3d_shader_src_param *src_param = &ins->src[src_idx]; + struct vkd3d_shader_parser *parser = normaliser->parser; + const struct clip_cull_normaliser_signature *signature; + struct vkd3d_shader_register *reg = &src_param->reg; + struct vsir_program *program = &parser->program; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *mov_src; + + switch (reg->type) + { + case VKD3DSPR_INPUT: + /* Sysvals are not needed for domain shader inputs. 
*/ + if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_DOMAIN) + return; + signature = &normaliser->input_signature; + break; + default: + return; + } + + element_idx = reg->idx[reg->idx_count - 1].offset; + + if (!signature->scan[element_idx].need_normalisation) + return; + + if (!clip_cull_src_normalisation_get_write_mask(ins, src_idx, &write_mask)) + { + WARN("Unsupported src index %u for handler %u.\n", src_idx, ins->handler_idx); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Unsupported clip/cull source index %u for handler %u.", src_idx, ins->handler_idx); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + if (!write_mask) + return; + + /* Elements with an array offset are remapped to the base element. */ + if ((array_offset = signature->scan[element_idx].offset)) + { + element_idx = signature->scan[element_idx].remap; + reg->idx[reg->idx_count - 1].offset = element_idx; + } + + /* Dynamic array addressing of clip/cull inputs is not supported. */ + if (reg->idx_count >= ARRAY_SIZE(reg->idx)) + { + WARN("Unexpected index count %u.\n", reg->idx_count); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid register index count %u for a clip/cull load.", reg->idx_count); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + /* Move the indices up so the array index can be placed in idx[0]. 
*/ + memmove(&reg->idx[1], &reg->idx[0], reg->idx_count * sizeof(reg->idx[0])); + memset(&reg->idx[0], 0, sizeof(reg->idx[0])); + ++reg->idx_count; + + if (vsir_write_mask_component_count(write_mask) == 1) + { + reg->idx[0].offset = array_offset + vsir_swizzle_get_component(src_param->swizzle, + vsir_write_mask_get_component_idx(write_mask)); + src_param->swizzle = 0; + return; + } + + for (i = 0, handled_mask = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask & (1u << i))) + continue; + + /* For each component accessed by the write mask + swizzle, emit a MOV from the clip/cull array to a temp. + * The original instruction will still apply the swizzle, so the temp is a direct reflection of the source. */ + + component_idx = vsir_swizzle_get_component(src_param->swizzle, i); + mask_bit = 1u << component_idx; + + if (handled_mask & mask_bit) + continue; + handled_mask |= mask_bit; + + if (!(mov_src = vsir_program_get_src_params(program, 1))) + { + ERR("Failed to allocate instruction src param.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + mov_src->reg = *reg; + mov_src->reg.idx[0].offset = array_offset + component_idx; + mov_src->swizzle = 0; + mov_src->modifiers = 0; + + if (!(dst_param = clip_cull_normaliser_emit_mov(normaliser, mov_src))) + return; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, reg->data_type, 1); + dst_param->reg.dimension = reg->dimension; + dst_param->reg.idx[0].offset = parser->program.temp_count; + normaliser->temp_count = 1; + dst_param->write_mask = mask_bit; + dst_param->modifiers = 0; + dst_param->shift = 0; + } + + /* Substitute the temp for the vector clip/cull source. 
*/ + vsir_register_init(reg, VKD3DSPR_TEMP, reg->data_type, 1); + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = parser->program.temp_count; +} + +static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_instruction *ins, + struct clip_cull_normaliser *normaliser) +{ + unsigned int i, write_mask; + + if (ins->handler_idx == VKD3DSIH_NOP) + return; + + normaliser->location = ins->location; + + if (vsir_instruction_is_dcl(ins)) + { + clip_cull_normaliser_copy_instruction(normaliser, ins); + return; + } + + for (i = 0, write_mask = 0; i < ins->dst_count; ++i) + if (ins->dst[i].reg.type != VKD3DSPR_NULL) + write_mask |= ins->dst[i].write_mask; + + for (i = 0; i < ins->src_count; ++i) + shader_instruction_src_param_clip_cull_normalise(ins, i, write_mask, normaliser); + + clip_cull_normaliser_copy_instruction(normaliser, ins); +} + +static enum vkd3d_result normalise_clip_cull(struct vkd3d_shader_parser *parser) +{ + struct clip_cull_normaliser normaliser = {0}; + unsigned int i; + + normaliser.parser = parser; + normaliser.input_signature.s = &parser->shader_desc.input_signature; + + if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) + normaliser_signature_transform_clip_cull(&normaliser.input_signature, &normaliser); + + if (parser->failed) + return VKD3D_ERROR_INVALID_SHADER; + + if (!normaliser.has_normalised_clip_cull) + return VKD3D_OK; + + if (!clip_cull_normaliser_require_space(&normaliser, parser->program.instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < parser->program.instructions.count; ++i) + shader_instruction_normalise_clip_cull_params(&parser->program.instructions.elements[i], &normaliser); + + if (normaliser.result >= 0) + { + vkd3d_free(parser->program.instructions.elements); + parser->program.instructions.elements = normaliser.instructions; + parser->program.instructions.capacity = normaliser.instruction_capacity; + parser->program.instructions.count = 
normaliser.instruction_count; + + parser->program.temp_count += normaliser.temp_count; + + return VKD3D_OK; + } + else + { + if (normaliser.result == VKD3D_ERROR_OUT_OF_MEMORY) + { + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY, + "Out of memory allocating clip/cull normalization instructions.\n"); + } + vkd3d_free(normaliser.instructions); + return normaliser.result; + } +} + struct flat_constant_def { enum vkd3d_shader_d3dbc_constant_register set; @@ -3052,6 +3494,9 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, if ((result = shader_normalise_io_registers(parser)) < 0) return result;
+ if ((result = normalise_clip_cull(parser)) < 0) + return result; + if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0) return result;
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 5257679d6..24f75a085 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4915,7 +4915,7 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->input_control_point_count); if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) - && (!vsir_sysval_semantic_is_clip_cull(signature_element->sysval_semantic) || array_sizes[1])) + && (!vsir_sysval_semantic_is_clip_cull(sysval) || array_sizes[1])) { array_sizes[0] = 0; } diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 7239beaf7..526397e89 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -218,6 +218,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW = 9016, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, + VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY = 9018, + VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9019,
VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, };
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/ir.c | 144 +++++++++++++++++++++++++++++++++ libs/vkd3d-shader/spirv.c | 162 ++------------------------------------ 2 files changed, 152 insertions(+), 154 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 03fc2b043..5d4f42df7 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -1484,8 +1484,12 @@ struct clip_cull_normaliser size_t instruction_count;
struct clip_cull_normaliser_signature input_signature; + struct clip_cull_normaliser_signature output_signature; + struct clip_cull_normaliser_signature patch_constant_signature;
unsigned int temp_count; + + enum vkd3d_shader_opcode phase; };
static unsigned int mask_find_free_array_slot(unsigned int used_mask, unsigned int array_size) @@ -1633,6 +1637,7 @@ static void clip_cull_normaliser_copy_instruction(struct clip_cull_normaliser *n static bool clip_cull_src_normalisation_get_write_mask(struct vkd3d_shader_instruction *ins, unsigned int src_idx, unsigned int *dst_write_mask) { + return true; switch (ins->handler_idx) { case VKD3DSIH_DP2: @@ -1732,6 +1737,15 @@ static void shader_instruction_src_param_clip_cull_normalise(struct vkd3d_shader return; signature = &normaliser->input_signature; break; + case VKD3DSPR_OUTPUT: + /* Sysvals are not needed for hull shader outputs. */ + if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL) + return; + signature = &normaliser->output_signature; + break; + case VKD3DSPR_PATCHCONST: + signature = &normaliser->patch_constant_signature; + break; default: return; } @@ -1826,6 +1840,119 @@ static void shader_instruction_src_param_clip_cull_normalise(struct vkd3d_shader reg->idx[0].offset = parser->program.temp_count; }
+static void shader_dst_param_clip_cull_normalise(struct vkd3d_shader_dst_param *dst_param, + struct clip_cull_normaliser *normaliser) + { + struct vkd3d_shader_parser *parser = normaliser->parser; + const struct clip_cull_normaliser_signature *signature; + unsigned int i, element_idx, write_mask, array_offset; + struct vkd3d_shader_register *reg = &dst_param->reg; + struct vsir_program *program = &parser->program; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_dst_param *mov_dst; + + if (!reg->idx_count) + return; + + switch (reg->type) + { + /* VKD3DSPR_INPUT must not occur in a dst param. */ + + case VKD3DSPR_OUTPUT: + if (normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE) + { + signature = &normaliser->patch_constant_signature; + } + else + { + /* Sysvals are not needed for hull shader outputs. */ + if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL) + return; + signature = &normaliser->output_signature; + } + break; + + case VKD3DSPR_PATCHCONST: + signature = &normaliser->patch_constant_signature; + break; + + default: + return; + } + + element_idx = reg->idx[reg->idx_count - 1].offset; + + if (!signature->scan[element_idx].need_normalisation) + return; + + /* Elements with an array offset are remapped to the base element. */ + if ((array_offset = signature->scan[element_idx].offset)) + { + element_idx = signature->scan[element_idx].remap; + reg->idx[reg->idx_count - 1].offset = element_idx; + } + + write_mask = dst_param->write_mask; + + /* Dynamic array addressing of clip/cull outputs is not supported. 
*/ + if (reg->idx_count >= ARRAY_SIZE(reg->idx)) + { + WARN("Unexpected index count %u.\n", reg->idx_count); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid register index count %u for a clip/cull store.", reg->idx_count); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + /* Move the indices up so the array index can be placed in idx[0]. */ + memmove(&reg->idx[1], &reg->idx[0], reg->idx_count * sizeof(reg->idx[0])); + memset(&reg->idx[0], 0, sizeof(reg->idx[0])); + ++reg->idx_count; + + if (vsir_write_mask_component_count(write_mask) == 1) + { + reg->idx[0].offset = array_offset + vsir_write_mask_get_component_idx(write_mask); + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + return; + } + + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask & (1u << i))) + continue; + + /* For each component, emit a MOV from a temp to the clip/cull array. */ + + if (!(src_param = vsir_program_get_src_params(program, 1))) + { + ERR("Failed to allocate instruction dst param.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + src_param->swizzle = vkd3d_shader_create_swizzle(i, i, i, i); + src_param->modifiers = 0; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, reg->data_type, 1); + src_param->reg.dimension = reg->dimension; + src_param->reg.idx[0].offset = parser->program.temp_count; + normaliser->temp_count = 1; + + if (!(mov_dst = clip_cull_normaliser_emit_mov(normaliser, src_param))) + return; + mov_dst->reg = *reg; + mov_dst->reg.idx[0].offset = array_offset + i; + mov_dst->write_mask = VKD3DSP_WRITEMASK_0; + mov_dst->modifiers = 0; + mov_dst->shift = 0; + } + + /* Substitute the temp for the vector clip/cull destination. If this is for a MOV instruction with + * clip/cull source, it results in a harmless no-op MOV, because + * shader_instruction_src_param_clip_cull_normalise() + * has already written the clip/cull source to the temp. 
*/ + vsir_register_init(reg, VKD3DSPR_TEMP, reg->data_type, 1); + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = parser->program.temp_count; +} + static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_instruction *ins, struct clip_cull_normaliser *normaliser) { @@ -1842,6 +1969,14 @@ static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_in return; }
+ if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE + || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + { + normaliser->phase = ins->handler_idx; + clip_cull_normaliser_copy_instruction(normaliser, ins); + return; + } + for (i = 0, write_mask = 0; i < ins->dst_count; ++i) if (ins->dst[i].reg.type != VKD3DSPR_NULL) write_mask |= ins->dst[i].write_mask; @@ -1850,6 +1985,9 @@ static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_in shader_instruction_src_param_clip_cull_normalise(ins, i, write_mask, normaliser);
clip_cull_normaliser_copy_instruction(normaliser, ins); + + for (i = 0; i < ins->dst_count; ++i) + shader_dst_param_clip_cull_normalise(&ins->dst[i], normaliser); }
static enum vkd3d_result normalise_clip_cull(struct vkd3d_shader_parser *parser) @@ -1859,9 +1997,15 @@ static enum vkd3d_result normalise_clip_cull(struct vkd3d_shader_parser *parser)
normaliser.parser = parser; normaliser.input_signature.s = &parser->shader_desc.input_signature; + normaliser.output_signature.s = &parser->shader_desc.output_signature; + normaliser.patch_constant_signature.s = &parser->shader_desc.patch_constant_signature; + normaliser.phase = VKD3DSIH_INVALID;
if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) normaliser_signature_transform_clip_cull(&normaliser.input_signature, &normaliser); + if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_HULL) + normaliser_signature_transform_clip_cull(&normaliser.output_signature, &normaliser); + normaliser_signature_transform_clip_cull(&normaliser.patch_constant_signature, &normaliser);
if (parser->failed) return VKD3D_ERROR_INVALID_SHADER; diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 24f75a085..cb9ab927e 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -2332,7 +2332,6 @@ struct spirv_compiler { uint32_t id; enum vkd3d_shader_component_type component_type; - uint32_t array_element_mask; } *output_info; uint32_t private_output_variable[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ uint32_t private_output_variable_write_mask[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ @@ -5088,88 +5087,6 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; }
-static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) -{ - unsigned int write_mask; - - if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) - { - FIXME("Invalid semantic index %u for clip/cull distance.\n", e->semantic_index); - return; - } - - write_mask = e->mask >> vsir_write_mask_get_component_idx(e->mask); - *mask |= (write_mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); -} - -/* Emits arrayed SPIR-V built-in variables. */ -static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) -{ - const struct shader_signature *output_signature = &compiler->output_signature; - uint32_t clip_distance_mask = 0, clip_distance_id = 0; - uint32_t cull_distance_mask = 0, cull_distance_id = 0; - const struct vkd3d_spirv_builtin *builtin; - unsigned int i, count; - - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *e = &output_signature->elements[i]; - - switch (e->sysval_semantic) - { - case VKD3D_SHADER_SV_CLIP_DISTANCE: - calculate_clip_or_cull_distance_mask(e, &clip_distance_mask); - break; - - case VKD3D_SHADER_SV_CULL_DISTANCE: - calculate_clip_or_cull_distance_mask(e, &cull_distance_mask); - break; - - default: - break; - } - } - - if (clip_distance_mask) - { - count = vkd3d_popcount(clip_distance_mask); - builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SHADER_SV_CLIP_DISTANCE); - clip_distance_id = spirv_compiler_emit_builtin_variable(compiler, - builtin, SpvStorageClassOutput, count); - } - - if (cull_distance_mask) - { - count = vkd3d_popcount(cull_distance_mask); - builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SHADER_SV_CULL_DISTANCE); - cull_distance_id = spirv_compiler_emit_builtin_variable(compiler, - builtin, SpvStorageClassOutput, count); - } - - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *e = &output_signature->elements[i]; - - switch 
(e->sysval_semantic) - { - case VKD3D_SHADER_SV_CLIP_DISTANCE: - compiler->output_info[i].id = clip_distance_id; - compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; - compiler->output_info[i].array_element_mask = clip_distance_mask; - break; - - case VKD3D_SHADER_SV_CULL_DISTANCE: - compiler->output_info[i].id = cull_distance_id; - compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; - compiler->output_info[i].array_element_mask = cull_distance_mask; - break; - - default: - break; - } - } -} - static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst) { @@ -5259,7 +5176,8 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, sysval = VKD3D_SHADER_SV_NONE; array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->output_control_point_count); - if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic)) + if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) + && (!vsir_sysval_semantic_is_clip_cull(sysval) || array_sizes[1])) array_sizes[0] = 0;
builtin = vkd3d_get_spirv_builtin(compiler, reg_type, sysval); @@ -5285,8 +5203,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, use_private_variable = true;
if (!is_patch_constant - && (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE - || (compiler->output_info[element_idx].id && compiler->output_info[element_idx].array_element_mask))) + && get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE) { use_private_variable = true; } @@ -5301,11 +5218,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, return; }
- if (!is_patch_constant && compiler->output_info[element_idx].id) - { - id = compiler->output_info[element_idx].id; - } - else if (builtin) + if (builtin) { if (spirv_compiler_get_current_shader_phase(compiler)) id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, builtin, array_sizes, 2); @@ -5386,51 +5299,18 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, } }
-static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *compiler, - const struct signature_element *e) -{ - enum vkd3d_shader_sysval_semantic sysval = e->sysval_semantic; - const struct vkd3d_spirv_builtin *builtin; - - builtin = get_spirv_builtin_for_sysval(compiler, sysval); - - switch (sysval) - { - case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: - case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: - return builtin->member_idx; - default: - return e->semantic_index; - } -} - static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler, const struct shader_signature *signature, const struct signature_element *output, const struct vkd3d_shader_output_info *output_info, uint32_t output_index_id, uint32_t val_id, uint32_t write_mask) { - uint32_t dst_write_mask, use_mask, uninit_mask, swizzle, mask; + uint32_t dst_write_mask, use_mask, uninit_mask, swizzle, type_id, zero_id, ptr_type_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id; - const struct signature_element *element; - unsigned int i, index, array_idx; + unsigned int index; uint32_t output_id;
dst_write_mask = output->mask; use_mask = output->used_mask; - if (!output->sysval_semantic) - { - for (i = 0; i < signature->element_count; ++i) - { - element = &signature->elements[i]; - if (element->register_index != output->register_index) - continue; - if (element->sysval_semantic) - continue; - dst_write_mask |= element->mask; - use_mask |= element->used_mask; - } - } index = vsir_write_mask_get_component_idx(output->mask); dst_write_mask >>= index; use_mask >>= index; @@ -5472,31 +5352,8 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi output_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, output_id, output_index_id); }
- if (!output_info->array_element_mask) - { - spirv_compiler_emit_store(compiler, - output_id, dst_write_mask, output_info->component_type, SpvStorageClassOutput, write_mask, val_id); - return; - } - - type_id = vkd3d_spirv_get_type_id(builder, output_info->component_type, 1); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); - mask = output_info->array_element_mask; - array_idx = spirv_compiler_get_output_array_index(compiler, output); - mask &= (1u << (array_idx * VKD3D_VEC4_SIZE)) - 1; - for (i = 0, index = vkd3d_popcount(mask); i < VKD3D_VEC4_SIZE; ++i) - { - if (!(write_mask & (VKD3DSP_WRITEMASK_0 << i))) - continue; - - chain_id = vkd3d_spirv_build_op_access_chain1(builder, - ptr_type_id, output_id, spirv_compiler_get_constant_uint(compiler, index)); - object_id = spirv_compiler_emit_swizzle(compiler, val_id, write_mask, - output_info->component_type, VKD3D_SHADER_NO_SWIZZLE, VKD3DSP_WRITEMASK_0 << i); - spirv_compiler_emit_store(compiler, chain_id, VKD3DSP_WRITEMASK_0, - output_info->component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_0 << i, object_id); - ++index; - } + spirv_compiler_emit_store(compiler, + output_id, dst_write_mask, output_info->component_type, SpvStorageClassOutput, write_mask, val_id); }
static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler *compiler) @@ -9943,9 +9800,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, compiler->input_control_point_count = program->input_control_point_count; compiler->output_control_point_count = program->output_control_point_count;
- if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_shader_signature_outputs(compiler); - for (i = 0; i < instructions.count && result >= 0; ++i) { compiler->location.line = i + 1;
From: Conor McCarthy cmccarthy@codeweavers.com
--- tests/d3d12.c | 183 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 116 insertions(+), 67 deletions(-)
diff --git a/tests/d3d12.c b/tests/d3d12.c index d7933ed63..a3aed2cc5 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -31199,7 +31199,7 @@ static void test_clip_distance(void) ID3D12CommandQueue *queue; ID3D12PipelineState *pso; ID3D12Device *device; - unsigned int i; + unsigned int i, j; D3D12_BOX box; HRESULT hr;
@@ -31254,51 +31254,53 @@ static void test_clip_distance(void) #if 0 bool use_constant; float clip_distance0; - float clip_distance1; + float4 clip_distance1;
struct input { float4 position : POSITION; float distance0 : CLIP_DISTANCE0; - float distance1 : CLIP_DISTANCE1; + float4 distance1 : CLIP_DISTANCE1; };
struct vertex { float4 position : SV_POSITION; float user_clip : CLIP_DISTANCE; - float2 clip : SV_ClipDistance; + float clip0 : SV_ClipDistance0; + float4 clip1 : SV_ClipDistance1; };
void main(input vin, out vertex vertex) { vertex.position = vin.position; vertex.user_clip = vin.distance0; - vertex.clip.x = vin.distance0; + vertex.clip0 = vin.distance0; if (use_constant) - vertex.clip.x = clip_distance0; - vertex.clip.y = vin.distance1; + vertex.clip0 = clip_distance0; + vertex.clip1 = vin.distance1.xzyw; if (use_constant) - vertex.clip.y = clip_distance1; + vertex.clip1 = clip_distance1; } #endif - 0x43425844, 0xef5cc236, 0xe2fbfa69, 0x560b6591, 0x23037999, 0x00000001, 0x00000214, 0x00000003, - 0x0000002c, 0x0000009c, 0x00000120, 0x4e475349, 0x00000068, 0x00000003, 0x00000008, 0x00000050, + 0x43425844, 0x85af9c9d, 0xa40fe352, 0x1fdcce87, 0x16f969e1, 0x00000001, 0x00000240, 0x00000003, + 0x0000002c, 0x0000009c, 0x00000138, 0x4e475349, 0x00000068, 0x00000003, 0x00000008, 0x00000050, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000f0f, 0x00000059, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000101, 0x00000059, 0x00000001, 0x00000000, 0x00000003, 0x00000002, - 0x00000101, 0x49534f50, 0x4e4f4954, 0x494c4300, 0x49445f50, 0x4e415453, 0xab004543, 0x4e47534f, - 0x0000007c, 0x00000003, 0x00000008, 0x00000050, 0x00000000, 0x00000001, 0x00000003, 0x00000000, - 0x0000000f, 0x0000005c, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000e01, 0x0000006a, - 0x00000000, 0x00000002, 0x00000003, 0x00000002, 0x00000c03, 0x505f5653, 0x5449534f, 0x004e4f49, - 0x50494c43, 0x5349445f, 0x434e4154, 0x56530045, 0x696c435f, 0x73694470, 0x636e6174, 0xabab0065, - 0x52444853, 0x000000ec, 0x00010040, 0x0000003b, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, - 0x0300005f, 0x001010f2, 0x00000000, 0x0300005f, 0x00101012, 0x00000001, 0x0300005f, 0x00101012, - 0x00000002, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x03000065, 0x00102012, 0x00000001, - 0x04000067, 0x00102032, 0x00000002, 0x00000002, 0x05000036, 0x001020f2, 0x00000000, 0x00101e46, - 0x00000000, 0x05000036, 0x00102012, 0x00000001, 0x0010100a, 0x00000001, 0x0b000037, 0x00102012, - 0x00000002, 
0x0020800a, 0x00000000, 0x00000000, 0x0020801a, 0x00000000, 0x00000000, 0x0010100a, - 0x00000001, 0x0b000037, 0x00102022, 0x00000002, 0x0020800a, 0x00000000, 0x00000000, 0x0020802a, - 0x00000000, 0x00000000, 0x0010100a, 0x00000002, 0x0100003e, + 0x00000f0f, 0x49534f50, 0x4e4f4954, 0x494c4300, 0x49445f50, 0x4e415453, 0xab004543, 0x4e47534f, + 0x00000094, 0x00000004, 0x00000008, 0x00000068, 0x00000000, 0x00000001, 0x00000003, 0x00000000, + 0x0000000f, 0x00000074, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000e01, 0x00000082, + 0x00000000, 0x00000002, 0x00000003, 0x00000002, 0x00000e01, 0x00000082, 0x00000001, 0x00000002, + 0x00000003, 0x00000003, 0x0000000f, 0x505f5653, 0x5449534f, 0x004e4f49, 0x50494c43, 0x5349445f, + 0x434e4154, 0x56530045, 0x696c435f, 0x73694470, 0x636e6174, 0xabab0065, 0x58454853, 0x00000100, + 0x00010050, 0x00000040, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0300005f, + 0x001010f2, 0x00000000, 0x0300005f, 0x00101012, 0x00000001, 0x0300005f, 0x001010f2, 0x00000002, + 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x03000065, 0x00102012, 0x00000001, 0x04000067, + 0x00102012, 0x00000002, 0x00000002, 0x04000067, 0x001020f2, 0x00000003, 0x00000002, 0x05000036, + 0x001020f2, 0x00000000, 0x00101e46, 0x00000000, 0x05000036, 0x00102012, 0x00000001, 0x0010100a, + 0x00000001, 0x0b000037, 0x00102012, 0x00000002, 0x0020800a, 0x00000000, 0x00000000, 0x0020801a, + 0x00000000, 0x00000000, 0x0010100a, 0x00000001, 0x0b000037, 0x001020f2, 0x00000003, 0x00208006, + 0x00000000, 0x00000000, 0x00208e46, 0x00000000, 0x00000001, 0x00101d86, 0x00000002, 0x0100003e, }; static const D3D12_SHADER_BYTECODE vs_multiple = {vs_multiple_code, sizeof(vs_multiple_code)}; #if 0 @@ -31534,6 +31536,12 @@ static void test_clip_distance(void) {"CLIP_DISTANCE", 0, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, {"CLIP_DISTANCE", 1, DXGI_FORMAT_R32_FLOAT, 1, 4, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, }; + static const 
D3D12_INPUT_ELEMENT_DESC layout_desc_multiple[] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"CLIP_DISTANCE", 0, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"CLIP_DISTANCE", 1, DXGI_FORMAT_R32G32B32A32_FLOAT, 1, 4, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + }; static const struct vec4 quad[] = { {-1.0f, -1.0f}, @@ -31553,6 +31561,18 @@ static void test_clip_distance(void) {1.0f, 1.0f}, {1.0f, 1.0f}, }; + struct + { + float clip_distance0; + struct vec4 clip_distance1; + } + vertices_multiple[] = + { + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + }; static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f}; struct { @@ -31561,6 +31581,12 @@ static void test_clip_distance(void) float clip_distance1; float tessellation_factor; } cb_data; + struct + { + bool use_constant; + float clip_distance0; + struct vec4 clip_distance1; + } cb_data_multiple;
memset(&desc, 0, sizeof(desc)); desc.rt_width = 640; @@ -31759,16 +31785,23 @@ static void test_clip_distance(void) memset(&pso_desc.DS, 0, sizeof(pso_desc.DS)); memset(&pso_desc.GS, 0, sizeof(pso_desc.GS)); pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pso_desc.InputLayout.pInputElementDescs = layout_desc_multiple; + pso_desc.InputLayout.NumElements = ARRAY_SIZE(layout_desc_multiple); hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); ok(hr == S_OK, "Failed to create pipeline state, hr %#x.\n", hr);
- cb_data.use_constant = false; - update_buffer_data(vs_cb, 0, sizeof(cb_data), &cb_data); + memset(&cb_data_multiple, 0, sizeof(cb_data_multiple)); + ID3D12Resource_Release(vs_cb); + vs_cb = create_upload_buffer(device, sizeof(cb_data_multiple), &cb_data_multiple);
- for (i = 0; i < ARRAY_SIZE(vertices); ++i) - vertices[i].clip_distance0 = 1.0f; - update_buffer_data(vb[1], 0, sizeof(vertices), vertices); + ID3D12Resource_Release(vb[1]); + vb[1] = create_upload_buffer(device, sizeof(vertices_multiple), vertices_multiple); + vbv[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vb[1]); + vbv[1].StrideInBytes = sizeof(*vertices_multiple); + vbv[1].SizeInBytes = sizeof(vertices_multiple); + + update_buffer_data(vb[1], 0, sizeof(vertices_multiple), vertices_multiple); ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, @@ -31794,49 +31827,65 @@ static void test_clip_distance(void) transition_resource_state(command_list, context.render_target, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
- for (i = 0; i < ARRAY_SIZE(vertices); ++i) + for (i = 0; i < 4; ++i) { - vertices[i].clip_distance0 = i < 2 ? 1.0f : -1.0f; - vertices[i].clip_distance1 = i % 2 ? 1.0f : -1.0f; - } - update_buffer_data(vb[1], 0, sizeof(vertices), vertices); - ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); - ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, - ID3D12Resource_GetGPUVirtualAddress(vs_cb)); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1, - ID3D12Resource_GetGPUVirtualAddress(tess_cb)); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 2, - ID3D12Resource_GetGPUVirtualAddress(tess_cb)); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 3, - ID3D12Resource_GetGPUVirtualAddress(gs_cb)); - ID3D12GraphicsCommandList_SetPipelineState(command_list, pso); - ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); - ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); - ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, ARRAY_SIZE(vbv), vbv); - ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); - ID3D12GraphicsCommandList_DrawInstanced(command_list, 4, 1, 0, 0); - transition_resource_state(command_list, context.render_target, - D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + vkd3d_test_push_context("Component %u", i);
- get_resource_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); - set_box(&box, 0, 0, 0, 320, 240, 1); - check_readback_data_uint(&rb.rb, &box, 0xff00ff00, 1); - set_box(&box, 0, 240, 0, 320, 480, 1); - check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); - set_box(&box, 320, 0, 0, 640, 480, 1); - check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); - release_resource_readback(&rb); + for (j = 0; j < ARRAY_SIZE(vertices_multiple); ++j) + { + float clip1 = j % 2 ? 1.0f : -1.0f; + vertices_multiple[j].clip_distance0 = j < 2 ? 1.0f : -1.0f; + vertices_multiple[j].clip_distance1.x = 1.0f; + vertices_multiple[j].clip_distance1.y = 1.0f; + vertices_multiple[j].clip_distance1.z = 1.0f; + vertices_multiple[j].clip_distance1.w = 1.0f; + switch (i) + { + case 0: vertices_multiple[j].clip_distance1.x = clip1; break; + case 1: vertices_multiple[j].clip_distance1.y = clip1; break; + case 2: vertices_multiple[j].clip_distance1.z = clip1; break; + case 3: vertices_multiple[j].clip_distance1.w = clip1; break; + } + } + update_buffer_data(vb[1], 0, sizeof(vertices_multiple), vertices_multiple); + ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, + ID3D12Resource_GetGPUVirtualAddress(vs_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 2, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 3, + ID3D12Resource_GetGPUVirtualAddress(gs_cb)); + ID3D12GraphicsCommandList_SetPipelineState(command_list, pso); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + 
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); + ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); + ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, ARRAY_SIZE(vbv), vbv); + ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); + ID3D12GraphicsCommandList_DrawInstanced(command_list, 4, 1, 0, 0); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
- reset_command_list(command_list, context.allocator); - transition_resource_state(command_list, context.render_target, - D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + get_resource_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); + set_box(&box, 0, 0, 0, 320, 240, 1); + check_readback_data_uint(&rb.rb, &box, 0xff00ff00, 1); + set_box(&box, 0, 240, 0, 320, 480, 1); + check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); + set_box(&box, 320, 0, 0, 640, 480, 1); + check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); + release_resource_readback(&rb);
- cb_data.use_constant = true; - cb_data.clip_distance0 = 0.0f; - cb_data.clip_distance1 = 0.0f; - update_buffer_data(vs_cb, 0, sizeof(cb_data), &cb_data); + reset_command_list(command_list, context.allocator); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + + vkd3d_test_pop_context(); + } + + cb_data_multiple.use_constant = true; + update_buffer_data(vs_cb, 0, sizeof(cb_data_multiple), &cb_data_multiple); ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0,
From: Conor McCarthy cmccarthy@codeweavers.com
--- tests/d3d12.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index a3aed2cc5..eb90caf7a 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -31303,6 +31303,36 @@ static void test_clip_distance(void) 0x00000000, 0x00000000, 0x00208e46, 0x00000000, 0x00000001, 0x00101d86, 0x00000002, 0x0100003e, }; static const D3D12_SHADER_BYTECODE vs_multiple = {vs_multiple_code, sizeof(vs_multiple_code)}; + static const DWORD ps_multiple_code[] = + { +#if 0 + struct vertex + { + float4 position : SV_POSITION; + float user_clip : CLIP_DISTANCE; + float clip0 : SV_ClipDistance0; + float4 clip1 : SV_ClipDistance1; + }; + + float4 main(vertex input) : SV_Target + { + return float4(input.clip0, input.clip1.xyz); + } +#endif + 0x43425844, 0x672b84d7, 0x92f9cfdd, 0x87eece2d, 0xfb11dafc, 0x00000001, 0x00000168, 0x00000003, + 0x0000002c, 0x000000c8, 0x000000fc, 0x4e475349, 0x00000094, 0x00000004, 0x00000008, 0x00000068, + 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f, 0x00000074, 0x00000000, 0x00000000, + 0x00000003, 0x00000001, 0x00000001, 0x00000082, 0x00000000, 0x00000002, 0x00000003, 0x00000002, + 0x00000101, 0x00000082, 0x00000001, 0x00000002, 0x00000003, 0x00000003, 0x0000070f, 0x505f5653, + 0x5449534f, 0x004e4f49, 0x50494c43, 0x5349445f, 0x434e4154, 0x56530045, 0x696c435f, 0x73694470, + 0x636e6174, 0xabab0065, 0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, + 0x00000000, 0x00000003, 0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, + 0x00000064, 0x00000050, 0x00000019, 0x0100086a, 0x04001064, 0x00101012, 0x00000002, 0x00000002, + 0x04001064, 0x00101072, 0x00000003, 0x00000002, 0x03000065, 0x001020f2, 0x00000000, 0x05000036, + 0x00102012, 0x00000000, 0x0010100a, 0x00000002, 0x05000036, 0x001020e2, 0x00000000, 0x00101906, + 0x00000003, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE ps_multiple = {ps_multiple_code, sizeof(ps_multiple_code)}; #if 0 bool use_constant; float clip_distance0; @@ -31907,6 +31937,50 @@ static void 
test_clip_distance(void) D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff00ff00, 0);
+ reset_command_list(command_list, context.allocator); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + + ID3D12PipelineState_Release(pso); + + pso_desc.PS = ps_multiple; + hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc, + &IID_ID3D12PipelineState, (void **)&pso); + ok(hr == S_OK, "Failed to create pipeline state, hr %#x.\n", hr); + + for (i = 0; i < ARRAY_SIZE(vertices_multiple); ++i) + { + vertices_multiple[i].clip_distance0 = 0.2f; + vertices_multiple[i].clip_distance1.x = 0.4f; + vertices_multiple[i].clip_distance1.y = 0.6f; + vertices_multiple[i].clip_distance1.z = 0.8f; + vertices_multiple[i].clip_distance1.w = 1.0f; + } + update_buffer_data(vb[1], 0, sizeof(vertices_multiple), vertices_multiple); + + cb_data_multiple.use_constant = false; + update_buffer_data(vs_cb, 0, sizeof(cb_data_multiple), &cb_data_multiple); + ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, + ID3D12Resource_GetGPUVirtualAddress(vs_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 2, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 3, + ID3D12Resource_GetGPUVirtualAddress(gs_cb)); + ID3D12GraphicsCommandList_SetPipelineState(command_list, pso); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); + ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); + 
ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, ARRAY_SIZE(vbv), vbv); + ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); + ID3D12GraphicsCommandList_DrawInstanced(command_list, 4, 1, 0, 0); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + check_sub_resource_uint(context.render_target, 0, queue, command_list, 0x99cc6633, 0); + ID3D12PipelineState_Release(pso); for (i = 0; i < ARRAY_SIZE(vb); ++i) ID3D12Resource_Release(vb[i]);
> we might think about adding a bitmap_set_range() helper
I think the need to deal with a bit range which crosses a `uint32_t` boundary means it's not worth the trouble at this point.
On Wed Feb 21 22:46:02 2024 +0000, Zebediah Figura wrote:
> Is this helpful?
Only to avoid copying an unnecessary instruction, which has a chance of avoiding reallocation too.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
+        unsigned int src_idx, unsigned int write_mask, struct clip_cull_normaliser *normaliser)
+{
+    unsigned int i, element_idx, component_idx, array_offset, handled_mask, mask_bit;
+    struct vkd3d_shader_src_param *src_param = &ins->src[src_idx];
+    struct vkd3d_shader_parser *parser = normaliser->parser;
+    const struct clip_cull_normaliser_signature *signature;
+    struct vkd3d_shader_register *reg = &src_param->reg;
+    struct vsir_program *program = &parser->program;
+    struct vkd3d_shader_dst_param *dst_param;
+    struct vkd3d_shader_src_param *mov_src;
+    switch (reg->type)
+    {
+        case VKD3DSPR_INPUT:
+            /* Sysvals are not needed for domain shader inputs. */
+            if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_DOMAIN)
Can this happen? Unless I'm missing something, it seems that for domain shaders `normaliser_signature_transform_clip_cull()` is never called, so `has_normalised_clip_cull` is never set and the normaliser returns before reaching here.
On Thu Feb 22 08:22:18 2024 +0000, Conor McCarthy wrote:
> > we might think about adding a bitmap_set_range() helper
> I think the need to deal with a bit range which crosses a `uint32_t` boundary means it's not worth the trouble at this point.
Fair enough.
On Wed Feb 21 22:46:01 2024 +0000, Zebediah Figura wrote:
> That said, though, the write mask logic here assumes that all instructions consuming clip/cull behave like mov with respect to write mask. Is that actually valid? E.g. for a somewhat pathological example, the coords argument of SAMPLE doesn't. If the point is to avoid writing any out of bounds accesses (i.e. component_idx >= signature->s[element_idx].register_count), should we instead just replace such out of bounds accesses with zero or something?
I guess explicitly handling every instruction type works too, but it seems a bit fragile. Is there a problem with just clamping out of bounds accesses?
On Thu Feb 22 20:02:42 2024 +0000, Zebediah Figura wrote:
> I guess explicitly handling every instruction type works too, but it seems a bit fragile. Is there a problem with just clamping out of bounds accesses?
Indeed it's a bit fragile. It could instead copy up to four components to the temp, if available, just in case, but I don't like the idea of doing that each time a scalar is loaded. It's pretty spammy in SPIR-V code, and the temp is often used for other purposes later, where the written values could still be floating around if not overwritten. That's unlikely to interfere with optimisation, but I'm not sure.