-- v21: tests/d3d12: Test multiple clip distance inputs in test_clip_distance(). tests/d3d12: Use five clip distances for the multiple test in test_clip_distance(). vkd3d-shader/ir: Transform clip/cull outputs and patch constants into arrays. vkd3d-shader/ir: Transform clip/cull inputs into an array. vkd3d-shader/spirv: Support no-op signature elements.
From: Conor McCarthy cmccarthy@codeweavers.com
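This allows signature normalisation to turn an element into a no-op (a register count of zero) instead of having to remove it and compact the signature element array. --- libs/vkd3d-shader/spirv.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-)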
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 46130244c..a6c16d8f0 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4973,7 +4973,7 @@ static unsigned int shader_signature_next_location(const struct shader_signature return max_row; }
-static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, +static void spirv_compiler_emit_input(struct spirv_compiler *compiler, enum vkd3d_shader_register_type reg_type, unsigned int element_idx) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; @@ -4984,7 +4984,6 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, const struct vkd3d_spirv_builtin *builtin; enum vkd3d_shader_sysval_semantic sysval; uint32_t write_mask, reg_write_mask; - struct vkd3d_symbol *symbol = NULL; uint32_t val_id, input_id, var_id; uint32_t type_id, float_type_id; struct vkd3d_symbol reg_symbol; @@ -4997,6 +4996,10 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, ? &compiler->patch_constant_signature : &compiler->input_signature;
signature_element = &shader_signature->elements[element_idx]; + /* No-op element from normalisation. */ + if (!signature_element->register_count) + return; + sysval = signature_element->sysval_semantic; /* The Vulkan spec does not explicitly forbid passing varyings from the * TCS to the TES via builtins. However, Mesa doesn't seem to handle it @@ -5052,8 +5055,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, * being repeated in another (i.e. vcp/vocp), which should have been deleted. */ if (reg_type != VKD3DSPR_INPUT || !is_in_fork_or_join_phase(compiler)) FIXME("Duplicate input definition found.\n"); - symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - return symbol->id; + return; }
if (builtin) @@ -5126,7 +5128,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, spirv_compiler_emit_store_reg(compiler, &dst_reg, signature_element->mask, val_id); }
- return input_id; + return; }
static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, @@ -5344,6 +5346,10 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, shader_signature = is_patch_constant ? &compiler->patch_constant_signature : &compiler->output_signature;
signature_element = &shader_signature->elements[element_idx]; + /* No-op element from normalisation. */ + if (!signature_element->register_count) + return; + sysval = signature_element->sysval_semantic; /* Don't use builtins for TCS -> TES varyings. See spirv_compiler_emit_input(). */ if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && !is_patch_constant)
From: Conor McCarthy cmccarthy@codeweavers.com
Up to eight clip/cull values are supported. DXBC spreads these across up to two vector signature elements, which is awkward to handle, so for VSIR it is best to merge them into a single array per semantic. SPIR-V also requires clip and cull distances to be declared as arrays. --- libs/vkd3d-shader/ir.c | 476 +++++++++++++++++++++++ libs/vkd3d-shader/spirv.c | 2 +- libs/vkd3d-shader/vkd3d_shader_private.h | 3 + 3 files changed, 480 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 8af537390..63b15c65a 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -18,6 +18,8 @@
#include "vkd3d_shader_private.h"
+static const unsigned int MAX_CLIP_OR_CULL_DISTANCE_COUNT = 8; + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) { program->shader_version = *version; @@ -1470,6 +1472,477 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program return VKD3D_OK; }
+struct clip_cull_normaliser_scan +{ + bool need_normalisation; + unsigned int offset; + unsigned int remap; +}; + +struct clip_cull_normaliser_signature +{ + struct shader_signature *s; + struct clip_cull_normaliser_scan scan[MAX_REG_OUTPUT]; +}; + +struct clip_cull_normaliser +{ + struct vsir_program *program; + struct vkd3d_shader_message_context *message_context; + bool failed; + + struct vkd3d_shader_location location; + bool has_normalised_clip_cull; + enum vkd3d_result result; + + struct vkd3d_shader_instruction *instructions; + size_t instruction_capacity; + size_t instruction_count; + + struct clip_cull_normaliser_signature input_signature; + + unsigned int temp_count; +}; + +static void VKD3D_PRINTF_FUNC(3, 4) clip_cull_normaliser_error(struct clip_cull_normaliser *normaliser, + enum vkd3d_shader_error error, const char *format, ...) +{ + va_list args; + + va_start(args, format); + vkd3d_shader_verror(normaliser->message_context, &normaliser->location, error, format, args); + va_end(args); + + normaliser->failed = true; +} + +static unsigned int mask_find_free_array_slot(unsigned int used_mask, unsigned int array_size) +{ + unsigned int i, array_mask = (1u << array_size) - 1; + /* Find the lowest register with at least array_size free above. */ + for (i = 0; i < MAX_REG_OUTPUT; ++i) + { + if (!(used_mask & (array_mask << i))) + break; + } + return i; +} + +static void normaliser_signature_transform_clip_or_cull(struct clip_cull_normaliser_signature *signature, + enum vkd3d_shader_sysval_semantic target_sysval, struct clip_cull_normaliser *normaliser) +{ + unsigned int i, base, array_size, mask, reg_mask, location_mask; + struct shader_signature *s = signature->s; + struct signature_element *e; + + /* Up to two vec4 clip/cull elements are allowed. Merge these into an array, + * and track the location and array offset of the second one. 
*/ + for (i = 0, array_size = 0, reg_mask = 0, location_mask = 0, base = 0; i < s->element_count; ++i) + { + e = &s->elements[i]; + + if (e->sysval_semantic != target_sysval) + { + mask = (1u << e->register_count) - 1; + /* Compile a map of used registers and locations. */ + reg_mask |= mask << e->register_index; + location_mask |= mask << e->target_location; + continue; + } + + if (!array_size) + { + base = i; + array_size = vsir_write_mask_component_count(e->mask); + } + else + { + signature->scan[i].remap = base; + signature->scan[i].need_normalisation = true; + signature->scan[i].offset = array_size; + array_size += vsir_write_mask_component_count(e->mask); + /* Make no-op. */ + memset(e, 0, sizeof(*e)); + } + } + + if (!array_size) + return; + + if (array_size > MAX_CLIP_OR_CULL_DISTANCE_COUNT) + { + WARN("Invalid array size %u.\n", array_size); + clip_cull_normaliser_error(normaliser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Clip or cull array size %u exceeds the limit of 8.", array_size); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + signature->scan[base].need_normalisation = true; + e = &s->elements[base]; + + i = mask_find_free_array_slot(reg_mask, array_size); + /* Signature locations are not used for sysvals, so if it proves necessary, it should be + * ok to raise MAX_REG_OUTPUT to 40 and validate that non-sysval register indices are < 32. 
*/ + if (array_size > MAX_REG_OUTPUT - i) + { + FIXME("Too many registers; register index %u, array size %u.\n", i, array_size); + clip_cull_normaliser_error(normaliser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Clip or cull base register index %u + array size %u exceeds limit of 32 registers.", i, array_size); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + e->register_index = i; + e->target_location = mask_find_free_array_slot(location_mask, array_size); + e->register_count = array_size; + e->mask = 1; + e->used_mask = 1; + + normaliser->has_normalised_clip_cull = true; +} + +static void normaliser_signature_transform_clip_cull(struct clip_cull_normaliser_signature *signature, + struct clip_cull_normaliser *normaliser) +{ + normaliser_signature_transform_clip_or_cull(signature, VKD3D_SHADER_SV_CLIP_DISTANCE, normaliser); + normaliser_signature_transform_clip_or_cull(signature, VKD3D_SHADER_SV_CULL_DISTANCE, normaliser); +} + +static struct vkd3d_shader_instruction *clip_cull_normaliser_require_space(struct clip_cull_normaliser *normaliser, + size_t count) +{ + if (!vkd3d_array_reserve((void **)&normaliser->instructions, &normaliser->instruction_capacity, + normaliser->instruction_count + count, sizeof(*normaliser->instructions))) + { + ERR("Failed to allocate instructions.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + return &normaliser->instructions[normaliser->instruction_count]; +} + +static struct vkd3d_shader_dst_param *clip_cull_normaliser_emit_mov(struct clip_cull_normaliser *normaliser, + struct vkd3d_shader_src_param *src_param) +{ + struct vkd3d_shader_instruction *ins; + + if (!(ins = clip_cull_normaliser_require_space(normaliser, 1))) + return NULL; + vsir_instruction_init(ins, &normaliser->location, VKD3DSIH_MOV); + + ins->src = src_param; + ins->src_count = 1; + + if (!(ins->dst = vsir_program_get_dst_params(normaliser->program, 1))) + { + ERR("Failed to allocate instruction dst param.\n"); + 
normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + ins->dst_count = 1; + + ++normaliser->instruction_count; + return ins->dst; +} + +static void clip_cull_normaliser_copy_instruction(struct clip_cull_normaliser *normaliser, + struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_instruction *dst_ins; + + if (!(dst_ins = clip_cull_normaliser_require_space(normaliser, 1))) + return; + *dst_ins = *ins; + ++normaliser->instruction_count; +} + +/* Use the dst write mask where it is known to be valid. Integer instructions are included in case + * float values are bitcast. This wouldn't make much sense for clip/cull, but handling it is harmless. + * All other occurrences are likely to be very rare, and use the fallback path. + * This function would be unnecessary if swizzles never contain components not in the mask, but + * validating that would be no less complex than this. */ +static unsigned int clip_cull_src_normalisation_get_write_mask(struct vkd3d_shader_instruction *ins, + unsigned int dst_write_mask, struct clip_cull_normaliser *normaliser) +{ + switch (ins->handler_idx) + { + case VKD3DSIH_ABS: + case VKD3DSIH_ACOS: + case VKD3DSIH_ADD: + case VKD3DSIH_AND: + case VKD3DSIH_ASIN: + case VKD3DSIH_ATAN: + case VKD3DSIH_BFREV: + case VKD3DSIH_COUNTBITS: + case VKD3DSIH_DIV: + case VKD3DSIH_EQO: + case VKD3DSIH_EQU: + case VKD3DSIH_EXP: + case VKD3DSIH_F32TOF16: + case VKD3DSIH_FRC: + case VKD3DSIH_FREM: + case VKD3DSIH_FTOD: + case VKD3DSIH_FTOI: + case VKD3DSIH_FTOU: + case VKD3DSIH_GEO: + case VKD3DSIH_GEU: + case VKD3DSIH_HCOS: + case VKD3DSIH_HSIN: + case VKD3DSIH_HTAN: + case VKD3DSIH_IADD: + case VKD3DSIH_IBFE: + case VKD3DSIH_IDIV: + case VKD3DSIH_IEQ: + case VKD3DSIH_IGE: + case VKD3DSIH_ILT: + case VKD3DSIH_IMAD: + case VKD3DSIH_IMAX: + case VKD3DSIH_IMIN: + case VKD3DSIH_IMUL: + case VKD3DSIH_INE: + case VKD3DSIH_INEG: + case VKD3DSIH_ISFINITE: + case VKD3DSIH_ISHL: + case VKD3DSIH_ISHR: + case VKD3DSIH_ISINF: + case 
VKD3DSIH_ISNAN: + case VKD3DSIH_ITOD: + case VKD3DSIH_ITOF: + case VKD3DSIH_ITOI: + case VKD3DSIH_LOG: + case VKD3DSIH_LTO: + case VKD3DSIH_LTU: + case VKD3DSIH_MAD: + case VKD3DSIH_MAX: + case VKD3DSIH_MIN: + case VKD3DSIH_MOV: + case VKD3DSIH_MOVC: + case VKD3DSIH_MSAD: + case VKD3DSIH_MUL: + case VKD3DSIH_NEO: + case VKD3DSIH_NEU: + case VKD3DSIH_NOT: + case VKD3DSIH_OR: + case VKD3DSIH_POW: + case VKD3DSIH_RCP: + case VKD3DSIH_ROUND_NE: + case VKD3DSIH_ROUND_NI: + case VKD3DSIH_ROUND_PI: + case VKD3DSIH_ROUND_Z: + case VKD3DSIH_RSQ: + case VKD3DSIH_SINCOS: + case VKD3DSIH_SQRT: + case VKD3DSIH_SUB: + case VKD3DSIH_TAN: + case VKD3DSIH_UBFE: + case VKD3DSIH_UDIV: + case VKD3DSIH_UGE: + case VKD3DSIH_ULT: + case VKD3DSIH_UMAX: + case VKD3DSIH_UMIN: + case VKD3DSIH_UMUL: + case VKD3DSIH_USHR: + case VKD3DSIH_UTOD: + case VKD3DSIH_UTOF: + case VKD3DSIH_UTOU: + case VKD3DSIH_XOR: + return dst_write_mask; + + /* Condition parameters could take a bitcast float for a nonzero check. */ + case VKD3DSIH_BRANCH: + case VKD3DSIH_BREAKP: + case VKD3DSIH_CONTINUEP: + case VKD3DSIH_IF: + case VKD3DSIH_SWITCH: + return VKD3DSP_WRITEMASK_0; + + /* Fallback: copy all available values to the temp. 
*/ + default: + FIXME("Copying up to four clip/cull values for handler %u.\n", ins->handler_idx); + vkd3d_shader_warning(normaliser->message_context, &normaliser->location, + VKD3D_SHADER_WARNING_VSIR_UNKNOWN_WRITE_MASK, + "Copying up to four clip/cull values for handler %u.", ins->handler_idx); + return VKD3DSP_WRITEMASK_ALL; + } +} + +static void shader_instruction_src_param_clip_cull_normalise(struct vkd3d_shader_instruction *ins, + unsigned int src_idx, unsigned int write_mask, struct clip_cull_normaliser *normaliser) +{ + unsigned int i, element_idx, component_idx, array_offset, max_component_count, handled_mask, mask_bit; + struct vkd3d_shader_src_param *src_param = &ins->src[src_idx]; + const struct clip_cull_normaliser_signature *signature; + struct vkd3d_shader_register *reg = &src_param->reg; + struct vsir_program *program = normaliser->program; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *mov_src; + + switch (reg->type) + { + case VKD3DSPR_INPUT: + signature = &normaliser->input_signature; + break; + default: + return; + } + + element_idx = reg->idx[reg->idx_count - 1].offset; + + if (!signature->scan[element_idx].need_normalisation) + return; + + if (!(write_mask = clip_cull_src_normalisation_get_write_mask(ins, write_mask, normaliser))) + return; + + /* Elements with an array offset are remapped to the base element. */ + if ((array_offset = signature->scan[element_idx].offset)) + { + element_idx = signature->scan[element_idx].remap; + reg->idx[reg->idx_count - 1].offset = element_idx; + } + + /* Dynamic array addressing of clip/cull inputs is not supported. 
*/ + if (reg->idx_count >= ARRAY_SIZE(reg->idx)) + { + WARN("Unexpected index count %u.\n", reg->idx_count); + clip_cull_normaliser_error(normaliser, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid register index count %u for a clip/cull load.", reg->idx_count); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + /* Move the indices up so the array index can be placed in idx[0]. */ + memmove(®->idx[1], ®->idx[0], reg->idx_count * sizeof(reg->idx[0])); + memset(®->idx[0], 0, sizeof(reg->idx[0])); + ++reg->idx_count; + + if (vsir_write_mask_component_count(write_mask) == 1) + { + reg->idx[0].offset = array_offset + vsir_swizzle_get_component(src_param->swizzle, + vsir_write_mask_get_component_idx(write_mask)); + src_param->swizzle = 0; + return; + } + + max_component_count = signature->s->elements[element_idx].register_count - array_offset; + max_component_count = min(max_component_count, VKD3D_VEC4_SIZE); + + for (i = 0, handled_mask = 0; i < max_component_count; ++i) + { + if (!(write_mask & (1u << i))) + continue; + + /* For each component accessed by the write mask + swizzle, emit a MOV from the clip/cull array to a temp. + * The original instruction will still apply the swizzle, so the temp is a direct reflection of the source. 
*/ + + component_idx = vsir_swizzle_get_component(src_param->swizzle, i); + mask_bit = 1u << component_idx; + + if (handled_mask & mask_bit) + continue; + handled_mask |= mask_bit; + + if (!(mov_src = vsir_program_get_src_params(program, 1))) + { + ERR("Failed to allocate instruction src param.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + mov_src->reg = *reg; + mov_src->reg.idx[0].offset = array_offset + component_idx; + mov_src->swizzle = 0; + mov_src->modifiers = 0; + + if (!(dst_param = clip_cull_normaliser_emit_mov(normaliser, mov_src))) + return; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, reg->data_type, 1); + dst_param->reg.dimension = reg->dimension; + dst_param->reg.idx[0].offset = program->temp_count; + normaliser->temp_count = 1; + dst_param->write_mask = mask_bit; + dst_param->modifiers = 0; + dst_param->shift = 0; + } + + /* Substitute the temp for the vector clip/cull source. */ + vsir_register_init(reg, VKD3DSPR_TEMP, reg->data_type, 1); + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = program->temp_count; +} + +static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_instruction *ins, + struct clip_cull_normaliser *normaliser) +{ + unsigned int i, write_mask; + + if (ins->handler_idx == VKD3DSIH_NOP) + return; + + normaliser->location = ins->location; + + if (vsir_instruction_is_dcl(ins)) + { + clip_cull_normaliser_copy_instruction(normaliser, ins); + return; + } + + for (i = 0, write_mask = 0; i < ins->dst_count; ++i) + if (ins->dst[i].reg.type != VKD3DSPR_NULL) + write_mask |= ins->dst[i].write_mask; + + for (i = 0; i < ins->src_count; ++i) + shader_instruction_src_param_clip_cull_normalise(ins, i, write_mask, normaliser); + + clip_cull_normaliser_copy_instruction(normaliser, ins); +} + +static enum vkd3d_result normalise_clip_cull(struct vsir_program *program) +{ + struct clip_cull_normaliser normaliser = {0}; + unsigned int i; + + normaliser.program = program; + 
normaliser.input_signature.s = &program->input_signature; + + if (program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) + normaliser_signature_transform_clip_cull(&normaliser.input_signature, &normaliser); + + if (normaliser.failed) + return VKD3D_ERROR_INVALID_SHADER; + + if (!normaliser.has_normalised_clip_cull) + return VKD3D_OK; + + if (!clip_cull_normaliser_require_space(&normaliser, program->instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < program->instructions.count; ++i) + shader_instruction_normalise_clip_cull_params(&program->instructions.elements[i], &normaliser); + + if (normaliser.result >= 0) + { + vkd3d_free(program->instructions.elements); + program->instructions.elements = normaliser.instructions; + program->instructions.capacity = normaliser.instruction_capacity; + program->instructions.count = normaliser.instruction_count; + + program->temp_count += normaliser.temp_count; + + return VKD3D_OK; + } + else + { + if (normaliser.result == VKD3D_ERROR_OUT_OF_MEMORY) + { + clip_cull_normaliser_error(&normaliser, VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY, + "Out of memory allocating clip/cull normalization instructions.\n"); + } + vkd3d_free(normaliser.instructions); + return normaliser.result; + } +} + struct flat_constant_def { enum vkd3d_shader_d3dbc_constant_register set; @@ -5538,6 +6011,9 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t if ((result = vsir_program_normalise_io_registers(program)) < 0) return result;
+ if ((result = normalise_clip_cull(program)) < 0) + return result; + if ((result = instruction_array_normalise_flat_constants(program)) < 0) return result;
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index a6c16d8f0..47ba639a3 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -5012,7 +5012,7 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->input_control_point_count); if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) - && (!vsir_sysval_semantic_is_clip_cull(signature_element->sysval_semantic) || array_sizes[1])) + && (!vsir_sysval_semantic_is_clip_cull(sysval) || array_sizes[1])) { array_sizes[0] = 0; } diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index b07a7bff7..5147bd13f 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -220,8 +220,11 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW = 9016, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, + VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY = 9018, + VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9019,
VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, + VKD3D_SHADER_WARNING_VSIR_UNKNOWN_WRITE_MASK = 9301, };
enum vkd3d_shader_opcode
From: Conor McCarthy cmccarthy@codeweavers.com
--- libs/vkd3d-shader/ir.c | 145 ++++++++++++++++++++++++++++++++++ libs/vkd3d-shader/spirv.c | 162 ++------------------------------------ 2 files changed, 153 insertions(+), 154 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 63b15c65a..94b865a1e 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -1500,8 +1500,12 @@ struct clip_cull_normaliser size_t instruction_count;
struct clip_cull_normaliser_signature input_signature; + struct clip_cull_normaliser_signature output_signature; + struct clip_cull_normaliser_signature patch_constant_signature;
unsigned int temp_count; + + enum vkd3d_shader_opcode phase; };
static void VKD3D_PRINTF_FUNC(3, 4) clip_cull_normaliser_error(struct clip_cull_normaliser *normaliser, @@ -1781,8 +1785,20 @@ static void shader_instruction_src_param_clip_cull_normalise(struct vkd3d_shader switch (reg->type) { case VKD3DSPR_INPUT: + /* Sysvals are not needed for domain shader inputs. */ + if (program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN) + return; signature = &normaliser->input_signature; break; + case VKD3DSPR_OUTPUT: + /* Sysvals are not needed for hull shader outputs. */ + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) + return; + signature = &normaliser->output_signature; + break; + case VKD3DSPR_PATCHCONST: + signature = &normaliser->patch_constant_signature; + break; default: return; } @@ -1871,6 +1887,118 @@ static void shader_instruction_src_param_clip_cull_normalise(struct vkd3d_shader reg->idx[0].offset = program->temp_count; }
+static void shader_dst_param_clip_cull_normalise(struct vkd3d_shader_dst_param *dst_param, + struct clip_cull_normaliser *normaliser) + { + const struct clip_cull_normaliser_signature *signature; + unsigned int i, element_idx, write_mask, array_offset; + struct vkd3d_shader_register *reg = &dst_param->reg; + struct vsir_program *program = normaliser->program; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_dst_param *mov_dst; + + if (!reg->idx_count) + return; + + switch (reg->type) + { + /* VKD3DSPR_INPUT must not occur in a dst param. */ + + case VKD3DSPR_OUTPUT: + if (normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE) + { + signature = &normaliser->patch_constant_signature; + } + else + { + /* Sysvals are not needed for hull shader outputs. */ + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) + return; + signature = &normaliser->output_signature; + } + break; + + case VKD3DSPR_PATCHCONST: + signature = &normaliser->patch_constant_signature; + break; + + default: + return; + } + + element_idx = reg->idx[reg->idx_count - 1].offset; + + if (!signature->scan[element_idx].need_normalisation) + return; + + /* Elements with an array offset are remapped to the base element. */ + if ((array_offset = signature->scan[element_idx].offset)) + { + element_idx = signature->scan[element_idx].remap; + reg->idx[reg->idx_count - 1].offset = element_idx; + } + + write_mask = dst_param->write_mask; + + /* Dynamic array addressing of clip/cull outputs is not supported. */ + if (reg->idx_count >= ARRAY_SIZE(reg->idx)) + { + WARN("Unexpected index count %u.\n", reg->idx_count); + clip_cull_normaliser_error(normaliser, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid register index count %u for a clip/cull store.", reg->idx_count); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + /* Move the indices up so the array index can be placed in idx[0]. 
*/ + memmove(®->idx[1], ®->idx[0], reg->idx_count * sizeof(reg->idx[0])); + memset(®->idx[0], 0, sizeof(reg->idx[0])); + ++reg->idx_count; + + if (vsir_write_mask_component_count(write_mask) == 1) + { + reg->idx[0].offset = array_offset + vsir_write_mask_get_component_idx(write_mask); + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + return; + } + + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask & (1u << i))) + continue; + + /* For each component, emit a MOV from a temp to the clip/cull array. */ + + if (!(src_param = vsir_program_get_src_params(program, 1))) + { + ERR("Failed to allocate instruction dst param.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + src_param->swizzle = vkd3d_shader_create_swizzle(i, i, i, i); + src_param->modifiers = 0; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, reg->data_type, 1); + src_param->reg.dimension = reg->dimension; + src_param->reg.idx[0].offset = program->temp_count; + normaliser->temp_count = 1; + + if (!(mov_dst = clip_cull_normaliser_emit_mov(normaliser, src_param))) + return; + mov_dst->reg = *reg; + mov_dst->reg.idx[0].offset = array_offset + i; + mov_dst->write_mask = VKD3DSP_WRITEMASK_0; + mov_dst->modifiers = 0; + mov_dst->shift = 0; + } + + /* Substitute the temp for the vector clip/cull destination. If this is for a MOV instruction with + * clip/cull source, it results in a harmless no-op MOV, because shader_src_param_clip_cull_normalise() + * has already written the clip/cull source to the temp. */ + vsir_register_init(reg, VKD3DSPR_TEMP, reg->data_type, 1); + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = program->temp_count; +} + static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_instruction *ins, struct clip_cull_normaliser *normaliser) { @@ -1887,6 +2015,14 @@ static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_in return; }
+ if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE + || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + { + normaliser->phase = ins->handler_idx; + clip_cull_normaliser_copy_instruction(normaliser, ins); + return; + } + for (i = 0, write_mask = 0; i < ins->dst_count; ++i) if (ins->dst[i].reg.type != VKD3DSPR_NULL) write_mask |= ins->dst[i].write_mask; @@ -1895,6 +2031,9 @@ static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_in shader_instruction_src_param_clip_cull_normalise(ins, i, write_mask, normaliser);
clip_cull_normaliser_copy_instruction(normaliser, ins); + + for (i = 0; i < ins->dst_count; ++i) + shader_dst_param_clip_cull_normalise(&ins->dst[i], normaliser); }
static enum vkd3d_result normalise_clip_cull(struct vsir_program *program) @@ -1904,9 +2043,15 @@ static enum vkd3d_result normalise_clip_cull(struct vsir_program *program)
normaliser.program = program; normaliser.input_signature.s = &program->input_signature; + normaliser.output_signature.s = &program->output_signature; + normaliser.patch_constant_signature.s = &program->patch_constant_signature; + normaliser.phase = VKD3DSIH_INVALID;
if (program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) normaliser_signature_transform_clip_cull(&normaliser.input_signature, &normaliser); + if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) + normaliser_signature_transform_clip_cull(&normaliser.output_signature, &normaliser); + normaliser_signature_transform_clip_cull(&normaliser.patch_constant_signature, &normaliser);
if (normaliser.failed) return VKD3D_ERROR_INVALID_SHADER; diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 47ba639a3..222cdc4b4 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -2355,7 +2355,6 @@ struct spirv_compiler { uint32_t id; enum vkd3d_shader_component_type component_type; - uint32_t array_element_mask; } *output_info; uint32_t private_output_variable[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ uint32_t private_output_variable_write_mask[MAX_REG_OUTPUT + 1]; /* 1 entry for oDepth */ @@ -5185,88 +5184,6 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; }
-static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) -{ - unsigned int write_mask; - - if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) - { - FIXME("Invalid semantic index %u for clip/cull distance.\n", e->semantic_index); - return; - } - - write_mask = e->mask >> vsir_write_mask_get_component_idx(e->mask); - *mask |= (write_mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); -} - -/* Emits arrayed SPIR-V built-in variables. */ -static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) -{ - const struct shader_signature *output_signature = &compiler->output_signature; - uint32_t clip_distance_mask = 0, clip_distance_id = 0; - uint32_t cull_distance_mask = 0, cull_distance_id = 0; - const struct vkd3d_spirv_builtin *builtin; - unsigned int i, count; - - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *e = &output_signature->elements[i]; - - switch (e->sysval_semantic) - { - case VKD3D_SHADER_SV_CLIP_DISTANCE: - calculate_clip_or_cull_distance_mask(e, &clip_distance_mask); - break; - - case VKD3D_SHADER_SV_CULL_DISTANCE: - calculate_clip_or_cull_distance_mask(e, &cull_distance_mask); - break; - - default: - break; - } - } - - if (clip_distance_mask) - { - count = vkd3d_popcount(clip_distance_mask); - builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SHADER_SV_CLIP_DISTANCE); - clip_distance_id = spirv_compiler_emit_builtin_variable(compiler, - builtin, SpvStorageClassOutput, count); - } - - if (cull_distance_mask) - { - count = vkd3d_popcount(cull_distance_mask); - builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SHADER_SV_CULL_DISTANCE); - cull_distance_id = spirv_compiler_emit_builtin_variable(compiler, - builtin, SpvStorageClassOutput, count); - } - - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *e = &output_signature->elements[i]; - - switch 
(e->sysval_semantic) - { - case VKD3D_SHADER_SV_CLIP_DISTANCE: - compiler->output_info[i].id = clip_distance_id; - compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; - compiler->output_info[i].array_element_mask = clip_distance_mask; - break; - - case VKD3D_SHADER_SV_CULL_DISTANCE: - compiler->output_info[i].id = cull_distance_id; - compiler->output_info[i].component_type = VKD3D_SHADER_COMPONENT_FLOAT; - compiler->output_info[i].array_element_mask = cull_distance_mask; - break; - - default: - break; - } - } -} - static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst) { @@ -5356,7 +5273,8 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, sysval = VKD3D_SHADER_SV_NONE; array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->output_control_point_count); - if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic)) + if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) + && (!vsir_sysval_semantic_is_clip_cull(sysval) || array_sizes[1])) array_sizes[0] = 0;
builtin = vkd3d_get_spirv_builtin(compiler, reg_type, sysval); @@ -5382,8 +5300,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, use_private_variable = true;
if (!is_patch_constant - && (get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE - || (compiler->output_info[element_idx].id && compiler->output_info[element_idx].array_element_mask))) + && get_shader_output_swizzle(compiler, signature_element->register_index) != VKD3D_SHADER_NO_SWIZZLE) { use_private_variable = true; } @@ -5398,11 +5315,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, return; }
- if (!is_patch_constant && compiler->output_info[element_idx].id) - { - id = compiler->output_info[element_idx].id; - } - else if (builtin) + if (builtin) { if (spirv_compiler_get_current_shader_phase(compiler)) id = spirv_compiler_emit_shader_phase_builtin_variable(compiler, builtin, array_sizes, 2); @@ -5483,51 +5396,18 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, } }
-static uint32_t spirv_compiler_get_output_array_index(struct spirv_compiler *compiler, - const struct signature_element *e) -{ - enum vkd3d_shader_sysval_semantic sysval = e->sysval_semantic; - const struct vkd3d_spirv_builtin *builtin; - - builtin = get_spirv_builtin_for_sysval(compiler, sysval); - - switch (sysval) - { - case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: - case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: - return builtin->member_idx; - default: - return e->semantic_index; - } -} - static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compiler, const struct shader_signature *signature, const struct signature_element *output, const struct vkd3d_shader_output_info *output_info, uint32_t output_index_id, uint32_t val_id, uint32_t write_mask) { - uint32_t dst_write_mask, use_mask, uninit_mask, swizzle, mask; + uint32_t dst_write_mask, use_mask, uninit_mask, swizzle, type_id, zero_id, ptr_type_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id, zero_id, ptr_type_id, chain_id, object_id; - const struct signature_element *element; - unsigned int i, index, array_idx; + unsigned int index; uint32_t output_id;
dst_write_mask = output->mask; use_mask = output->used_mask; - if (!output->sysval_semantic) - { - for (i = 0; i < signature->element_count; ++i) - { - element = &signature->elements[i]; - if (element->register_index != output->register_index) - continue; - if (element->sysval_semantic) - continue; - dst_write_mask |= element->mask; - use_mask |= element->used_mask; - } - } index = vsir_write_mask_get_component_idx(output->mask); dst_write_mask >>= index; use_mask >>= index; @@ -5569,31 +5449,8 @@ static void spirv_compiler_emit_store_shader_output(struct spirv_compiler *compi output_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, output_id, output_index_id); }
- if (!output_info->array_element_mask) - { - spirv_compiler_emit_store(compiler, - output_id, dst_write_mask, output_info->component_type, SpvStorageClassOutput, write_mask, val_id); - return; - } - - type_id = vkd3d_spirv_get_type_id(builder, output_info->component_type, 1); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); - mask = output_info->array_element_mask; - array_idx = spirv_compiler_get_output_array_index(compiler, output); - mask &= (1u << (array_idx * VKD3D_VEC4_SIZE)) - 1; - for (i = 0, index = vkd3d_popcount(mask); i < VKD3D_VEC4_SIZE; ++i) - { - if (!(write_mask & (VKD3DSP_WRITEMASK_0 << i))) - continue; - - chain_id = vkd3d_spirv_build_op_access_chain1(builder, - ptr_type_id, output_id, spirv_compiler_get_constant_uint(compiler, index)); - object_id = spirv_compiler_emit_swizzle(compiler, val_id, write_mask, - output_info->component_type, VKD3D_SHADER_NO_SWIZZLE, VKD3DSP_WRITEMASK_0 << i); - spirv_compiler_emit_store(compiler, chain_id, VKD3DSP_WRITEMASK_0, - output_info->component_type, SpvStorageClassOutput, VKD3DSP_WRITEMASK_0 << i, object_id); - ++index; - } + spirv_compiler_emit_store(compiler, + output_id, dst_write_mask, output_info->component_type, SpvStorageClassOutput, write_mask, val_id); }
static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler *compiler) @@ -10182,9 +10039,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, compiler->input_control_point_count = program->input_control_point_count; compiler->output_control_point_count = program->output_control_point_count;
- if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_shader_signature_outputs(compiler); - for (i = 0; i < instructions.count && result >= 0; ++i) { compiler->location.line = i + 1;
From: Conor McCarthy cmccarthy@codeweavers.com
--- tests/d3d12.c | 183 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 116 insertions(+), 67 deletions(-)
diff --git a/tests/d3d12.c b/tests/d3d12.c index b8d20a581..24f5b85b2 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -31241,7 +31241,7 @@ static void test_clip_distance(void) ID3D12CommandQueue *queue; ID3D12PipelineState *pso; ID3D12Device *device; - unsigned int i; + unsigned int i, j; D3D12_BOX box; HRESULT hr;
@@ -31296,51 +31296,53 @@ static void test_clip_distance(void) #if 0 bool use_constant; float clip_distance0; - float clip_distance1; + float4 clip_distance1;
struct input { float4 position : POSITION; float distance0 : CLIP_DISTANCE0; - float distance1 : CLIP_DISTANCE1; + float4 distance1 : CLIP_DISTANCE1; };
struct vertex { float4 position : SV_POSITION; float user_clip : CLIP_DISTANCE; - float2 clip : SV_ClipDistance; + float clip0 : SV_ClipDistance0; + float4 clip1 : SV_ClipDistance1; };
void main(input vin, out vertex vertex) { vertex.position = vin.position; vertex.user_clip = vin.distance0; - vertex.clip.x = vin.distance0; + vertex.clip0 = vin.distance0; if (use_constant) - vertex.clip.x = clip_distance0; - vertex.clip.y = vin.distance1; + vertex.clip0 = clip_distance0; + vertex.clip1 = vin.distance1.xzyw; if (use_constant) - vertex.clip.y = clip_distance1; + vertex.clip1 = clip_distance1; } #endif - 0x43425844, 0xef5cc236, 0xe2fbfa69, 0x560b6591, 0x23037999, 0x00000001, 0x00000214, 0x00000003, - 0x0000002c, 0x0000009c, 0x00000120, 0x4e475349, 0x00000068, 0x00000003, 0x00000008, 0x00000050, + 0x43425844, 0x85af9c9d, 0xa40fe352, 0x1fdcce87, 0x16f969e1, 0x00000001, 0x00000240, 0x00000003, + 0x0000002c, 0x0000009c, 0x00000138, 0x4e475349, 0x00000068, 0x00000003, 0x00000008, 0x00000050, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000f0f, 0x00000059, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000101, 0x00000059, 0x00000001, 0x00000000, 0x00000003, 0x00000002, - 0x00000101, 0x49534f50, 0x4e4f4954, 0x494c4300, 0x49445f50, 0x4e415453, 0xab004543, 0x4e47534f, - 0x0000007c, 0x00000003, 0x00000008, 0x00000050, 0x00000000, 0x00000001, 0x00000003, 0x00000000, - 0x0000000f, 0x0000005c, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000e01, 0x0000006a, - 0x00000000, 0x00000002, 0x00000003, 0x00000002, 0x00000c03, 0x505f5653, 0x5449534f, 0x004e4f49, - 0x50494c43, 0x5349445f, 0x434e4154, 0x56530045, 0x696c435f, 0x73694470, 0x636e6174, 0xabab0065, - 0x52444853, 0x000000ec, 0x00010040, 0x0000003b, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, - 0x0300005f, 0x001010f2, 0x00000000, 0x0300005f, 0x00101012, 0x00000001, 0x0300005f, 0x00101012, - 0x00000002, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x03000065, 0x00102012, 0x00000001, - 0x04000067, 0x00102032, 0x00000002, 0x00000002, 0x05000036, 0x001020f2, 0x00000000, 0x00101e46, - 0x00000000, 0x05000036, 0x00102012, 0x00000001, 0x0010100a, 0x00000001, 0x0b000037, 0x00102012, - 0x00000002, 
0x0020800a, 0x00000000, 0x00000000, 0x0020801a, 0x00000000, 0x00000000, 0x0010100a, - 0x00000001, 0x0b000037, 0x00102022, 0x00000002, 0x0020800a, 0x00000000, 0x00000000, 0x0020802a, - 0x00000000, 0x00000000, 0x0010100a, 0x00000002, 0x0100003e, + 0x00000f0f, 0x49534f50, 0x4e4f4954, 0x494c4300, 0x49445f50, 0x4e415453, 0xab004543, 0x4e47534f, + 0x00000094, 0x00000004, 0x00000008, 0x00000068, 0x00000000, 0x00000001, 0x00000003, 0x00000000, + 0x0000000f, 0x00000074, 0x00000000, 0x00000000, 0x00000003, 0x00000001, 0x00000e01, 0x00000082, + 0x00000000, 0x00000002, 0x00000003, 0x00000002, 0x00000e01, 0x00000082, 0x00000001, 0x00000002, + 0x00000003, 0x00000003, 0x0000000f, 0x505f5653, 0x5449534f, 0x004e4f49, 0x50494c43, 0x5349445f, + 0x434e4154, 0x56530045, 0x696c435f, 0x73694470, 0x636e6174, 0xabab0065, 0x58454853, 0x00000100, + 0x00010050, 0x00000040, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0300005f, + 0x001010f2, 0x00000000, 0x0300005f, 0x00101012, 0x00000001, 0x0300005f, 0x001010f2, 0x00000002, + 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x03000065, 0x00102012, 0x00000001, 0x04000067, + 0x00102012, 0x00000002, 0x00000002, 0x04000067, 0x001020f2, 0x00000003, 0x00000002, 0x05000036, + 0x001020f2, 0x00000000, 0x00101e46, 0x00000000, 0x05000036, 0x00102012, 0x00000001, 0x0010100a, + 0x00000001, 0x0b000037, 0x00102012, 0x00000002, 0x0020800a, 0x00000000, 0x00000000, 0x0020801a, + 0x00000000, 0x00000000, 0x0010100a, 0x00000001, 0x0b000037, 0x001020f2, 0x00000003, 0x00208006, + 0x00000000, 0x00000000, 0x00208e46, 0x00000000, 0x00000001, 0x00101d86, 0x00000002, 0x0100003e, }; static const D3D12_SHADER_BYTECODE vs_multiple = {vs_multiple_code, sizeof(vs_multiple_code)}; #if 0 @@ -31576,6 +31578,12 @@ static void test_clip_distance(void) {"CLIP_DISTANCE", 0, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, {"CLIP_DISTANCE", 1, DXGI_FORMAT_R32_FLOAT, 1, 4, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, }; + static const 
D3D12_INPUT_ELEMENT_DESC layout_desc_multiple[] = + { + {"POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"CLIP_DISTANCE", 0, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + {"CLIP_DISTANCE", 1, DXGI_FORMAT_R32G32B32A32_FLOAT, 1, 4, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, + }; static const struct vec4 quad[] = { {-1.0f, -1.0f}, @@ -31595,6 +31603,18 @@ static void test_clip_distance(void) {1.0f, 1.0f}, {1.0f, 1.0f}, }; + struct + { + float clip_distance0; + struct vec4 clip_distance1; + } + vertices_multiple[] = + { + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + {1.0f, {1.0f, 1.0f, 1.0f, 1.0f}}, + }; static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f}; struct { @@ -31603,6 +31623,12 @@ static void test_clip_distance(void) float clip_distance1; float tessellation_factor; } cb_data; + struct + { + bool use_constant; + float clip_distance0; + struct vec4 clip_distance1; + } cb_data_multiple;
memset(&desc, 0, sizeof(desc)); desc.rt_width = 640; @@ -31801,16 +31827,23 @@ static void test_clip_distance(void) memset(&pso_desc.DS, 0, sizeof(pso_desc.DS)); memset(&pso_desc.GS, 0, sizeof(pso_desc.GS)); pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pso_desc.InputLayout.pInputElementDescs = layout_desc_multiple; + pso_desc.InputLayout.NumElements = ARRAY_SIZE(layout_desc_multiple); hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); ok(hr == S_OK, "Failed to create pipeline state, hr %#x.\n", hr);
- cb_data.use_constant = false; - update_buffer_data(vs_cb, 0, sizeof(cb_data), &cb_data); + memset(&cb_data_multiple, 0, sizeof(cb_data_multiple)); + ID3D12Resource_Release(vs_cb); + vs_cb = create_upload_buffer(device, sizeof(cb_data_multiple), &cb_data_multiple);
- for (i = 0; i < ARRAY_SIZE(vertices); ++i) - vertices[i].clip_distance0 = 1.0f; - update_buffer_data(vb[1], 0, sizeof(vertices), vertices); + ID3D12Resource_Release(vb[1]); + vb[1] = create_upload_buffer(device, sizeof(vertices_multiple), vertices_multiple); + vbv[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vb[1]); + vbv[1].StrideInBytes = sizeof(*vertices_multiple); + vbv[1].SizeInBytes = sizeof(vertices_multiple); + + update_buffer_data(vb[1], 0, sizeof(vertices_multiple), vertices_multiple); ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, @@ -31836,49 +31869,65 @@ static void test_clip_distance(void) transition_resource_state(command_list, context.render_target, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
- for (i = 0; i < ARRAY_SIZE(vertices); ++i) + for (i = 0; i < 4; ++i) { - vertices[i].clip_distance0 = i < 2 ? 1.0f : -1.0f; - vertices[i].clip_distance1 = i % 2 ? 1.0f : -1.0f; - } - update_buffer_data(vb[1], 0, sizeof(vertices), vertices); - ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); - ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, - ID3D12Resource_GetGPUVirtualAddress(vs_cb)); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1, - ID3D12Resource_GetGPUVirtualAddress(tess_cb)); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 2, - ID3D12Resource_GetGPUVirtualAddress(tess_cb)); - ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 3, - ID3D12Resource_GetGPUVirtualAddress(gs_cb)); - ID3D12GraphicsCommandList_SetPipelineState(command_list, pso); - ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); - ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); - ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, ARRAY_SIZE(vbv), vbv); - ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); - ID3D12GraphicsCommandList_DrawInstanced(command_list, 4, 1, 0, 0); - transition_resource_state(command_list, context.render_target, - D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + vkd3d_test_push_context("Component %u", i);
- get_resource_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); - set_box(&box, 0, 0, 0, 320, 240, 1); - check_readback_data_uint(&rb.rb, &box, 0xff00ff00, 1); - set_box(&box, 0, 240, 0, 320, 480, 1); - check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); - set_box(&box, 320, 0, 0, 640, 480, 1); - check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); - release_resource_readback(&rb); + for (j = 0; j < ARRAY_SIZE(vertices_multiple); ++j) + { + float clip1 = j % 2 ? 1.0f : -1.0f; + vertices_multiple[j].clip_distance0 = j < 2 ? 1.0f : -1.0f; + vertices_multiple[j].clip_distance1.x = 1.0f; + vertices_multiple[j].clip_distance1.y = 1.0f; + vertices_multiple[j].clip_distance1.z = 1.0f; + vertices_multiple[j].clip_distance1.w = 1.0f; + switch (i) + { + case 0: vertices_multiple[j].clip_distance1.x = clip1; break; + case 1: vertices_multiple[j].clip_distance1.y = clip1; break; + case 2: vertices_multiple[j].clip_distance1.z = clip1; break; + case 3: vertices_multiple[j].clip_distance1.w = clip1; break; + } + } + update_buffer_data(vb[1], 0, sizeof(vertices_multiple), vertices_multiple); + ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, + ID3D12Resource_GetGPUVirtualAddress(vs_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 2, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 3, + ID3D12Resource_GetGPUVirtualAddress(gs_cb)); + ID3D12GraphicsCommandList_SetPipelineState(command_list, pso); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + 
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); + ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); + ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, ARRAY_SIZE(vbv), vbv); + ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); + ID3D12GraphicsCommandList_DrawInstanced(command_list, 4, 1, 0, 0); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
- reset_command_list(command_list, context.allocator); - transition_resource_state(command_list, context.render_target, - D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + get_resource_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); + set_box(&box, 0, 0, 0, 320, 240, 1); + check_readback_data_uint(&rb.rb, &box, 0xff00ff00, 1); + set_box(&box, 0, 240, 0, 320, 480, 1); + check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); + set_box(&box, 320, 0, 0, 640, 480, 1); + check_readback_data_uint(&rb.rb, &box, 0xffffffff, 1); + release_resource_readback(&rb);
- cb_data.use_constant = true; - cb_data.clip_distance0 = 0.0f; - cb_data.clip_distance1 = 0.0f; - update_buffer_data(vs_cb, 0, sizeof(cb_data), &cb_data); + reset_command_list(command_list, context.allocator); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + + vkd3d_test_pop_context(); + } + + cb_data_multiple.use_constant = true; + update_buffer_data(vs_cb, 0, sizeof(cb_data_multiple), &cb_data_multiple); ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0,
From: Conor McCarthy cmccarthy@codeweavers.com
--- tests/d3d12.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 24f5b85b2..4d79a8502 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -31345,6 +31345,36 @@ static void test_clip_distance(void) 0x00000000, 0x00000000, 0x00208e46, 0x00000000, 0x00000001, 0x00101d86, 0x00000002, 0x0100003e, }; static const D3D12_SHADER_BYTECODE vs_multiple = {vs_multiple_code, sizeof(vs_multiple_code)}; + static const DWORD ps_multiple_code[] = + { +#if 0 + struct vertex + { + float4 position : SV_POSITION; + float user_clip : CLIP_DISTANCE; + float clip0 : SV_ClipDistance0; + float4 clip1 : SV_ClipDistance1; + }; + + float4 main(vertex input) : SV_Target + { + return float4(input.clip0, input.clip1.xyz); + } +#endif + 0x43425844, 0x672b84d7, 0x92f9cfdd, 0x87eece2d, 0xfb11dafc, 0x00000001, 0x00000168, 0x00000003, + 0x0000002c, 0x000000c8, 0x000000fc, 0x4e475349, 0x00000094, 0x00000004, 0x00000008, 0x00000068, + 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f, 0x00000074, 0x00000000, 0x00000000, + 0x00000003, 0x00000001, 0x00000001, 0x00000082, 0x00000000, 0x00000002, 0x00000003, 0x00000002, + 0x00000101, 0x00000082, 0x00000001, 0x00000002, 0x00000003, 0x00000003, 0x0000070f, 0x505f5653, + 0x5449534f, 0x004e4f49, 0x50494c43, 0x5349445f, 0x434e4154, 0x56530045, 0x696c435f, 0x73694470, + 0x636e6174, 0xabab0065, 0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, + 0x00000000, 0x00000003, 0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, + 0x00000064, 0x00000050, 0x00000019, 0x0100086a, 0x04001064, 0x00101012, 0x00000002, 0x00000002, + 0x04001064, 0x00101072, 0x00000003, 0x00000002, 0x03000065, 0x001020f2, 0x00000000, 0x05000036, + 0x00102012, 0x00000000, 0x0010100a, 0x00000002, 0x05000036, 0x001020e2, 0x00000000, 0x00101906, + 0x00000003, 0x0100003e, + }; + static const D3D12_SHADER_BYTECODE ps_multiple = {ps_multiple_code, sizeof(ps_multiple_code)}; #if 0 bool use_constant; float clip_distance0; @@ -31949,6 +31979,50 @@ static void 
test_clip_distance(void) D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff00ff00, 0);
+ reset_command_list(command_list, context.allocator); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + + ID3D12PipelineState_Release(pso); + + pso_desc.PS = ps_multiple; + hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc, + &IID_ID3D12PipelineState, (void **)&pso); + ok(hr == S_OK, "Failed to create pipeline state, hr %#x.\n", hr); + + for (i = 0; i < ARRAY_SIZE(vertices_multiple); ++i) + { + vertices_multiple[i].clip_distance0 = 0.2f; + vertices_multiple[i].clip_distance1.x = 0.4f; + vertices_multiple[i].clip_distance1.y = 0.6f; + vertices_multiple[i].clip_distance1.z = 0.8f; + vertices_multiple[i].clip_distance1.w = 1.0f; + } + update_buffer_data(vb[1], 0, sizeof(vertices_multiple), vertices_multiple); + + cb_data_multiple.use_constant = false; + update_buffer_data(vs_cb, 0, sizeof(cb_data_multiple), &cb_data_multiple); + ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL); + ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 0, + ID3D12Resource_GetGPUVirtualAddress(vs_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 2, + ID3D12Resource_GetGPUVirtualAddress(tess_cb)); + ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 3, + ID3D12Resource_GetGPUVirtualAddress(gs_cb)); + ID3D12GraphicsCommandList_SetPipelineState(command_list, pso); + ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport); + ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect); + 
ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, ARRAY_SIZE(vbv), vbv); + ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL); + ID3D12GraphicsCommandList_DrawInstanced(command_list, 4, 1, 0, 0); + transition_resource_state(command_list, context.render_target, + D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + check_sub_resource_uint(context.render_target, 0, queue, command_list, 0x99cc6633, 0); + ID3D12PipelineState_Release(pso); for (i = 0; i < ARRAY_SIZE(vb); ++i) ID3D12Resource_Release(vb[i]);
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
}
- }
- if (!array_size)
return;
- if (array_size > MAX_CLIP_OR_CULL_DISTANCE_COUNT)
- {
WARN("Invalid array size %u.\n", array_size);
clip_cull_normaliser_error(normaliser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
"Clip or cull array size %u exceeds the limit of 8.", array_size);
normaliser->result = VKD3D_ERROR_INVALID_SHADER;
return;
- }
- signature->scan[base].need_normalisation = true;
You're setting `need_normalisation` for each signature element other than the first in the loop above, and then you set the flag for the first element too here. Why can't this happen in the loop too?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
- {
vkd3d_free(program->instructions.elements);
program->instructions.elements = normaliser.instructions;
program->instructions.capacity = normaliser.instruction_capacity;
program->instructions.count = normaliser.instruction_count;
program->temp_count += normaliser.temp_count;
return VKD3D_OK;
- }
- else
- {
if (normaliser.result == VKD3D_ERROR_OUT_OF_MEMORY)
{
clip_cull_normaliser_error(&normaliser, VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY,
"Out of memory allocating clip/cull normalization instructions.\n");
Not a problem, but in general I don't think we care too much about emitting explicit errors for memory allocation failures.
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
- memmove(&reg->idx[1], &reg->idx[0], reg->idx_count * sizeof(reg->idx[0]));
- memset(&reg->idx[0], 0, sizeof(reg->idx[0]));
- ++reg->idx_count;
- if (vsir_write_mask_component_count(write_mask) == 1)
- {
reg->idx[0].offset = array_offset + vsir_swizzle_get_component(src_param->swizzle,
vsir_write_mask_get_component_idx(write_mask));
src_param->swizzle = 0;
return;
- }
- max_component_count = signature->s->elements[element_idx].register_count - array_offset;
- max_component_count = min(max_component_count, VKD3D_VEC4_SIZE);
- for (i = 0, handled_mask = 0; i < max_component_count; ++i)
I am not sure I understand this: what is `max_component_count` expected to be? Why don't you just iterate over the whole write mask?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/ir.c:
if (!(mov_src = vsir_program_get_src_params(program, 1)))
{
ERR("Failed to allocate instruction src param.\n");
normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY;
return;
}
mov_src->reg = *reg;
mov_src->reg.idx[0].offset = array_offset + component_idx;
mov_src->swizzle = 0;
mov_src->modifiers = 0;
if (!(dst_param = clip_cull_normaliser_emit_mov(normaliser, mov_src)))
return;
vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, reg->data_type, 1);
dst_param->reg.dimension = reg->dimension;
Shouldn't the destination (i.e., the temp register) be VEC4 even if the source is scalar?