From: Conor McCarthy cmccarthy@codeweavers.com
Up to eight clip/cull values are supported, and the DXBC pattern of spreading these across two vector signature elements is a hacky solution, so converting these into an array is best for VSIR. SPIR-V requires these to be an array. --- libs/vkd3d-shader/ir.c | 400 ++++++++++++++++++++++- libs/vkd3d-shader/spirv.c | 7 +- libs/vkd3d-shader/vkd3d_shader_private.h | 2 + 3 files changed, 405 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 3f8a6f192..532b4b710 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -18,6 +18,8 @@
#include "vkd3d_shader_private.h"
+static const unsigned int D3D12_CLIP_OR_CULL_DISTANCE_COUNT = 8; + static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) { return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; @@ -1255,6 +1257,399 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse return VKD3D_OK; }
+struct clip_cull_normaliser_signature +{ + struct shader_signature *s; + unsigned int base_element_idx; + unsigned int need_normalisation[MAX_REG_OUTPUT]; + unsigned int offsets[MAX_REG_OUTPUT]; +}; + +struct clip_cull_normaliser +{ + struct vkd3d_shader_parser *parser; + + struct vkd3d_shader_location location; + bool has_normalised_clip_cull; + enum vkd3d_result result; + + struct vkd3d_shader_instruction *instructions; + size_t instruction_capacity; + size_t instruction_count; + + struct clip_cull_normaliser_signature input_signature; + + bool has_dcl_temps; + ptrdiff_t temp_ins_idx; + unsigned int temp_count; + unsigned int max_temp_count; +}; + +static void shader_signature_transform_clip_cull(struct clip_cull_normaliser_signature *signature, + enum vkd3d_shader_sysval_semantic target_sysval, struct clip_cull_normaliser *normaliser) +{ + unsigned int i, j, base, array_size, reg_mask, count, arrayed_count; + struct vkd3d_shader_parser *parser = normaliser->parser; + struct shader_signature *s = signature->s; + struct signature_element *e; + + /* Up to two vec4 clip/cull elements are allowed. Merge these into an array, + * and track the location and array offset of the second one. */ + for (i = 0, array_size = 0, reg_mask = 0, base = 0, count = 0, arrayed_count = 0; i < s->element_count; ++i) + { + e = &s->elements[i]; + + if (e->sysval_semantic != target_sysval) + { + /* Compile a map of used registers. */ + reg_mask |= ((1u << e->register_count) - 1) << e->register_index; + continue; + } + + if (!array_size) + base = i; + + signature->offsets[i] = array_size; + array_size += vsir_write_mask_component_count(e->mask); + ++count; + arrayed_count += e->register_count > 1; + } + + if (!array_size) + return; + /* DXIL supports clip/cull arrays of size 2, and a single array requires no modification. Larger + * arrays exceed the semantic index limit (and cause dxcompiler version 1.7.0.4006 to crash). 
*/ + if (arrayed_count == 1 && count == 1) + return; + /* TODO: combining an array with another clip/cull element should be rare, and requires special handling. */ + if (arrayed_count) + { + WARN("Unsupported multiple elements with at least one array.\n"); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Multiple clip or cull signature elements are not supported if at least one is an array."); + return; + } + + if (array_size > D3D12_CLIP_OR_CULL_DISTANCE_COUNT) + { + WARN("Invalid array size %u.\n", array_size); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Clip or cull array size %u exceeds the D3D12 limit of 8.", array_size); + return; + } + + signature->base_element_idx = base; + signature->need_normalisation[base] = true; + e = &s->elements[base]; + + /* Find the lowest register with at least array_size free above. */ + for (i = base, j = ((1u << array_size) - 1) << base; i < MAX_REG_OUTPUT; ++i, j <<= 1) + { + if (!(reg_mask & j)) + break; + } + + /* Signature locations are not used for sysvals, so if it proves necessary, it should be + * ok to raise MAX_REG_OUTPUT to 40 and validate that non-sysval register indices are < 32. */ + if (array_size > MAX_REG_OUTPUT - i) + { + FIXME("Too many registers; register index %u, array size %u.\n", i, array_size); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Clip or cull base register index %u + array size %u exceeds limit of 32 registers.", i, array_size); + return; + } + + e->register_index = i; + e->register_count = array_size; + e->mask = 1; + e->used_mask = 1; + + /* Delete the second clip/cull element. It will be remapped to the first by checking the offset table. 
*/ + j = base + 1; + for (i = j; i < s->element_count; ++i) + { + e = &s->elements[i]; + + if (e->sysval_semantic != target_sysval) + s->elements[j++] = *e; + } + s->element_count = j; + + normaliser->has_normalised_clip_cull = true; +} + +static struct vkd3d_shader_instruction *clip_cull_normaliser_require_space(struct clip_cull_normaliser *normaliser, + size_t count) +{ + if (!vkd3d_array_reserve((void **)&normaliser->instructions, &normaliser->instruction_capacity, + normaliser->instruction_count + count, sizeof(*normaliser->instructions))) + { + ERR("Failed to allocate instructions.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + return &normaliser->instructions[normaliser->instruction_count]; +} + +static struct vkd3d_shader_dst_param *clip_cull_normaliser_emit_mov(struct clip_cull_normaliser *normaliser, + struct vkd3d_shader_src_param *src_param) +{ + struct vkd3d_shader_instruction *ins; + + if (!(ins = clip_cull_normaliser_require_space(normaliser, 1))) + return NULL; + vsir_instruction_init(ins, &normaliser->location, VKD3DSIH_MOV); + + ins->src = src_param; + ins->src_count = 1; + + if (!(ins->dst = shader_parser_get_dst_params(normaliser->parser, 1))) + { + ERR("Failed to allocate instruction dst param.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + ins->dst_count = 1; + + ++normaliser->instruction_count; + return ins->dst; +} + +static void clip_cull_normaliser_copy_instruction(struct clip_cull_normaliser *normaliser, + struct vkd3d_shader_instruction *ins) +{ + struct vkd3d_shader_instruction *dst_ins; + + if (!(dst_ins = clip_cull_normaliser_require_space(normaliser, 1))) + return; + *dst_ins = *ins; + ++normaliser->instruction_count; +} + +static void clip_cull_normaliser_resolve_temp_declaration(struct clip_cull_normaliser *normaliser) +{ + if (normaliser->temp_ins_idx < 0) + return; + + if (!normaliser->has_dcl_temps && !normaliser->temp_count) + 
normaliser->instructions[normaliser->temp_ins_idx].handler_idx = VKD3DSIH_NOP; + else + normaliser->instructions[normaliser->temp_ins_idx].declaration.count += normaliser->temp_count; + + normaliser->max_temp_count = max(normaliser->temp_count, normaliser->max_temp_count); + + normaliser->temp_count = 0; + normaliser->temp_ins_idx = -1; + normaliser->has_dcl_temps = false; +} + +static void shader_src_param_clip_cull_normalise(struct vkd3d_shader_src_param *src_param, + unsigned int write_mask, struct clip_cull_normaliser *normaliser) +{ + unsigned int i, element_idx, component_idx, array_offset, handled_mask, mask_bit; + struct vkd3d_shader_parser *parser = normaliser->parser; + const struct clip_cull_normaliser_signature *signature; + struct vkd3d_shader_register *reg = &src_param->reg; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *mov_src; + + switch (reg->type) + { + case VKD3DSPR_INPUT: + if (parser->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN) + return; + signature = &normaliser->input_signature; + break; + default: + return; + } + + element_idx = reg->idx[reg->idx_count - 1].offset; + if ((array_offset = signature->offsets[element_idx])) + { + element_idx = signature->base_element_idx; + reg->idx[reg->idx_count - 1].offset = element_idx; + } + if (!signature->need_normalisation[element_idx]) + return; + + if (!write_mask) + { + WARN("Unexpected zero write mask.\n"); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Write mask for a clip/cull load is zero."); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + /* Dynamic array addressing of clip/cull inputs is not supported, except for dxil arrayed inputs, + * which are filtered out above by 'need_normalisation'. 
*/ + if (reg->idx_count >= ARRAY_SIZE(reg->idx)) + { + WARN("Unexpected index count %u.\n", reg->idx_count); + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid register index count %u for a clip/cull load.", reg->idx_count); + normaliser->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + /* Move the indices up so the array index can be placed in idx[0]. */ + memmove(&reg->idx[1], &reg->idx[0], reg->idx_count * sizeof(reg->idx[0])); + memset(&reg->idx[0], 0, sizeof(reg->idx[0])); + ++reg->idx_count; + + if (vsir_write_mask_component_count(write_mask) == 1) + { + reg->idx[0].offset = array_offset + vsir_swizzle_get_component(src_param->swizzle, + vsir_write_mask_get_component_idx(write_mask)); + src_param->swizzle = 0; + return; + } + + for (i = 0, handled_mask = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask & (1u << i))) + continue; + + /* For each component accessed by the write mask+swizzle, emit a MOV from the clip/cull array to a temp. + * The original instruction will still apply the swizzle, so the temp is a direct reflection of the source. 
*/ + + component_idx = vsir_swizzle_get_component(src_param->swizzle, i); + mask_bit = 1u << component_idx; + + if (handled_mask & mask_bit) + continue; + handled_mask |= mask_bit; + + if (!(mov_src = shader_parser_get_src_params(parser, 1))) + { + ERR("Failed to allocate instruction src param.\n"); + normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + mov_src->reg = *reg; + mov_src->reg.idx[0].offset = array_offset + component_idx; + mov_src->swizzle = 0; + mov_src->modifiers = 0; + + if (!(dst_param = clip_cull_normaliser_emit_mov(normaliser, mov_src))) + return; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, reg->data_type, 1); + dst_param->reg.dimension = reg->dimension; + dst_param->reg.idx[0].offset = parser->shader_desc.temp_count; + normaliser->temp_count = 1; + dst_param->write_mask = mask_bit; + dst_param->modifiers = 0; + dst_param->shift = 0; + } + + /* Substitute the temp for the vector clip/cull source. */ + vsir_register_init(reg, VKD3DSPR_TEMP, reg->data_type, 1); + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = parser->shader_desc.temp_count; +} + +static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_instruction *ins, + struct clip_cull_normaliser *normaliser) +{ + struct vkd3d_shader_instruction *dst_ins; + unsigned int i, write_mask; + + if (ins->handler_idx == VKD3DSIH_NOP) + return; + + normaliser->location = ins->location; + + if (shader_instruction_is_dcl(ins)) + { + if (ins->handler_idx == VKD3DSIH_DCL_TEMPS) + { + /* DCL_TEMPS should not occur after the declarations section, but let's make it robust. 
*/ + if (normaliser->temp_ins_idx >= 0) + normaliser->instructions[normaliser->temp_ins_idx].handler_idx = VKD3DSIH_NOP; + normaliser->temp_ins_idx = normaliser->instruction_count; + normaliser->has_dcl_temps = true; + } + clip_cull_normaliser_copy_instruction(normaliser, ins); + return; + } + + if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE + || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + { + clip_cull_normaliser_resolve_temp_declaration(normaliser); + clip_cull_normaliser_copy_instruction(normaliser, ins); + return; + } + + if (normaliser->temp_ins_idx < 0) + { + if (!(dst_ins = clip_cull_normaliser_require_space(normaliser, 1))) + return; + normaliser->temp_ins_idx = normaliser->instruction_count; + vsir_instruction_init(dst_ins, &normaliser->location, VKD3DSIH_DCL_TEMPS); + ++normaliser->instruction_count; + } + + for (i = 0, write_mask = 0; i < ins->dst_count; ++i) + if (ins->dst[i].reg.type != VKD3DSPR_NULL) + write_mask |= ins->dst[i].write_mask; + for (i = 0; i < ins->src_count; ++i) + shader_src_param_clip_cull_normalise(&ins->src[i], write_mask, normaliser); + + clip_cull_normaliser_copy_instruction(normaliser, ins); +} + +static enum vkd3d_result shader_normalise_clip_cull(struct vkd3d_shader_parser *parser) +{ + struct clip_cull_normaliser normaliser = {0}; + unsigned int i; + + normaliser.parser = parser; + normaliser.input_signature.s = &parser->shader_desc.input_signature; + normaliser.temp_ins_idx = -1; + + if (parser->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) + { + shader_signature_transform_clip_cull(&normaliser.input_signature, VKD3D_SHADER_SV_CLIP_DISTANCE, &normaliser); + shader_signature_transform_clip_cull(&normaliser.input_signature, VKD3D_SHADER_SV_CULL_DISTANCE, &normaliser); + } + + if (parser->failed) + return VKD3D_ERROR_INVALID_SHADER; + + if (!normaliser.has_normalised_clip_cull) + return VKD3D_OK; + + if (!clip_cull_normaliser_require_space(&normaliser, 
parser->instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < parser->instructions.count; ++i) + shader_instruction_normalise_clip_cull_params(&parser->instructions.elements[i], &normaliser); + + if (normaliser.result >= 0) + { + vkd3d_free(parser->instructions.elements); + parser->instructions.elements = normaliser.instructions; + parser->instructions.capacity = normaliser.instruction_capacity; + parser->instructions.count = normaliser.instruction_count; + + clip_cull_normaliser_resolve_temp_declaration(&normaliser); + parser->shader_desc.temp_count += normaliser.max_temp_count; + + return VKD3D_OK; + } + else + { + if (normaliser.result == VKD3D_ERROR_OUT_OF_MEMORY) + { + vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY, + "Out of memory allocating clip/cull normalization instructions.\n"); + } + vkd3d_free(normaliser.instructions); + return normaliser.result; + } +} + struct flat_constant_def { enum vkd3d_shader_d3dbc_constant_register set; @@ -1543,7 +1938,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, enum vkd3d_result result = VKD3D_OK;
if (parser->shader_desc.is_dxil) - return result; + return shader_normalise_clip_cull(parser);
if (parser->shader_version.type != VKD3D_SHADER_TYPE_PIXEL && (result = remap_output_signature(parser, compile_info)) < 0) @@ -1561,6 +1956,9 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, if ((result = shader_normalise_io_registers(parser)) < 0) return result;
+ if ((result = shader_normalise_clip_cull(parser)) < 0) + return result; + if ((result = instruction_array_normalise_flat_constants(parser)) < 0) return result;
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 23f249ee3..dc3a89382 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4844,7 +4844,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->input_control_point_count); if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) - && (!vsir_sysval_semantic_is_clip_cull(signature_element->sysval_semantic) || array_sizes[1])) + && (!vsir_sysval_semantic_is_clip_cull(sysval) || array_sizes[1])) { array_sizes[0] = 0; } @@ -9953,8 +9953,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, enum vkd3d_result result = VKD3D_OK; unsigned int i;
- if (parser->shader_desc.temp_count) - spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); if (parser->shader_desc.ssa_count) spirv_compiler_allocate_ssa_register_ids(compiler, parser->shader_desc.ssa_count);
@@ -9966,6 +9964,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) return result;
+ if (parser->shader_desc.temp_count) + spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); + instructions = parser->instructions; memset(&parser->instructions, 0, sizeof(parser->instructions));
diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index b85441aeb..947ef213c 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -217,6 +217,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, VKD3D_SHADER_ERROR_VSIR_INVALID_INSTRUCTION_NESTING = 9016, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, + VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY = 9018, + VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9019,
VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, };