From: Conor McCarthy cmccarthy@codeweavers.com
Up to eight clip/cull values are supported, and the DXBC pattern of spreading these across two vector signature elements is a hacky solution, so converting these into an array is best for VSIR. SPIR-V requires these to be an array. --- libs/vkd3d-shader/ir.c | 461 +++++++++++++++++++++++ libs/vkd3d-shader/spirv.c | 2 +- libs/vkd3d-shader/vkd3d_shader_private.h | 3 + 3 files changed, 465 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index f0bd85338..97a88e21f 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -18,6 +18,8 @@
#include "vkd3d_shader_private.h"
+static const unsigned int MAX_CLIP_OR_CULL_DISTANCE_COUNT = 8; + bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) { program->shader_version = *version; @@ -1456,6 +1458,462 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse return VKD3D_OK; }
+/* Per signature element bookkeeping produced by the signature transform. */
+struct clip_cull_normaliser_scan
+{
+    /* Set for every element which was merged into a clip/cull array,
+     * including the base element which holds the array. */
+    bool need_normalisation;
+    /* Array offset of a merged (non-base) element within the array; zero for the base element. */
+    unsigned int offset;
+    /* Signature element index of the base element a merged element is remapped to. */
+    unsigned int remap;
+};
+
+struct clip_cull_normaliser_signature
+{
+    struct shader_signature *s;
+    /* Indexed by signature element index. */
+    struct clip_cull_normaliser_scan scan[MAX_REG_OUTPUT];
+};
+
+struct clip_cull_normaliser
+{
+    struct vkd3d_shader_parser *parser;
+
+    /* Location of the instruction currently being processed; used for emitted MOVs. */
+    struct vkd3d_shader_location location;
+    /* Set once at least one clip or cull array has been created in the signature. */
+    bool has_normalised_clip_cull;
+    /* First-class error state; negative on failure. */
+    enum vkd3d_result result;
+
+    /* The replacement instruction array being built. */
+    struct vkd3d_shader_instruction *instructions;
+    size_t instruction_capacity;
+    size_t instruction_count;
+
+    struct clip_cull_normaliser_signature input_signature;
+
+    /* Number of temps to add to the program's temp count on success. */
+    unsigned int temp_count;
+};
+
+/* Return the lowest index i below MAX_REG_OUTPUT for which none of the array_size bits
+ * starting at bit i are set in used_mask, or MAX_REG_OUTPUT if there is no such index.
+ * Bits shifted past the width of unsigned int are discarded, so the caller must still
+ * check that the returned index plus array_size fits within the register limit. */
+static unsigned int mask_find_free_array_slot(unsigned int used_mask, unsigned int array_size)
+{
+    unsigned int i, array_mask = (1u << array_size) - 1;
+    /* Find the lowest register with at least array_size free above. */
+    for (i = 0; i < MAX_REG_OUTPUT; ++i)
+    {
+        if (!(used_mask & (array_mask << i)))
+            break;
+    }
+    return i;
+}
+
+/* Merge all signature elements with sysval target_sysval into a single array element.
+ *
+ * The first matching element becomes the base and is rewritten as an array of scalars
+ * (mask 1) placed at a free register index. Every later matching element is zeroed, and
+ * its scan entry records the base element index and its offset within the array.
+ * On failure a parser error is emitted and normaliser->result is set. */
+static void normaliser_signature_transform_clip_or_cull(struct clip_cull_normaliser_signature *signature,
+        enum vkd3d_shader_sysval_semantic target_sysval, struct clip_cull_normaliser *normaliser)
+{
+    unsigned int i, base, array_size, mask, reg_mask, location_mask;
+    struct vkd3d_shader_parser *parser = normaliser->parser;
+    struct shader_signature *s = signature->s;
+    struct signature_element *e;
+
+    /* Up to two vec4 clip/cull elements are allowed. Merge these into an array,
+     * and track the location and array offset of the second one. */
+    for (i = 0, array_size = 0, reg_mask = 0, location_mask = 0, base = 0; i < s->element_count; ++i)
+    {
+        e = &s->elements[i];
+
+        if (e->sysval_semantic != target_sysval)
+        {
+            mask = (1u << e->register_count) - 1;
+            /* Compile a map of used registers and locations. */
+            reg_mask |= mask << e->register_index;
+            location_mask |= mask << e->target_location;
+            continue;
+        }
+
+        /* The scan array is indexed by element index but only holds MAX_REG_OUTPUT
+         * entries, so reject elements which cannot be tracked instead of writing
+         * out of bounds. */
+        if (i >= ARRAY_SIZE(signature->scan))
+        {
+            WARN("Invalid clip/cull element index %u.\n", i);
+            vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+                    "Clip or cull signature element index %u exceeds the supported maximum.", i);
+            normaliser->result = VKD3D_ERROR_INVALID_SHADER;
+            return;
+        }
+
+        if (!array_size)
+        {
+            base = i;
+            array_size = vsir_write_mask_component_count(e->mask);
+        }
+        else
+        {
+            signature->scan[i].remap = base;
+            signature->scan[i].need_normalisation = true;
+            signature->scan[i].offset = array_size;
+            array_size += vsir_write_mask_component_count(e->mask);
+            /* Make no-op. */
+            memset(e, 0, sizeof(*e));
+        }
+    }
+
+    if (!array_size)
+        return;
+
+    if (array_size > MAX_CLIP_OR_CULL_DISTANCE_COUNT)
+    {
+        WARN("Invalid array size %u.\n", array_size);
+        vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+                "Clip or cull array size %u exceeds the limit of %u.", array_size,
+                MAX_CLIP_OR_CULL_DISTANCE_COUNT);
+        normaliser->result = VKD3D_ERROR_INVALID_SHADER;
+        return;
+    }
+
+    signature->scan[base].need_normalisation = true;
+    e = &s->elements[base];
+
+    i = mask_find_free_array_slot(reg_mask, array_size);
+    /* Signature locations are not used for sysvals, so if it proves necessary, it should be
+     * ok to raise MAX_REG_OUTPUT to 40 and validate that non-sysval register indices are < 32. */
+    if (array_size > MAX_REG_OUTPUT - i)
+    {
+        FIXME("Too many registers; register index %u, array size %u.\n", i, array_size);
+        vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE,
+                "Clip or cull base register index %u + array size %u exceeds limit of 32 registers.", i, array_size);
+        normaliser->result = VKD3D_ERROR_INVALID_SHADER;
+        return;
+    }
+
+    /* Rewrite the base element as an array of array_size scalars. */
+    e->register_index = i;
+    e->target_location = mask_find_free_array_slot(location_mask, array_size);
+    e->register_count = array_size;
+    e->mask = 1;
+    e->used_mask = 1;
+
+    normaliser->has_normalised_clip_cull = true;
+}
+
+/* Run the signature transform for clip distances, then for cull distances. */
+static void normaliser_signature_transform_clip_cull(struct clip_cull_normaliser_signature *signature,
+        struct clip_cull_normaliser *normaliser)
+{
+    normaliser_signature_transform_clip_or_cull(signature, VKD3D_SHADER_SV_CLIP_DISTANCE, normaliser);
+    normaliser_signature_transform_clip_or_cull(signature, VKD3D_SHADER_SV_CULL_DISTANCE, normaliser);
+}
+
+/* Ensure there is room for count more instructions in the new instruction array.
+ * Returns a pointer to the next unused instruction slot, or NULL (with
+ * normaliser->result set to VKD3D_ERROR_OUT_OF_MEMORY) if the reservation fails. */
+static struct vkd3d_shader_instruction *clip_cull_normaliser_require_space(struct clip_cull_normaliser *normaliser,
+        size_t count)
+{
+    if (!vkd3d_array_reserve((void **)&normaliser->instructions, &normaliser->instruction_capacity,
+            normaliser->instruction_count + count, sizeof(*normaliser->instructions)))
+    {
+        ERR("Failed to allocate instructions.\n");
+        normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY;
+        return NULL;
+    }
+    return &normaliser->instructions[normaliser->instruction_count];
+}
+
+/* Append a MOV instruction reading from src_param to the new instruction array.
+ * Returns the newly allocated destination parameter for the caller to fill in,
+ * or NULL on allocation failure, in which case no instruction is appended. */
+static struct vkd3d_shader_dst_param *clip_cull_normaliser_emit_mov(struct clip_cull_normaliser *normaliser,
+        struct vkd3d_shader_src_param *src_param)
+{
+    struct vkd3d_shader_instruction *ins;
+
+    if (!(ins = clip_cull_normaliser_require_space(normaliser, 1)))
+        return NULL;
+    vsir_instruction_init(ins, &normaliser->location, VKD3DSIH_MOV);
+
+    ins->src = src_param;
+    ins->src_count = 1;
+
+    if (!(ins->dst = vsir_program_get_dst_params(&normaliser->parser->program, 1)))
+    {
+        ERR("Failed to allocate instruction dst param.\n");
+        normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY;
+        return NULL;
+    }
+    ins->dst_count = 1;
+
+    /* Only commit the instruction once it is fully set up. */
+    ++normaliser->instruction_count;
+    return ins->dst;
+}
+
+/* Append a shallow copy of ins to the new instruction array. */
+static void clip_cull_normaliser_copy_instruction(struct clip_cull_normaliser *normaliser,
+        struct vkd3d_shader_instruction *ins)
+{
+    struct vkd3d_shader_instruction *dst_ins;
+
+    if (!(dst_ins = clip_cull_normaliser_require_space(normaliser, 1)))
+        return;
+    *dst_ins = *ins;
+    ++normaliser->instruction_count;
+}
+
+/* Use the dst write mask where it is known to be valid. Integer instructions are included in case
+ * float values are bitcast. This wouldn't make much sense for clip/cull, but handling it is harmless.
+ * All other occurrences are likely to be very rare, and use the fallback path.
+ *
+ * Returns the mask of components a source of ins is taken to read: dst_write_mask for the
+ * listed instructions, component 0 for conditional control flow, and all four components
+ * (with a parser warning) for any other instruction. */
+static unsigned int clip_cull_src_normalisation_get_write_mask(struct vkd3d_shader_instruction *ins,
+        unsigned int dst_write_mask, struct vkd3d_shader_parser *parser)
+{
+    switch (ins->handler_idx)
+    {
+        case VKD3DSIH_ABS:
+        case VKD3DSIH_ACOS:
+        case VKD3DSIH_ADD:
+        case VKD3DSIH_AND:
+        case VKD3DSIH_ASIN:
+        case VKD3DSIH_ATAN:
+        case VKD3DSIH_BFREV:
+        case VKD3DSIH_COUNTBITS:
+        case VKD3DSIH_DIV:
+        case VKD3DSIH_EQO:
+        case VKD3DSIH_EQU:
+        case VKD3DSIH_EXP:
+        case VKD3DSIH_F32TOF16:
+        case VKD3DSIH_FRC:
+        case VKD3DSIH_FREM:
+        case VKD3DSIH_FTOD:
+        case VKD3DSIH_FTOI:
+        case VKD3DSIH_FTOU:
+        case VKD3DSIH_GEO:
+        case VKD3DSIH_GEU:
+        case VKD3DSIH_HCOS:
+        case VKD3DSIH_HSIN:
+        case VKD3DSIH_HTAN:
+        case VKD3DSIH_IADD:
+        case VKD3DSIH_IBFE:
+        case VKD3DSIH_IDIV:
+        case VKD3DSIH_IEQ:
+        case VKD3DSIH_IGE:
+        case VKD3DSIH_ILT:
+        case VKD3DSIH_IMAD:
+        case VKD3DSIH_IMAX:
+        case VKD3DSIH_IMIN:
+        case VKD3DSIH_IMUL:
+        case VKD3DSIH_INE:
+        case VKD3DSIH_INEG:
+        case VKD3DSIH_ISFINITE:
+        case VKD3DSIH_ISHL:
+        case VKD3DSIH_ISHR:
+        case VKD3DSIH_ISINF:
+        case VKD3DSIH_ISNAN:
+        case VKD3DSIH_ITOD:
+        case VKD3DSIH_ITOF:
+        case VKD3DSIH_ITOI:
+        case VKD3DSIH_LOG:
+        case VKD3DSIH_LTO:
+        case VKD3DSIH_LTU:
+        case VKD3DSIH_MAD:
+        case VKD3DSIH_MAX:
+        case VKD3DSIH_MIN:
+        case VKD3DSIH_MOV:
+        case VKD3DSIH_MOVC:
+        case VKD3DSIH_MSAD:
+        case VKD3DSIH_MUL:
+        case VKD3DSIH_NEO:
+        case VKD3DSIH_NEU:
+        case VKD3DSIH_NOT:
+        case VKD3DSIH_OR:
+        case VKD3DSIH_POW:
+        case VKD3DSIH_RCP:
+        case VKD3DSIH_ROUND_NE:
+        case VKD3DSIH_ROUND_NI:
+        case VKD3DSIH_ROUND_PI:
+        case VKD3DSIH_ROUND_Z:
+        case VKD3DSIH_RSQ:
+        case VKD3DSIH_SINCOS:
+        case VKD3DSIH_SQRT:
+        case VKD3DSIH_SUB:
+        case VKD3DSIH_TAN:
+        case VKD3DSIH_UBFE:
+        case VKD3DSIH_UDIV:
+        case VKD3DSIH_UGE:
+        case VKD3DSIH_ULT:
+        case VKD3DSIH_UMAX:
+        case VKD3DSIH_UMIN:
+        case VKD3DSIH_UMUL:
+        case VKD3DSIH_USHR:
+        case VKD3DSIH_UTOD:
+        case VKD3DSIH_UTOF:
+        case VKD3DSIH_UTOU:
+        case VKD3DSIH_XOR:
+            return dst_write_mask;
+
+        /* Condition parameters could take a bitcast float for a nonzero check. */
+        case VKD3DSIH_BRANCH:
+        case VKD3DSIH_BREAKP:
+        case VKD3DSIH_CONTINUEP:
+        case VKD3DSIH_IF:
+        case VKD3DSIH_SWITCH:
+            return VKD3DSP_WRITEMASK_0;
+
+        /* Fallback: copy all available values to the temp. */
+        default:
+            FIXME("Copying up to four clip/cull values for handler %u.\n", ins->handler_idx);
+            vkd3d_shader_parser_warning(parser, VKD3D_SHADER_WARNING_VSIR_UNKNOWN_WRITE_MASK,
+                    "Copying up to four clip/cull values for handler %u.", ins->handler_idx);
+            return VKD3DSP_WRITEMASK_ALL;
+    }
+}
+
+/* Rewrite source parameter src_idx of ins if it reads a merged clip/cull input element.
+ *
+ * The register gains a leading array index in idx[0]. A single-component read is turned
+ * into a direct scalar array access. A multi-component read is replaced by a temp which
+ * is filled, one component at a time, by MOVs emitted ahead of the instruction.
+ * write_mask is the combined destination write mask of ins. */
+static void shader_instruction_src_param_clip_cull_normalise(struct vkd3d_shader_instruction *ins,
+        unsigned int src_idx, unsigned int write_mask, struct clip_cull_normaliser *normaliser)
+{
+    unsigned int i, element_idx, component_idx, array_offset, max_component_count, handled_mask, mask_bit;
+    struct vkd3d_shader_src_param *src_param = &ins->src[src_idx];
+    struct vkd3d_shader_parser *parser = normaliser->parser;
+    const struct clip_cull_normaliser_signature *signature;
+    struct vkd3d_shader_register *reg = &src_param->reg;
+    struct vsir_program *program = &parser->program;
+    struct vkd3d_shader_dst_param *dst_param;
+    struct vkd3d_shader_src_param *mov_src;
+
+    switch (reg->type)
+    {
+        case VKD3DSPR_INPUT:
+            signature = &normaliser->input_signature;
+            break;
+        default:
+            return;
+    }
+
+    /* The last index is used as the signature element index below; without any
+     * index there is nothing to look up. */
+    if (!reg->idx_count)
+        return;
+
+    element_idx = reg->idx[reg->idx_count - 1].offset;
+
+    /* Elements beyond the scan array were never marked for normalisation. */
+    if (element_idx >= ARRAY_SIZE(signature->scan))
+        return;
+
+    if (!signature->scan[element_idx].need_normalisation)
+        return;
+
+    if (!(write_mask = clip_cull_src_normalisation_get_write_mask(ins, write_mask, parser)))
+        return;
+
+    /* Elements with an array offset are remapped to the base element. */
+    if ((array_offset = signature->scan[element_idx].offset))
+    {
+        element_idx = signature->scan[element_idx].remap;
+        reg->idx[reg->idx_count - 1].offset = element_idx;
+    }
+
+    /* Dynamic array addressing of clip/cull inputs is not supported. */
+    if (reg->idx_count >= ARRAY_SIZE(reg->idx))
+    {
+        WARN("Unexpected index count %u.\n", reg->idx_count);
+        vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
+                "Invalid register index count %u for a clip/cull load.", reg->idx_count);
+        normaliser->result = VKD3D_ERROR_INVALID_SHADER;
+        return;
+    }
+
+    /* Move the indices up so the array index can be placed in idx[0]. */
+    memmove(&reg->idx[1], &reg->idx[0], reg->idx_count * sizeof(reg->idx[0]));
+    memset(&reg->idx[0], 0, sizeof(reg->idx[0]));
+    ++reg->idx_count;
+
+    if (vsir_write_mask_component_count(write_mask) == 1)
+    {
+        /* A single value is read, so address the array element directly. */
+        reg->idx[0].offset = array_offset + vsir_swizzle_get_component(src_param->swizzle,
+                vsir_write_mask_get_component_idx(write_mask));
+        src_param->swizzle = 0;
+        return;
+    }
+
+    /* Number of array entries available from this element's offset, at most one vec4. */
+    max_component_count = signature->s->elements[element_idx].register_count - array_offset;
+    max_component_count = min(max_component_count, VKD3D_VEC4_SIZE);
+
+    /* Iterate over every destination component: the write mask selects destination
+     * components, while the array bound applies to the swizzled source component. */
+    for (i = 0, handled_mask = 0; i < VKD3D_VEC4_SIZE; ++i)
+    {
+        if (!(write_mask & (1u << i)))
+            continue;
+
+        /* For each component accessed by the write mask + swizzle, emit a MOV from the clip/cull array to a temp.
+         * The original instruction will still apply the swizzle, so the temp is a direct reflection of the source. */
+
+        component_idx = vsir_swizzle_get_component(src_param->swizzle, i);
+        /* Do not read past the values provided by the array. */
+        if (component_idx >= max_component_count)
+            continue;
+        mask_bit = 1u << component_idx;
+
+        /* Each source component only needs to be copied once. */
+        if (handled_mask & mask_bit)
+            continue;
+        handled_mask |= mask_bit;
+
+        if (!(mov_src = vsir_program_get_src_params(program, 1)))
+        {
+            ERR("Failed to allocate instruction src param.\n");
+            normaliser->result = VKD3D_ERROR_OUT_OF_MEMORY;
+            return;
+        }
+        mov_src->reg = *reg;
+        mov_src->reg.idx[0].offset = array_offset + component_idx;
+        mov_src->swizzle = 0;
+        mov_src->modifiers = 0;
+
+        if (!(dst_param = clip_cull_normaliser_emit_mov(normaliser, mov_src)))
+            return;
+        vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, reg->data_type, 1);
+        dst_param->reg.dimension = reg->dimension;
+        /* The temp index is the current temp count, i.e. one new temp appended to the program. */
+        dst_param->reg.idx[0].offset = parser->program.temp_count;
+        normaliser->temp_count = 1;
+        dst_param->write_mask = mask_bit;
+        dst_param->modifiers = 0;
+        dst_param->shift = 0;
+    }
+
+    /* Substitute the temp for the vector clip/cull source. */
+    vsir_register_init(reg, VKD3DSPR_TEMP, reg->data_type, 1);
+    reg->dimension = VSIR_DIMENSION_VEC4;
+    reg->idx[0].offset = parser->program.temp_count;
+}
+
+/* Process one instruction: NOPs are dropped, declarations are copied unchanged, and for
+ * all other instructions every source parameter is normalised before the instruction
+ * is copied to the new instruction array. */
+static void shader_instruction_normalise_clip_cull_params(struct vkd3d_shader_instruction *ins,
+        struct clip_cull_normaliser *normaliser)
+{
+    unsigned int i, write_mask;
+
+    if (ins->handler_idx == VKD3DSIH_NOP)
+        return;
+
+    normaliser->location = ins->location;
+
+    if (vsir_instruction_is_dcl(ins))
+    {
+        clip_cull_normaliser_copy_instruction(normaliser, ins);
+        return;
+    }
+
+    /* Combine the write masks of all non-null destinations. */
+    for (i = 0, write_mask = 0; i < ins->dst_count; ++i)
+        if (ins->dst[i].reg.type != VKD3DSPR_NULL)
+            write_mask |= ins->dst[i].write_mask;
+
+    for (i = 0; i < ins->src_count; ++i)
+        shader_instruction_src_param_clip_cull_normalise(ins, i, write_mask, normaliser);
+
+    clip_cull_normaliser_copy_instruction(normaliser, ins);
+}
+
+/* Merge clip/cull input signature elements into arrays and rewrite all instructions which
+ * read them. The input signature is not transformed for domain shaders. On success the
+ * program's instruction array is replaced by the rewritten one; on failure the program's
+ * instructions are left in place and a negative vkd3d_result is returned. */
+static enum vkd3d_result normalise_clip_cull(struct vkd3d_shader_parser *parser)
+{
+    struct clip_cull_normaliser normaliser = {0};
+    unsigned int i;
+
+    normaliser.parser = parser;
+    normaliser.input_signature.s = &parser->shader_desc.input_signature;
+
+    if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_DOMAIN)
+        normaliser_signature_transform_clip_cull(&normaliser.input_signature, &normaliser);
+
+    if (parser->failed)
+        return VKD3D_ERROR_INVALID_SHADER;
+
+    /* Nothing was merged, so no instruction needs rewriting. */
+    if (!normaliser.has_normalised_clip_cull)
+        return VKD3D_OK;
+
+    if (!clip_cull_normaliser_require_space(&normaliser, parser->program.instructions.count))
+        return VKD3D_ERROR_OUT_OF_MEMORY;
+
+    for (i = 0; i < parser->program.instructions.count; ++i)
+        shader_instruction_normalise_clip_cull_params(&parser->program.instructions.elements[i], &normaliser);
+
+    if (normaliser.result >= 0)
+    {
+        vkd3d_free(parser->program.instructions.elements);
+        parser->program.instructions.elements = normaliser.instructions;
+        parser->program.instructions.capacity = normaliser.instruction_capacity;
+        parser->program.instructions.count = normaliser.instruction_count;
+
+        parser->program.temp_count += normaliser.temp_count;
+
+        return VKD3D_OK;
+    }
+    else
+    {
+        if (normaliser.result == VKD3D_ERROR_OUT_OF_MEMORY)
+        {
+            vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY,
+                    "Out of memory allocating clip/cull normalization instructions.");
+        }
+        vkd3d_free(normaliser.instructions);
+        return normaliser.result;
+    }
+}
+
 struct flat_constant_def
 {
     enum vkd3d_shader_d3dbc_constant_register set;
@@ -3377,6 +3835,9 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
     if ((result = shader_normalise_io_registers(parser)) < 0)
         return result;
+ if ((result = normalise_clip_cull(parser)) < 0) + return result; + if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0) return result;
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 5257679d6..24f75a085 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4915,7 +4915,7 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->input_control_point_count); if (array_sizes[0] == 1 && !vsir_sysval_semantic_is_tess_factor(signature_element->sysval_semantic) - && (!vsir_sysval_semantic_is_clip_cull(signature_element->sysval_semantic) || array_sizes[1])) + && (!vsir_sysval_semantic_is_clip_cull(sysval) || array_sizes[1])) { array_sizes[0] = 0; } diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 4b322b95b..9c89b4e6f 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -218,8 +218,11 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX = 9015, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW = 9016, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE = 9017, + VKD3D_SHADER_ERROR_VSIR_OUT_OF_MEMORY = 9018, + VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9019,
VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, + VKD3D_SHADER_WARNING_VSIR_UNKNOWN_WRITE_MASK = 9301, };
enum vkd3d_shader_opcode