This should be the last piece of the puzzle for SNK games.
Basically, this adds support for "if" conditionals for shader models 3.0 and 4.0.
This is achieved through the use of the `if_comp` instruction (VKD3DSIH_IFC), but that instruction is not handled by the cf_flattener. So I wrote `vsir_program_lower_ifc()` as a normalization pass that turns each VKD3DSIH_IFC into a VKD3DSIH_IF (patch 3/4). This requires translating the condition of the IFC into a separate vsir comparison instruction. I think I got the mapping right in `get_handler_idx_from_rel_op()`, but it would be good if someone with more experience in SPIR-V took a close look.
From: Francisco Casas fcasas@codeweavers.com
--- tests/hlsl/conditional.shader_test | 168 +++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+)
diff --git a/tests/hlsl/conditional.shader_test b/tests/hlsl/conditional.shader_test index 89e00dce8..a73b42898 100644 --- a/tests/hlsl/conditional.shader_test +++ b/tests/hlsl/conditional.shader_test @@ -74,6 +74,26 @@ float main() : sv_target [require] shader model >= 3.0
+ +[pixel shader todo(sm<4)] +uniform float a; + +float4 main() : sv_target +{ + if (a < 4) + return float4(1, 2, 3, 4); + return float4(10, 20, 30, 40); +} + +[test] +uniform 0 float -2 +todo(sm<4 | glsl) draw quad +probe all rgba (1, 2, 3, 4) +uniform 0 float 10 +todo(sm<4 | glsl) draw quad +probe all rgba (10, 20, 30, 40) + + [pixel shader todo(sm<4)] uniform float4 u;
@@ -107,3 +127,151 @@ float4 main() : sv_target [test] todo(glsl) draw quad probe all rgba (9.0, 10.0, 11.0, 12.0) + + +[pixel shader todo(sm<4)] +int a, b; + +float4 main() : sv_target +{ + if (a < b) + return -1.0; + else + return 1.0; +} + +[test] +if(sm<4) uniform 0 float 8 +if(sm<4) uniform 4 float 9 +if(sm>=4) uniform 0 int 8 +if(sm>=4) uniform 1 int 9 +todo(sm<4 | glsl) draw quad +probe all rgba (-1.0, -1.0, -1.0, -1.0) +if(sm<4) uniform 0 float -3 +if(sm<4) uniform 4 float -4 +if(sm>=4) uniform 0 int -3 +if(sm>=4) uniform 1 int -4 +todo(sm<4 | glsl) draw quad +probe all rgba (1.0, 1.0, 1.0, 1.0) +if(sm<4) uniform 0 float 7 +if(sm<4) uniform 4 float 7 +if(sm>=4) uniform 0 int 7 +if(sm>=4) uniform 1 int 7 +todo(sm<4 | glsl) draw quad +probe all rgba (1.0, 1.0, 1.0, 1.0) + + +[pixel shader todo(sm<4)] +int a, b; + +float4 main() : sv_target +{ + if (a <= b) + return -1.0; + else + return 1.0; +} + +[test] +if(sm<4) uniform 0 float 8 +if(sm<4) uniform 4 float 9 +if(sm>=4) uniform 0 int 8 +if(sm>=4) uniform 1 int 9 +todo(sm<4 | glsl) draw quad +probe all rgba (-1.0, -1.0, -1.0, -1.0) +if(sm<4) uniform 0 float -3 +if(sm<4) uniform 4 float -4 +if(sm>=4) uniform 0 int -3 +if(sm>=4) uniform 1 int -4 +todo(sm<4 | glsl) draw quad +probe all rgba (1.0, 1.0, 1.0, 1.0) +if(sm<4) uniform 0 float 7 +if(sm<4) uniform 4 float 7 +if(sm>=4) uniform 0 int 7 +if(sm>=4) uniform 1 int 7 +todo(sm<4 | glsl) draw quad +probe all rgba (-1.0, -1.0, -1.0, -1.0) + + +[pixel shader todo(sm<4)] +int a, b; + +float4 main() : sv_target +{ + if (a != b) + return -1.0; + else + return 1.0; +} + +[test] +if(sm<4) uniform 0 float -3 +if(sm<4) uniform 4 float -2 +if(sm>=4) uniform 0 int -3 +if(sm>=4) uniform 1 int -2 +todo(sm<4 | glsl) draw quad +probe all rgba (-1.0, -1.0, -1.0, -1.0) +if(sm<4) uniform 0 float 4 +if(sm<4) uniform 4 float 4 +if(sm>=4) uniform 0 int 4 +if(sm>=4) uniform 1 int 4 +todo(sm<4 | glsl) draw quad +probe all rgba (1.0, 1.0, 1.0, 1.0) + + +% Test "if" conditionals, using 
resource loads to ensure we aren't removing these instruction with optimizations. +[srv 0] +size (2d, 2, 2) +0.0 0.0 0.0 4.0 1.0 0.0 0.0 4.0 +0.0 1.0 0.0 4.0 1.0 1.0 0.0 4.0 + +[sampler 0] +filter linear linear linear +address clamp clamp clamp + +[pixel shader todo(sm<4)] +Texture2D tex; +sampler sam; +float a; + +float4 main() : sv_target +{ + if (a < 4) + return tex.Sample(sam, float2(0, 0)); + return float4(1, 2, 3, 4); +} + +[test] +uniform 0 float -2 +todo(sm<4 | glsl) draw quad +probe all rgba (0.0, 0.0, 0.0, 4.0) +uniform 0 float 4 +todo(sm<4 | glsl) draw quad +probe all rgba (1.0, 2.0, 3.0, 4.0) + + +[pixel shader todo(sm<4)] +Texture2D tex; +sampler sam; +float a; + +float4 main() : sv_target +{ + if (a >= 2) + return tex.Sample(sam, float2(0, 0)); + else if(a >= 1) + return tex.Sample(sam, float2(1, 0)); + else + return tex.Sample(sam, float2(1, 1)); +} + +[test] +uniform 0 float 2 +todo(sm<4 | glsl) draw quad +probe all rgba (0.0, 0.0, 0.0, 4.0) +uniform 0 float 1 +todo(sm<4 | glsl) draw quad +probe all rgba (1.0, 0.0, 0.0, 4.0) +uniform 0 float 0 +todo(sm<4 | glsl) draw quad +probe all rgba (1.0, 1.0, 0.0, 4.0)
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/d3dbc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 4685afa08..fe4f9ec27 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -2594,12 +2594,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer write_sm1_instruction(ctx, buffer, &sm1_instr); }
-static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_function_decl *entry_func) +static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_block *block) { const struct hlsl_ir_node *instr;
- LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry) + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { if (instr->data_type) { @@ -2660,7 +2660,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun write_sm1_constant_defs(ctx, &buffer); write_sm1_semantic_dcls(ctx, &buffer); write_sm1_sampler_dcls(ctx, &buffer); - write_sm1_instructions(ctx, &buffer, entry_func); + write_sm1_block(ctx, &buffer, &entry_func->body);
put_u32(&buffer, D3DSIO_END);
From: Francisco Casas fcasas@codeweavers.com
--- libs/vkd3d-shader/ir.c | 113 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+)
diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index 610d907d9..bfba388e0 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -244,6 +244,116 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr return VKD3D_OK; }
+static enum vkd3d_shader_opcode get_handler_idx_from_rel_op(enum vkd3d_shader_rel_op rel_op, + enum vkd3d_data_type data_type, bool *requires_swap) +{ + switch (rel_op) + { + case VKD3D_SHADER_REL_OP_LT: + case VKD3D_SHADER_REL_OP_GT: + + *requires_swap = (rel_op == VKD3D_SHADER_REL_OP_GT); + if (data_type == VKD3D_DATA_INT) + return VKD3DSIH_ILT; + else if(data_type_is_integer(data_type)) + return VKD3DSIH_ULT; + else if(data_type_is_64_bit(data_type)) + return VKD3DSIH_DLT; + else + return VKD3DSIH_LTO; + break; + + case VKD3D_SHADER_REL_OP_GE: + case VKD3D_SHADER_REL_OP_LE: + *requires_swap = (rel_op == VKD3D_SHADER_REL_OP_LE); + if (data_type == VKD3D_DATA_INT) + return VKD3DSIH_IGE; + else if(data_type_is_integer(data_type)) + return VKD3DSIH_UGE; + else if(data_type_is_64_bit(data_type)) + return VKD3DSIH_DGEO; + else + return VKD3DSIH_GEO; + break; + + case VKD3D_SHADER_REL_OP_EQ: + *requires_swap = false; + if (data_type_is_integer(data_type)) + return VKD3DSIH_IEQ; + else if(data_type_is_64_bit(data_type)) + return VKD3DSIH_DEQO; + else + return VKD3DSIH_EQO; + break; + + case VKD3D_SHADER_REL_OP_NE: + *requires_swap = false; + if (data_type_is_integer(data_type)) + return VKD3DSIH_INE; + else if(data_type_is_64_bit(data_type)) + return VKD3DSIH_DNE; + else + return VKD3DSIH_NEO; + break; + } + vkd3d_unreachable(); +} + +static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + struct vkd3d_shader_instruction *ifc_ins, *ins; + enum vkd3d_shader_opcode handler_idx; + unsigned int tmp_idx = ~0u; + unsigned int i; + bool swap; + + for (i = 0; i < instructions->count; ++i) + { + ifc_ins = &instructions->elements[i]; + + if (ifc_ins->handler_idx != VKD3DSIH_IFC) + continue; + + if (!shader_instruction_array_insert_at(instructions, i + 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (tmp_idx == ~0u) + tmp_idx = program->temp_count++; + + /* Replace ifc 
comparison with actual comparison, saving the result in the tmp register. */ + handler_idx = get_handler_idx_from_rel_op(ifc_ins->flags, ifc_ins->src[0].reg.data_type, &swap); + + ins = &instructions->elements[i + 1]; + if (!vsir_instruction_init_with_params(program, ins, &ifc_ins->location, handler_idx, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].reg.idx[0].offset = tmp_idx; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; + + ins->src[0] = ifc_ins->src[swap]; + ins->src[1] = ifc_ins->src[!swap]; + + /* Create new if instruction using the previous result. */ + ins = &instructions->elements[i + 2]; + if (!vsir_instruction_init_with_params(program, ins, &ifc_ins->location, VKD3DSIH_IF, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; + + vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].reg.idx[0].offset = tmp_idx; + ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + + /* Make the original instruction no-op */ + vkd3d_shader_instruction_make_nop(ifc_ins); + } + + return VKD3D_OK; +} + static void shader_register_eliminate_phase_addressing(struct vkd3d_shader_register *reg, unsigned int instance_id) { @@ -5817,6 +5927,9 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t if ((result = vsir_program_lower_instructions(program)) < 0) return result;
+ if ((result = vsir_program_lower_ifc(program)) < 0) + return result; + if (program->shader_version.major >= 6) { if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0)
From: Francisco Casas fcasas@codeweavers.com
According to the documentation, if_comp is available from 2_x pixel and vertex shaders and, unlike "if bool", it doesn't expect a constant boolean register (from the input signature), so:
if_ne cond, -cond
seems like a convenient way to write these, for profiles above 2.0. --- libs/vkd3d-shader/d3dbc.c | 54 ++++++++++++++++++++++++++++++ tests/hlsl/conditional.shader_test | 32 +++++++++--------- 2 files changed, 70 insertions(+), 16 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index fe4f9ec27..93ddb0d5e 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1793,6 +1793,7 @@ static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) struct sm1_instruction { D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; + unsigned int flags;
struct sm1_dst_register { @@ -1832,6 +1833,8 @@ static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu uint32_t token = instr->opcode; unsigned int i;
+ token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); + if (ctx->profile->major_version > 1) token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -2394,6 +2397,49 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } }
+static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_block *block); + +static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_if *iff = hlsl_ir_if(instr); + const struct hlsl_ir_node *condition; + struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; + + condition = iff->condition.node; + assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); + + sm1_ifc = (struct sm1_instruction) + { + .opcode = D3DSIO_IFC, + .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */ + + .srcs[0].type = D3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[0].reg = condition->reg.id, + .srcs[0].mod = 0, + + .srcs[1].type = D3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[1].reg = condition->reg.id, + .srcs[1].mod = D3DSPSM_NEG, + + .src_count = 2, + }; + write_sm1_instruction(ctx, buffer, &sm1_ifc); + write_sm1_block(ctx, buffer, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; + write_sm1_instruction(ctx, buffer, &sm1_else); + write_sm1_block(ctx, buffer, &iff->else_block); + } + + sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; + write_sm1_instruction(ctx, buffer, &sm1_endif); +} + static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); @@ -2623,6 +2669,14 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * write_sm1_expr(ctx, buffer, instr); break;
+ case HLSL_IR_IF: + if (ctx->profile->major_version > 2 + || (ctx->profile->major_version == 2 && ctx->profile->minor_version >= 1)) + write_sm1_if(ctx, buffer, instr); + else + hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); + break; + case HLSL_IR_JUMP: write_sm1_jump(ctx, buffer, instr); break; diff --git a/tests/hlsl/conditional.shader_test b/tests/hlsl/conditional.shader_test index a73b42898..2b9ca66bd 100644 --- a/tests/hlsl/conditional.shader_test +++ b/tests/hlsl/conditional.shader_test @@ -75,7 +75,7 @@ float main() : sv_target shader model >= 3.0
-[pixel shader todo(sm<4)] +[pixel shader] uniform float a;
float4 main() : sv_target @@ -87,14 +87,14 @@ float4 main() : sv_target
[test] uniform 0 float -2 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1, 2, 3, 4) uniform 0 float 10 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (10, 20, 30, 40)
-[pixel shader todo(sm<4)] +[pixel shader] uniform float4 u;
float4 main() : sv_target @@ -108,7 +108,7 @@ float4 main() : sv_target
[test] uniform 0 float4 0.0 0.0 0.0 0.0 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (0.9, 0.8, 0.7, 0.6)
[pixel shader] @@ -129,7 +129,7 @@ todo(glsl) draw quad probe all rgba (9.0, 10.0, 11.0, 12.0)
-[pixel shader todo(sm<4)] +[pixel shader] int a, b;
float4 main() : sv_target @@ -145,23 +145,23 @@ if(sm<4) uniform 0 float 8 if(sm<4) uniform 4 float 9 if(sm>=4) uniform 0 int 8 if(sm>=4) uniform 1 int 9 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-1.0, -1.0, -1.0, -1.0) if(sm<4) uniform 0 float -3 if(sm<4) uniform 4 float -4 if(sm>=4) uniform 0 int -3 if(sm>=4) uniform 1 int -4 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) if(sm<4) uniform 0 float 7 if(sm<4) uniform 4 float 7 if(sm>=4) uniform 0 int 7 if(sm>=4) uniform 1 int 7 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0)
-[pixel shader todo(sm<4)] +[pixel shader] int a, b;
float4 main() : sv_target @@ -177,23 +177,23 @@ if(sm<4) uniform 0 float 8 if(sm<4) uniform 4 float 9 if(sm>=4) uniform 0 int 8 if(sm>=4) uniform 1 int 9 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-1.0, -1.0, -1.0, -1.0) if(sm<4) uniform 0 float -3 if(sm<4) uniform 4 float -4 if(sm>=4) uniform 0 int -3 if(sm>=4) uniform 1 int -4 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) if(sm<4) uniform 0 float 7 if(sm<4) uniform 4 float 7 if(sm>=4) uniform 0 int 7 if(sm>=4) uniform 1 int 7 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-1.0, -1.0, -1.0, -1.0)
-[pixel shader todo(sm<4)] +[pixel shader] int a, b;
float4 main() : sv_target @@ -209,13 +209,13 @@ if(sm<4) uniform 0 float -3 if(sm<4) uniform 4 float -2 if(sm>=4) uniform 0 int -3 if(sm>=4) uniform 1 int -2 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (-1.0, -1.0, -1.0, -1.0) if(sm<4) uniform 0 float 4 if(sm<4) uniform 4 float 4 if(sm>=4) uniform 0 int 4 if(sm>=4) uniform 1 int 4 -todo(sm<4 | glsl) draw quad +todo(glsl) draw quad probe all rgba (1.0, 1.0, 1.0, 1.0)