From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/d3dbc.c | 33 ++++++++++++++++++ libs/vkd3d-shader/hlsl.h | 2 ++ libs/vkd3d-shader/hlsl.y | 23 ++++++------- libs/vkd3d-shader/hlsl_codegen.c | 59 ++++++++++++++++++++++++++++++++ tests/hlsl-clip.shader_test | 18 +++++----- 5 files changed, 113 insertions(+), 22 deletions(-)
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index c35f8ca0..76e06333 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1834,6 +1834,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } }
+static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + + switch (jump->type) + { + case HLSL_IR_JUMP_DISCARD_NEG: + { + struct hlsl_reg *reg = &jump->condition.node->reg; + + struct sm1_instruction instr = + { + .opcode = VKD3D_SM1_OP_TEXKILL, + + .dst.type = D3DSPR_TEMP, + .dst.reg = reg->id, + .dst.writemask = reg->writemask, + .has_dst = 1, + }; + + write_sm1_instruction(ctx, buffer, &instr); + break; + } + + default: + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); + } +} + static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { const struct hlsl_ir_load *load = hlsl_ir_load(instr); @@ -2028,6 +2057,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b write_sm1_expr(ctx, buffer, instr); break;
+ case HLSL_IR_JUMP: + write_sm1_jump(ctx, buffer, instr); + break; + case HLSL_IR_LOAD: write_sm1_load(ctx, buffer, instr); break; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 727e4ee9..28c7c691 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1135,6 +1135,8 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 9de2d7f1..07a47a7f 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -273,9 +273,6 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ return hlsl_types_are_componentwise_equal(ctx, src, dst); }
-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc); - static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { @@ -333,7 +330,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs,
dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);
- if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) + if (!(component_load = hlsl_add_load_component(ctx, instrs, node, src_idx, loc))) return NULL;
if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) @@ -663,7 +660,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, return true; }
-static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *load, *store; @@ -1284,7 +1281,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, { if (operands[j]) { - if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) + if (!(load = hlsl_add_load_component(ctx, instrs, operands[j], i, loc))) return NULL;
cell_operands[j] = load; @@ -1779,7 +1776,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in return NULL; list_add_tail(instrs, &cell->entry);
- if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) + if (!(load = hlsl_add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) return NULL;
if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) @@ -1868,7 +1865,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_type *dst_comp_type; struct hlsl_block block;
- if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) + if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) return;
dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); @@ -2405,7 +2402,7 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { - if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) return false;
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) @@ -2449,7 +2446,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { - if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) return false;
if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) @@ -3062,10 +3059,10 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, { struct hlsl_ir_node *value1, *value2, *mul;
- if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) + if (!(value1 = hlsl_add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) return false;
- if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) + if (!(value2 = hlsl_add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) return false;
if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) @@ -3421,7 +3418,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_block block;
- if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) + if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) return false;
if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index ba624afc..e755c256 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2584,6 +2584,61 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; }
+static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; + static const struct hlsl_constant_value zero_value; + struct hlsl_type *arg_type, *cmp_type; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_ir_jump *jump; + unsigned int i, count; + struct list instrs; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + + list_init(&instrs); + + arg_type = jump->condition.node->data_type; + if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) + return false; + list_add_tail(&instrs, &zero->entry); + + operands[0] = jump->condition.node; + operands[1] = zero; + cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); + if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) + return false; + list_add_tail(&instrs, &cmp->entry); + + if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) + return false; + list_add_tail(&instrs, &bool_false->entry); + + or = bool_false; + + count = hlsl_type_component_count(cmp_type); + for (i = 0; i < count; ++i) + { + if (!(load = hlsl_add_load_component(ctx, &instrs, cmp, i, &instr->loc))) + return false; + + if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) + return NULL; + list_add_tail(&instrs, &or->entry); + } + + list_move_tail(&instr->entry, &instrs); + hlsl_src_remove(&jump->condition); + hlsl_src_from_node(&jump->condition, or); + jump->type = HLSL_IR_JUMP_DISCARD_NZ; + + return true; +} + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { switch (instr->type) @@ -4058,6 +4113,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point "%s" is missing a [numthreads] attribute.", entry_func->func->name);
+ if (profile->major_version >= 4) + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); do diff --git a/tests/hlsl-clip.shader_test b/tests/hlsl-clip.shader_test index d6c8b06d..f9859e1b 100644 --- a/tests/hlsl-clip.shader_test +++ b/tests/hlsl-clip.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo] +[pixel shader] uniform float4 x;
float4 main() : sv_target @@ -9,14 +9,14 @@ float4 main() : sv_target
[test] uniform 0 float4 1 2 3 4 -todo draw quad -todo probe all rgba (1, 2, 3, 4) +draw quad +probe all rgba (1, 2, 3, 4) uniform 0 float4 9 8 7 6 -todo draw quad -todo probe all rgba (9, 8, 7, 6) +draw quad +probe all rgba (9, 8, 7, 6) uniform 0 float4 -1 8 7 6 -todo draw quad -todo probe all rgba (9, 8, 7, 6) +draw quad +probe all rgba (9, 8, 7, 6) uniform 0 float4 9 0 7 6 -todo draw quad -todo probe all rgba (9, 0, 7, 6) +draw quad +probe all rgba (9, 0, 7, 6)