How much of a heresy is this thing?
It has a negative diff count; you can't approve this! Also, it causes the HLSL compiler to emit slightly shorter and more readable TPF code that uses fewer registers.
I guess something similar should be doable for D3DBC too, though it requires some prior refactoring so that there is a single place in which to intercept constant registers that are candidates for inlining.
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/tpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 2166eb41..ca73baf4 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -4007,7 +4007,7 @@ static void write_sm4_constant(struct hlsl_ctx *ctx, instr.dst_count = 1;
sm4_src_from_constant_value(&instr.srcs[0], &constant->value, dimx, instr.dsts[0].writemask); - instr.src_count = 1, + instr.src_count = 1;
write_sm4_instruction(buffer, &instr); }
From: Giovanni Mascellani gmascellani@codeweavers.com
So that it can be used by sm4_src_from_node() in later commits. --- libs/vkd3d-shader/tpf.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index ca73baf4..3fbaca99 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3512,16 +3512,6 @@ static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr); }
-static void sm4_src_from_node(struct sm4_src_register *src, - const struct hlsl_ir_node *instr, unsigned int map_writemask) -{ - unsigned int writemask; - - sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); - if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) - src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -} - static void sm4_src_from_constant_value(struct sm4_src_register *src, const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) { @@ -3545,6 +3535,16 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, } }
+static void sm4_src_from_node(struct sm4_src_register *src, + const struct hlsl_ir_node *instr, unsigned int map_writemask) +{ + unsigned int writemask; + + sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); + if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +} + static uint32_t sm4_encode_register(const struct sm4_register *reg) { return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT)
From: Giovanni Mascellani gmascellani@codeweavers.com
--- libs/vkd3d-shader/tpf.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index 3fbaca99..e28b4f57 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -3540,6 +3540,14 @@ static void sm4_src_from_node(struct sm4_src_register *src, { unsigned int writemask;
+ if (instr->type == HLSL_IR_CONSTANT) + { + struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + + sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask); + return; + } + sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr); if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4) src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
From: Giovanni Mascellani gmascellani@codeweavers.com
Since constants are now inlined. --- libs/vkd3d-shader/hlsl_codegen.c | 4 ++++ libs/vkd3d-shader/tpf.c | 29 +++++++---------------------- 2 files changed, 11 insertions(+), 22 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index bbb5223b..33758726 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3002,6 +3002,10 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx,
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) { + /* In SM4 all constants are inlined. */ + if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) + continue; + if (!instr->reg.allocated && instr->last_read) { instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index e28b4f57..d71dfa21 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -4002,24 +4002,6 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff write_sm4_instruction(buffer, &instr); }
-static void write_sm4_constant(struct hlsl_ctx *ctx, - struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_constant *constant) -{ - const unsigned int dimx = constant->node.data_type->dimx; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - - sm4_dst_from_node(&instr.dsts[0], &constant->node); - instr.dst_count = 1; - - sm4_src_from_constant_value(&instr.srcs[0], &constant->value, dimx, instr.dsts[0].writemask); - instr.src_count = 1; - - write_sm4_instruction(buffer, &instr); -} - static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, @@ -5080,16 +5062,19 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * }
assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR); + + if (!instr->reg.allocated) + { + assert(instr->type == HLSL_IR_CONSTANT); + continue; + } }
switch (instr->type) { case HLSL_IR_CALL: - vkd3d_unreachable(); - case HLSL_IR_CONSTANT: - write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr)); - break; + vkd3d_unreachable();
case HLSL_IR_EXPR: write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr));
How much of a heresy is this thing?
It's not the way we eventually want to solve this, but I don't hate it enough to reject it.
That said, there are plenty of similar patches that can be written, and I don't want to spend a long time writing them all, instead of fixing this the right way. So I'll accept this, but I don't want to accept too many similar patches...
This merge request was approved by Zebediah Figura.
This merge request was approved by Henri Verbeet.