On Tue, Dec 21, 2021 at 2:04 PM Francisco Casas <fcasas(a)codeweavers.com> wrote:
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index d55815f1..61247031 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1661,6 +1661,50 @@ static void write_sm4_loop(struct hlsl_ctx *ctx, write_sm4_instruction(buffer, &instr); }
+ +static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, + const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, + unsigned int swizzle, const struct hlsl_ir_node *texel_offset) +{ + struct sm4_instruction instr; + unsigned int writemask; + + memset(&instr, 0, sizeof(instr)); + + /* TODO: possible optimization, if the offset ranges from -8 to 7, an VKD3D_SM4_OP_GATHER4 with + an aoffimmi modifier can be used. */ + instr.opcode = texel_offset? VKD3D_SM5_OP_GATHER4_PO : VKD3D_SM4_OP_GATHER4;
Not just an optimization, since the _PO variant is SM5 which is problematic if we're compiling for a 4.0 profile. It's okay to have this as a (temporary) TODO though.