From: Zebediah Figura zfigura@codeweavers.com
--- tests/uav.shader_test | 66 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+)
diff --git a/tests/uav.shader_test b/tests/uav.shader_test index 09ca05ec..aece4d48 100644 --- a/tests/uav.shader_test +++ b/tests/uav.shader_test @@ -22,6 +22,7 @@ float4 main() : sv_target return 0; }
+ [uav 1] format r32 float size (2, 2) @@ -54,3 +55,68 @@ probe uav 1 (0, 1) r (0.6) probe uav 1 (1, 0) r (0.2) probe uav 1 (1, 1) r (0.7) probe uav 2 (0, 0) rgba (2.0, 1.0, 4.0, 3.0) + + +% UAVs are implicitly allocated starting from the highest render target slot. +% They cannot overlap render target slots, and also cannot be allocated any +% lower than the highest render target. +% This ceases to be true with shader model 5.1. + +[render target 1] +format r32g32b32a32 float +size (640, 480) + +[uav 2] +size (1, 1) + +0.1 0.2 0.3 0.4 + +[pixel shader fail] +RWTexture2D<float4> u : register(u0); + +float4 main() : sv_target1 +{ + u[uint2(0, 0)] = float4(0.9, 0.8, 0.7, 0.6); + return 0; +} + +[pixel shader fail] +RWTexture2D<float4> u : register(u1); + +float4 main() : sv_target1 +{ + u[uint2(0, 0)] = float4(0.9, 0.8, 0.7, 0.6); + return 0; +} + +[pixel shader] +RWTexture2D<float4> u; + +float4 main() : sv_target1 +{ + u[uint2(0, 0)] = float4(0.9, 0.8, 0.7, 0.6); + return 0; +} + +[test] +todo draw quad +probe uav 2 (0, 0) rgba (0.9, 0.8, 0.7, 0.6) + + +[uav 3] +size (1, 1) + +0.1 0.2 0.3 0.4 + +[pixel shader] +RWTexture2D<float4> u : register(u3); + +float4 main() : sv_target1 +{ + u[uint2(0, 0)] = float4(0.9, 0.8, 0.7, 0.6); + return 0; +} + +[test] +todo draw quad +probe uav 3 (0, 0) rgba (0.9, 0.8, 0.7, 0.6)
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index aacaa95c..3e36aa05 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2284,6 +2284,7 @@ object_types[] = { { HLSL_TYPE_SAMPLER, 's' }, { HLSL_TYPE_TEXTURE, 't' }, + { HLSL_TYPE_UAV, 'u' }, };
static const struct object_type_info *get_object_type_info(enum hlsl_base_type type) @@ -2302,7 +2303,20 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type) { const struct object_type_info *type_info = get_object_type_info(type); struct hlsl_ir_var *var; - uint32_t index = 0; + uint32_t min_index = 0; + uint32_t index; + + if (type == HLSL_TYPE_UAV) + { + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color") + || !ascii_strcasecmp(var->semantic.name, "sv_target"))) + min_index = max(min_index, var->semantic.index + 1); + } + } + + index = min_index;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -2315,7 +2329,13 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_base_type type) const struct hlsl_ir_var *reserved_object = get_reserved_object(ctx, type_info->reg_name, var->reg_reservation.index);
- if (reserved_object && reserved_object != var) + if (var->reg_reservation.index < min_index) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "UAV index (%u) must be higher than the maximum render target index (%u).", + var->reg_reservation.index, min_index - 1); + } + else if (reserved_object && reserved_object != var) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "Multiple objects bound to %c%u.", type_info->reg_name, @@ -2578,6 +2598,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry { allocate_buffers(ctx); allocate_objects(ctx, HLSL_TYPE_TEXTURE); + allocate_objects(ctx, HLSL_TYPE_UAV); } allocate_semantic_registers(ctx); allocate_objects(ctx, HLSL_TYPE_SAMPLER);
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl_sm4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 8ca37f3f..7884b4e7 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1140,12 +1140,13 @@ static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const st
static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) { + bool uav = (var->data_type->base_type == HLSL_TYPE_UAV); const struct sm4_instruction instr = { - .opcode = VKD3D_SM4_OP_DCL_RESOURCE + .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE) | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT),
- .dsts[0].reg.type = VKD3D_SM4_RT_RESOURCE, + .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, .dsts[0].reg.idx = {var->reg.id}, .dsts[0].reg.idx_count = 1, .dst_count = 1, @@ -2302,7 +2303,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
if (var->data_type->base_type == HLSL_TYPE_SAMPLER) write_sm4_dcl_sampler(&buffer, var); - else if (var->data_type->base_type == HLSL_TYPE_TEXTURE) + else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV) write_sm4_dcl_texture(&buffer, var); }
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl.y | 3 ++- libs/vkd3d-shader/hlsl_sm4.c | 30 ++++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 62d86b9d..5154d7b5 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -786,7 +786,8 @@ static bool add_array_load(struct hlsl_ctx *ctx, struct list *instrs, struct hls const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; struct hlsl_ir_expr *cast;
- if (expr_type->type == HLSL_CLASS_OBJECT && expr_type->base_type == HLSL_TYPE_TEXTURE + if (expr_type->type == HLSL_CLASS_OBJECT + && (expr_type->base_type == HLSL_TYPE_TEXTURE || expr_type->base_type == HLSL_TYPE_UAV) && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index 7884b4e7..f31ee169 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -855,6 +855,16 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } + else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV) + { + reg->type = VKD3D_SM5_RT_UAV; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; + reg->idx[0] = var->reg.id; + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } else if (data_type->type == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER) { reg->type = VKD3D_SM4_RT_SAMPLER; @@ -1397,24 +1407,28 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords) { + bool uav = (resource_type->base_type == HLSL_TYPE_UAV); struct sm4_instruction instr; unsigned int dim_count;
memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_LD; + instr.opcode = uav ? VKD3D_SM5_OP_LD_UAV_TYPED : VKD3D_SM4_OP_LD;
sm4_dst_from_node(&instr.dsts[0], dst); instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
- /* Mipmap level is in the last component in the IR, but needs to be in the W - * component in the instruction. */ - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - if (dim_count == 1) - instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); - if (dim_count == 2) - instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); + if (!uav) + { + /* Mipmap level is in the last component in the IR, but needs to be in the W + * component in the instruction. */ + dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); + if (dim_count == 1) + instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4); + if (dim_count == 2) + instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4); + }
sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask);
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl.c | 42 ++++++++++++++++++++++++++ libs/vkd3d-shader/hlsl.h | 16 ++++++++++ libs/vkd3d-shader/hlsl.y | 51 ++++++++++++++++++++++++++++---- libs/vkd3d-shader/hlsl_codegen.c | 13 ++++++++ 4 files changed, 117 insertions(+), 5 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 8591fe31..097975b4 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1129,6 +1129,20 @@ struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, return load; }
+struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, + struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_resource_store *store; + + if (!(store = hlsl_alloc(ctx, sizeof(*store)))) + return NULL; + init_node(&store->node, HLSL_IR_RESOURCE_STORE, NULL, loc); + hlsl_copy_deref(ctx, &store->resource, resource); + hlsl_src_from_node(&store->coords, coords); + hlsl_src_from_node(&store->value, value); + return store; +} + struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) { @@ -1511,6 +1525,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) "HLSL_IR_LOOP", "HLSL_IR_JUMP", "HLSL_IR_RESOURCE_LOAD", + "HLSL_IR_RESOURCE_STORE", "HLSL_IR_STORE", "HLSL_IR_SWIZZLE", }; @@ -1800,6 +1815,17 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru vkd3d_string_buffer_printf(buffer, ")"); }
+static void dump_ir_resource_store(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_resource_store *store) +{ + vkd3d_string_buffer_printf(buffer, "store_resource(resource = "); + dump_deref(buffer, &store->resource); + vkd3d_string_buffer_printf(buffer, ", coords = "); + dump_src(buffer, &store->coords); + vkd3d_string_buffer_printf(buffer, ", value = "); + dump_src(buffer, &store->value); + vkd3d_string_buffer_printf(buffer, ")"); +} + static void dump_ir_store(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_store *store) { vkd3d_string_buffer_printf(buffer, "= ("); @@ -1867,6 +1893,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_resource_load(buffer, hlsl_ir_resource_load(instr)); break;
+ case HLSL_IR_RESOURCE_STORE: + dump_ir_resource_store(buffer, hlsl_ir_resource_store(instr)); + break; + case HLSL_IR_STORE: dump_ir_store(buffer, hlsl_ir_store(instr)); break; @@ -1992,6 +2022,14 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) vkd3d_free(load); }
+static void free_ir_resource_store(struct hlsl_ir_resource_store *store) +{ + hlsl_src_remove(&store->resource.offset); + hlsl_src_remove(&store->coords); + hlsl_src_remove(&store->value); + vkd3d_free(store); +} + static void free_ir_store(struct hlsl_ir_store *store) { hlsl_src_remove(&store->rhs); @@ -2039,6 +2077,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_resource_load(hlsl_ir_resource_load(node)); break;
+ case HLSL_IR_RESOURCE_STORE: + free_ir_resource_store(hlsl_ir_resource_store(node)); + break; + case HLSL_IR_STORE: free_ir_store(hlsl_ir_store(node)); break; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index f237d6c4..7182a186 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -178,6 +178,7 @@ enum hlsl_ir_node_type HLSL_IR_LOOP, HLSL_IR_JUMP, HLSL_IR_RESOURCE_LOAD, + HLSL_IR_RESOURCE_STORE, HLSL_IR_STORE, HLSL_IR_SWIZZLE, }; @@ -410,6 +411,13 @@ struct hlsl_ir_resource_load struct hlsl_src coords, lod, texel_offset; };
+struct hlsl_ir_resource_store +{ + struct hlsl_ir_node node; + struct hlsl_deref resource; + struct hlsl_src coords, value; +}; + struct hlsl_ir_store { struct hlsl_ir_node node; @@ -578,6 +586,12 @@ static inline struct hlsl_ir_resource_load *hlsl_ir_resource_load(const struct h return CONTAINING_RECORD(node, struct hlsl_ir_resource_load, node); }
+static inline struct hlsl_ir_resource_store *hlsl_ir_resource_store(const struct hlsl_ir_node *node) +{ + assert(node->type == HLSL_IR_RESOURCE_STORE); + return CONTAINING_RECORD(node, struct hlsl_ir_resource_store, node); +} + static inline struct hlsl_ir_store *hlsl_ir_store(const struct hlsl_ir_node *node) { assert(node->type == HLSL_IR_STORE); @@ -781,6 +795,8 @@ struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc); struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); +struct hlsl_ir_resource_store *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, + struct hlsl_ir_node *coords, struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc); struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 5154d7b5..7906d593 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -1159,6 +1159,7 @@ static unsigned int evaluate_array_dimension(struct hlsl_ir_node *node) case HLSL_IR_IF: case HLSL_IR_JUMP: case HLSL_IR_LOOP: + case HLSL_IR_RESOURCE_STORE: case HLSL_IR_STORE: WARN("Invalid node type %s.\n", hlsl_node_type_to_string(node->type)); return 0; @@ -1699,7 +1700,6 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; - struct hlsl_ir_store *store; struct hlsl_ir_expr *copy; unsigned int writemask = 0;
@@ -1724,7 +1724,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in if (!(rhs = add_implicit_conversion(ctx, instrs, rhs, lhs_type, &rhs->loc))) return NULL;
- while (lhs->type != HLSL_IR_LOAD) + while (lhs->type != HLSL_IR_LOAD && lhs->type != HLSL_IR_RESOURCE_LOAD) { if (lhs->type == HLSL_IR_EXPR && hlsl_ir_expr(lhs)->op == HLSL_OP1_CAST) { @@ -1761,9 +1761,50 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in } }
- if (!(store = hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc))) - return NULL; - list_add_tail(instrs, &store->node.entry); + if (lhs->type == HLSL_IR_RESOURCE_LOAD) + { + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(lhs); + struct hlsl_ir_resource_store *store; + struct hlsl_type *resource_type; + struct hlsl_ir_swizzle *coords; + unsigned int dim_count; + + /* Such an lvalue was produced by an index expression. */ + assert(load->load_type == HLSL_RESOURCE_LOAD); + resource_type = load->resource.var->data_type; + assert(resource_type->type == HLSL_CLASS_OBJECT); + assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV); + + if (resource_type->base_type != HLSL_TYPE_UAV) + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Read-only resources cannot be stored to."); + + dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); + + if (writemask != ((1u << resource_type->e.resource_format->dimx) - 1)) + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, + "Resource store expressions must write to all components."); + + /* Remove the (implicit) mipmap level from the load expression. */ + assert(load->coords.node->data_type->type == HLSL_CLASS_VECTOR); + assert(load->coords.node->data_type->base_type == HLSL_TYPE_UINT); + assert(load->coords.node->data_type->dimx == dim_count + 1); + if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), dim_count, load->coords.node, &lhs->loc))) + return NULL; + list_add_tail(instrs, &coords->node.entry); + + if (!(store = hlsl_new_resource_store(ctx, &load->resource, &coords->node, rhs, &lhs->loc))) + return NULL; + list_add_tail(instrs, &store->node.entry); + } + else + { + struct hlsl_ir_store *store; + + if (!(store = hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc))) + return NULL; + list_add_tail(instrs, &store->node.entry); + }
/* Don't use the instruction itself as a source, as this makes structure * splitting easier. Instead copy it here. Since we retrieve sources from diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 3e36aa05..6743f9c0 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1588,6 +1588,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_IF: case HLSL_IR_JUMP: case HLSL_IR_LOOP: + case HLSL_IR_RESOURCE_STORE: break; }
@@ -1724,6 +1725,18 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop load->lod.node->last_read = instr->index; break; } + case HLSL_IR_RESOURCE_STORE: + { + struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); + + var = store->resource.var; + var->last_read = max(var->last_read, var_last_read); + if (store->resource.offset.node) + store->resource.offset.node->last_read = instr->index; + store->coords.node->last_read = instr->index; + store->value.node->last_read = instr->index; + break; + } case HLSL_IR_SWIZZLE: { struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl_sm4.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_sm4.c b/libs/vkd3d-shader/hlsl_sm4.c index f31ee169..51dc1f0f 100644 --- a/libs/vkd3d-shader/hlsl_sm4.c +++ b/libs/vkd3d-shader/hlsl_sm4.c @@ -1608,6 +1608,24 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, } }
+static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; + + sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + + write_sm4_instruction(buffer, &instr); +} + static void write_sm4_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) { @@ -2176,6 +2194,18 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, } }
+static void write_sm4_resource_store(struct hlsl_ctx *ctx, + struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) +{ + if (!store->resource.var->is_uniform) + { + hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); + return; + } + + write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); +} + static void write_sm4_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) { @@ -2261,6 +2291,10 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); break;
+ case HLSL_IR_RESOURCE_STORE: + write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); + break; + case HLSL_IR_LOOP: write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); break;
From: Zebediah Figura zfigura@codeweavers.com
--- libs/vkd3d-shader/hlsl_codegen.c | 13 +++++++++++++ tests/uav.shader_test | 6 +++--- 2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 6743f9c0..080b7a25 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -786,6 +786,15 @@ static bool copy_propagation_transform_resource_load(struct hlsl_ctx *ctx, return progress; }
+static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx, + struct hlsl_ir_resource_store *store, struct copy_propagation_state *state) +{ + bool progress = false; + + progress |= copy_propagation_transform_object_load(ctx, &store->resource, state); + return progress; +} + static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, struct copy_propagation_state *state) { @@ -932,6 +941,10 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b progress |= copy_propagation_transform_resource_load(ctx, hlsl_ir_resource_load(instr), state); break;
+ case HLSL_IR_RESOURCE_STORE: + progress |= copy_propagation_transform_resource_store(ctx, hlsl_ir_resource_store(instr), state); + break; + case HLSL_IR_STORE: copy_propagation_record_store(ctx, hlsl_ir_store(instr), state); break; diff --git a/tests/uav.shader_test b/tests/uav.shader_test index aece4d48..df5b9a8e 100644 --- a/tests/uav.shader_test +++ b/tests/uav.shader_test @@ -49,7 +49,7 @@ float4 main() : sv_target }
[test] -todo draw quad +draw quad probe uav 1 (0, 0) r (0.5) probe uav 1 (0, 1) r (0.6) probe uav 1 (1, 0) r (0.2) @@ -99,7 +99,7 @@ float4 main() : sv_target1 }
[test] -todo draw quad +draw quad probe uav 2 (0, 0) rgba (0.9, 0.8, 0.7, 0.6)
@@ -118,5 +118,5 @@ float4 main() : sv_target1 }
[test] -todo draw quad +draw quad probe uav 3 (0, 0) rgba (0.9, 0.8, 0.7, 0.6)
Giovanni Mascellani (@giomasce) commented about tests/uav.shader_test:
return 0;
}
This is intended, right?
Giovanni Mascellani (@giomasce) commented about libs/vkd3d-shader/hlsl_sm4.c:
const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_ir_node *coords)
{
bool uav = (resource_type->base_type == HLSL_TYPE_UAV); struct sm4_instruction instr; unsigned int dim_count;
memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_LD;
+ instr.opcode = uav ? VKD3D_SM5_OP_LD_UAV_TYPED : VKD3D_SM4_OP_LD;
It seems that the native compiler generates instructions like this: ``` ld_uav_typed_indexable(texture2d)(float,float,float,float) r0.xyzw, l(0, 0, 0, 0), u2.xyzw ``` The instruction you generate misses the `_indexable(texture2d)` bit. AFAIU that would require adding a modifier of type `VKD3D_SM5_MODIFIER_RESOURCE_TYPE`. Is it intentional?
I guess that if the compiled shader is fed to vkd3d/wine we don't care too much about that field, but should we write it anyway in the interest of compatibility with native?
On Thu Oct 20 14:57:06 2022 +0000, Giovanni Mascellani wrote:
This is intended, right?
Yes, to have a bit more visual separation between the tests.
On Thu Oct 20 14:57:08 2022 +0000, Giovanni Mascellani wrote:
It seems that the native compiler generates instructions like this:
ld_uav_typed_indexable(texture2d)(float,float,float,float) r0.xyzw, l(0, 0, 0, 0), u2.xyzw
The instruction you generate misses the `_indexable(texture2d)` bit. AFAIU that would require adding a modifier of type `VKD3D_SM5_MODIFIER_RESOURCE_TYPE`. Is it intentional? I guess that if the compiled shader is fed to vkd3d/wine we don't care too much about that field, but should we write it anyway in the interest of compatibility with native?
We should write it, yes, but it's a bit out of scope of this patch. Those `VKD3D_SM5_MODIFIER_RESOURCE_TYPE` modifiers are generated on all sm 5.0 resource loads, not just UAV loads.
This merge request was approved by Giovanni Mascellani.
On Thu Oct 20 21:32:32 2022 +0000, Zebediah Figura wrote:
We should write it, yes, but it's a bit out of scope of this patch. Those `VKD3D_SM5_MODIFIER_RESOURCE_TYPE` modifiers are generated on all sm 5.0 resource loads, not just UAV loads.
Ok, got it.
Francisco Casas (@fcasas) commented about tests/uav.shader_test:
float4 main() : sv_target { /* All four components must be written in a single statement. */ u[uint2(0, 0)].xy = float4(1, 2);
It is not a fault of this patch but, since you are editing this file, `float4(1, 2)` and `float(3, 4)` are not valid.
I think that handling the HLSL_IR_RESOURCE_STORE's deref is missing in transform_deref_paths_into_offsets().
Specifically: ``` case HLSL_IR_RESOURCE_STORE: replace_deref_path_with_offset(ctx, &hlsl_ir_resource_store(instr)->resource, instr); ```
I think it only matters if the UAV is a member of a struct, but still.
Francisco Casas (@fcasas) commented about libs/vkd3d-shader/hlsl.y:
- if (!(store = hlsl_new_store_index(ctx, &hlsl_ir_load(lhs)->src, NULL, rhs, writemask, &rhs->loc)))
return NULL;
- list_add_tail(instrs, &store->node.entry);
+ if (lhs->type == HLSL_IR_RESOURCE_LOAD)
+ {
struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(lhs);
struct hlsl_ir_resource_store *store;
struct hlsl_type *resource_type;
struct hlsl_ir_swizzle *coords;
unsigned int dim_count;
/* Such an lvalue was produced by an index expression. */
assert(load->load_type == HLSL_RESOURCE_LOAD);
resource_type = load->resource.var->data_type;
assert(resource_type->type == HLSL_CLASS_OBJECT);
assert(resource_type->base_type == HLSL_TYPE_TEXTURE || resource_type->base_type == HLSL_TYPE_UAV);
These assertions fail when the object loaded is a member of a struct or array, for instance, with the following test:
``` [uav 2] size (1, 1)
0.1 0.2 0.3 0.4
[uav 3] size (1, 1)
0.5 0.6 0.7 0.8
[pixel shader] RWTexture2D<float4> u[2] : register(u3);
float4 main() : sv_target1 { u[0][uint2(0, 0)] = float4(1.1, 1.2, 1.3, 1.4); u[1][uint2(0, 0)] = float4(2.1, 2.2, 2.3, 2.4); return 0; }
[test] draw quad probe uav 2 (0, 0) rgba (1.1, 1.2, 1.3, 1.4) probe uav 3 (0, 0) rgba (2.1, 2.2, 2.3, 2.4)
```
It may be good to replace them with an hlsl_fixme for now, similar to the one we have in write_sm4_resource_load():
``` if (resource_type->type != HLSL_CLASS_OBJECT) { assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT); hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable."); return; } ```
Or outright supporting those cases if it proves to be simple enough.