-- v5: vkd3d-shader/fx: Do not align structured data section. vkd3d-shader/fx: Add initial support for writing buffers descriptions. vkd3d-shader/fx: Do not align strings for fx_4/fx_5 profiles. vkd3d-shader/fx: Use variable pointer in write_group().
From: Nikolay Sivov nsivov@codeweavers.com
The helper will need to access group annotations later, and those are accessible through the variable.
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/fx.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d-shader/fx.c b/libs/vkd3d-shader/fx.c index e1459f76c..b56dfa216 100644 --- a/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d-shader/fx.c @@ -227,10 +227,10 @@ static void write_techniques(struct hlsl_scope *scope, struct fx_write_context * set_status(fx, fx->structured.status); }
-static void write_group(struct hlsl_scope *scope, const char *name, struct fx_write_context *fx) +static void write_group(struct hlsl_ir_var *var, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t name_offset = write_string(name, fx); + uint32_t name_offset = write_string(var ? var->name : NULL, fx); uint32_t count_offset, count;
put_u32(buffer, name_offset); @@ -238,14 +238,15 @@ static void write_group(struct hlsl_scope *scope, const char *name, struct fx_wr put_u32(buffer, 0); /* Annotation count */
count = fx->technique_count; - write_techniques(scope, fx); + write_techniques(var ? var->scope : fx->ctx->globals, fx); set_u32(buffer, count_offset, fx->technique_count - count);
++fx->group_count; }
-static void write_groups(struct hlsl_scope *scope, struct fx_write_context *fx) +static void write_groups(struct fx_write_context *fx) { + struct hlsl_scope *scope = fx->ctx->globals; bool needs_default_group = false; struct hlsl_ir_var *var;
@@ -259,13 +260,13 @@ static void write_groups(struct hlsl_scope *scope, struct fx_write_context *fx) }
if (needs_default_group) - write_group(scope, NULL, fx); + write_group(NULL, fx); LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) { const struct hlsl_type *type = var->data_type;
if (type->base_type == HLSL_TYPE_EFFECT_GROUP) - write_group(var->scope, var->name, fx); + write_group(var, fx); } }
@@ -444,7 +445,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) /* TODO: objects */ /* TODO: interface variables */
- write_groups(ctx->globals, &fx); + write_groups(&fx);
put_u32(&buffer, 0xfeff2001); /* Version. */ put_u32(&buffer, 0); /* Buffer count. */
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/fx.c | 2 +- libs/vkd3d-shader/vkd3d_shader_main.c | 10 ++++++++-- libs/vkd3d-shader/vkd3d_shader_private.h | 1 + 3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d-shader/fx.c b/libs/vkd3d-shader/fx.c index b56dfa216..ac7ffae8c 100644 --- a/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d-shader/fx.c @@ -145,7 +145,7 @@ static uint32_t write_fx_4_string(const char *string, struct fx_write_context *f if (!(string_entry = hlsl_alloc(fx->ctx, sizeof(*string_entry)))) return 0;
- string_entry->offset = put_string(&fx->unstructured, string); + string_entry->offset = bytecode_put_bytes_unaligned(&fx->unstructured, string, strlen(string) + 1); string_entry->string = string;
rb_put(&fx->strings, string, &string_entry->entry); diff --git a/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d-shader/vkd3d_shader_main.c index 1ef8d9494..6d6c57582 100644 --- a/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d-shader/vkd3d_shader_main.c @@ -366,9 +366,9 @@ size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer) return aligned_size; }
-size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size) +size_t bytecode_put_bytes_unaligned(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size) { - size_t offset = bytecode_align(buffer); + size_t offset = buffer->size;
if (buffer->status) return offset; @@ -383,6 +383,12 @@ size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *byte return offset; }
+size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size) +{ + bytecode_align(buffer); + return bytecode_put_bytes_unaligned(buffer, bytes, size); +} + size_t bytecode_reserve_bytes(struct vkd3d_bytecode_buffer *buffer, size_t size) { size_t offset = bytecode_align(buffer); diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 1030adf98..7239beaf7 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1418,6 +1418,7 @@ struct vkd3d_bytecode_buffer /* Align to the next 4-byte offset, and return that offset. */ size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer); size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size); +size_t bytecode_put_bytes_unaligned(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size); size_t bytecode_reserve_bytes(struct vkd3d_bytecode_buffer *buffer, size_t size); void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value); void set_string(struct vkd3d_bytecode_buffer *buffer, size_t offset, const char *string, size_t length);
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/fx.c | 380 +++++++++++++++++++++++++++++-- libs/vkd3d-shader/hlsl.h | 7 + libs/vkd3d-shader/hlsl_codegen.c | 15 +- 3 files changed, 374 insertions(+), 28 deletions(-)
diff --git a/libs/vkd3d-shader/fx.c b/libs/vkd3d-shader/fx.c index ac7ffae8c..35f59473f 100644 --- a/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d-shader/fx.c @@ -20,6 +20,11 @@
#include "hlsl.h"
+static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uint32_t value) +{ + return bytecode_put_bytes_unaligned(buffer, &value, sizeof(value)); +} + struct string_entry { struct rb_entry entry; @@ -28,6 +33,20 @@ struct string_entry uint32_t offset; };
+struct type_entry +{ + struct rb_entry entry; + const char *name; + uint32_t elements_count; + uint32_t offset; +}; + +struct type_key +{ + const char *name; + uint32_t elements_count; +}; + static int string_storage_compare(const void *key, const struct rb_entry *entry) { struct string_entry *string_entry = RB_ENTRY_VALUE(entry, struct string_entry, entry); @@ -43,11 +62,31 @@ static void string_storage_destroy(struct rb_entry *entry, void *context) vkd3d_free(string_entry); }
+static int type_storage_compare(const void *key, const struct rb_entry *entry) +{ + struct type_entry *type_entry = RB_ENTRY_VALUE(entry, struct type_entry, entry); + const struct type_key *type_key = key; + int ret; + + if ((ret = strcmp(type_key->name, type_entry->name))) + return ret; + + return (int)type_key->elements_count - (int)type_entry->elements_count; +} + +static void type_storage_destroy(struct rb_entry *entry, void *context) +{ + struct type_entry *type_entry = RB_ENTRY_VALUE(entry, struct type_entry, entry); + + vkd3d_free(type_entry); +} + struct fx_write_context;
struct fx_write_context_ops { uint32_t (*write_string)(const char *string, struct fx_write_context *fx); + uint32_t (*write_type)(const struct hlsl_type *type, struct fx_write_context *fx); void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); }; @@ -60,17 +99,28 @@ struct fx_write_context struct vkd3d_bytecode_buffer structured;
struct rb_tree strings; + struct rb_tree types;
unsigned int min_technique_version; unsigned int max_technique_version;
uint32_t technique_count; uint32_t group_count; + uint32_t buffer_count; + uint32_t numeric_variable_count; int status;
const struct fx_write_context_ops *ops; };
+static void set_status(struct fx_write_context *fx, int status) +{ + if (fx->status < 0) + return; + if (status < 0) + fx->status = status; +} + static uint32_t write_string(const char *string, struct fx_write_context *fx) { return fx->ops->write_string(string, fx); @@ -81,6 +131,41 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) fx->ops->write_pass(var, fx); }
+static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) +{ + struct type_entry *type_entry; + struct rb_entry *entry; + struct type_key key; + + if (type->class == HLSL_CLASS_ARRAY) + { + key.name = hlsl_get_multiarray_element_type(type)->name; + key.elements_count = hlsl_get_multiarray_size(type); + } + else + { + key.name = type->name; + key.elements_count = 0; + } + + if ((entry = rb_get(&fx->types, &key))) + { + type_entry = RB_ENTRY_VALUE(entry, struct type_entry, entry); + return type_entry->offset; + } + + if (!(type_entry = hlsl_alloc(fx->ctx, sizeof(*type_entry)))) + return 0; + + type_entry->offset = fx->ops->write_type(type, fx); + type_entry->name = key.name; + type_entry->elements_count = key.elements_count; + + rb_put(&fx->types, &key, &type_entry->entry); + + return type_entry->offset; +} + static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_context_ops *ops, struct fx_write_context *fx) { @@ -107,12 +192,15 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co }
rb_init(&fx->strings, string_storage_compare); + rb_init(&fx->types, type_storage_compare); }
static int fx_write_context_cleanup(struct fx_write_context *fx) { int status = fx->status; + rb_destroy(&fx->strings, string_storage_destroy, NULL); + rb_destroy(&fx->types, type_storage_destroy, NULL);
return status; } @@ -181,6 +269,156 @@ static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx /* TODO: assignments */ }
+static uint32_t get_fx_4_type_size(const struct hlsl_type *type) +{ + uint32_t elements_count; + + elements_count = hlsl_get_multiarray_size(type); + type = hlsl_get_multiarray_element_type(type); + + return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; +} + +static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, struct fx_write_context *fx) +{ + static const unsigned int NUMERIC_BASE_TYPE_SHIFT = 3; + static const unsigned int NUMERIC_ROWS_SHIFT = 8; + static const unsigned int NUMERIC_COLUMNS_SHIFT = 11; + static const unsigned int NUMERIC_COLUMN_MAJOR_MASK = 0x4000; + static const uint32_t numeric_type_class[] = + { + [HLSL_CLASS_SCALAR] = 1, + [HLSL_CLASS_VECTOR] = 2, + [HLSL_CLASS_MATRIX] = 3, + }; + static const uint32_t numeric_base_type[] = + { + [HLSL_TYPE_FLOAT] = 1, + [HLSL_TYPE_INT ] = 2, + [HLSL_TYPE_UINT ] = 3, + [HLSL_TYPE_BOOL ] = 4, + }; + uint32_t value = 0; + + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + value |= numeric_type_class[type->class]; + break; + default: + FIXME("Unexpected type class %u.\n", type->class); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + return 0; + } + + switch (type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + value |= (numeric_base_type[type->base_type] << NUMERIC_BASE_TYPE_SHIFT); + break; + default: + FIXME("Unexpected base type %u.\n", type->base_type); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + return 0; + } + + value |= (type->dimy & 0x7) << NUMERIC_ROWS_SHIFT; + value |= (type->dimx & 0x7) << NUMERIC_COLUMNS_SHIFT; + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + value |= NUMERIC_COLUMN_MAJOR_MASK; + + return value; +} + +static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + uint32_t name_offset, offset, size, stride, 
numeric_desc; + uint32_t elements_count = 0; + static const uint32_t variable_type[] = + { + [HLSL_CLASS_SCALAR] = 1, + [HLSL_CLASS_VECTOR] = 1, + [HLSL_CLASS_MATRIX] = 1, + [HLSL_CLASS_OBJECT] = 2, + [HLSL_CLASS_STRUCT] = 3, + }; + + /* Resolve arrays to element type and number of elements. */ + if (type->class == HLSL_CLASS_ARRAY) + { + elements_count = hlsl_get_multiarray_size(type); + type = hlsl_get_multiarray_element_type(type); + } + + name_offset = write_string(type->name, fx); + offset = put_u32_unaligned(buffer, name_offset); + + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_OBJECT: + case HLSL_CLASS_STRUCT: + put_u32_unaligned(buffer, variable_type[type->class]); + break; + default: + FIXME("Writing type class %u is not implemented.\n", type->class); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + return 0; + } + + size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + if (elements_count) + size *= elements_count; + stride = align(stride, 4 * sizeof(float)); + + put_u32_unaligned(buffer, elements_count); + put_u32_unaligned(buffer, size); /* Total size. */ + put_u32_unaligned(buffer, stride); /* Stride. 
*/ + put_u32_unaligned(buffer, size); + + if (type->class == HLSL_CLASS_STRUCT) + { + size_t i; + + put_u32_unaligned(buffer, type->e.record.field_count); + for (i = 0; i < type->e.record.field_count; ++i) + { + const struct hlsl_struct_field *field = &type->e.record.fields[i]; + uint32_t semantic_offset, field_type_offset; + + name_offset = write_string(field->name, fx); + semantic_offset = write_string(field->semantic.name, fx); + field_type_offset = write_type(field->type, fx); + + put_u32_unaligned(buffer, name_offset); + put_u32_unaligned(buffer, semantic_offset); + put_u32_unaligned(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); + put_u32_unaligned(buffer, field_type_offset); + } + } + else if (type->class == HLSL_CLASS_OBJECT) + { + FIXME("Object types are not supported.\n"); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + return 0; + } + else /* Numeric type */ + { + numeric_desc = get_fx_4_numeric_type_description(type, fx); + put_u32_unaligned(buffer, numeric_desc); + } + + return offset; +} + static void write_fx_4_technique(struct hlsl_ir_var *var, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; @@ -202,14 +440,6 @@ static void write_fx_4_technique(struct hlsl_ir_var *var, struct fx_write_contex set_u32(buffer, count_offset, count); }
-static void set_status(struct fx_write_context *fx, int status) -{ - if (fx->status < 0) - return; - if (status < 0) - fx->status = status; -} - static void write_techniques(struct hlsl_scope *scope, struct fx_write_context *fx) { struct hlsl_ir_var *var; @@ -367,21 +597,135 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) static const struct fx_write_context_ops fx_4_ops = { .write_string = write_fx_4_string, + .write_type = write_fx_4_type, .write_technique = write_fx_4_technique, .write_pass = write_fx_4_pass, };
+static void write_fx_4_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t semantic_offset, flags = 0; + uint32_t name_offset, type_offset; + enum fx_4_variable_flags + { + HAS_EXPLICIT_BIND_POINT = 0x4, + }; + + /* Explicit bind point. */ + if (var->reg_reservation.reg_type) + flags |= HAS_EXPLICIT_BIND_POINT; + + type_offset = write_type(var->data_type, fx); + name_offset = write_string(var->name, fx); + semantic_offset = write_string(var->semantic.name, fx); + + put_u32(buffer, name_offset); + put_u32(buffer, type_offset); + + semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ + put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ + put_u32(buffer, 0); /* FIXME: default value offset */ + put_u32(buffer, flags); /* Flags */ + + put_u32(buffer, 0); /* Annotations count */ + /* FIXME: write annotations */ +} + +static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx) +{ + enum fx_4_buffer_flags + { + IS_TBUFFER = 0x1, + IS_SINGLE = 0x2, + }; + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t count = 0, bind_point = ~0u, flags = 0, size; + uint32_t name_offset, size_offset; + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_ir_var *var; + uint32_t count_offset; + + if (b->reservation.reg_type) + bind_point = b->reservation.reg_index; + if (b->type == HLSL_BUFFER_TEXTURE) + flags |= IS_TBUFFER; + /* FIXME: set 'single' flag for fx_5_0 */ + + name_offset = write_string(b->name, fx); + + put_u32(buffer, name_offset); /* Name */ + size_offset = put_u32(buffer, 0); /* Data size */ + put_u32(buffer, flags); /* Flags */ + count_offset = put_u32(buffer, 0); + put_u32(buffer, bind_point); /* Bind point */ + + put_u32(buffer, 0); /* Annotations count */ + /* FIXME: write annotations */ + + count = 0; + size = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (var->buffer != b) 
+ continue; + + write_fx_4_variable(var, fx); + size += get_fx_4_type_size(var->data_type); + ++count; + } + + set_u32(buffer, count_offset, count); + set_u32(buffer, size_offset, align(size, 16)); + + fx->numeric_variable_count += count; +} + +static void write_buffers(struct fx_write_context *fx) +{ + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_buffer *buffer; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (!(var->storage_modifiers & HLSL_STORAGE_UNIFORM) || hlsl_type_is_resource(var->data_type)) + continue; + + if (hlsl_var_has_buffer_offset_register_reservation(ctx, var)) + hlsl_calculate_buffer_offset(ctx, var, true); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (!(var->storage_modifiers & HLSL_STORAGE_UNIFORM) || hlsl_type_is_resource(var->data_type)) + continue; + + if (!hlsl_var_has_buffer_offset_register_reservation(ctx, var)) + hlsl_calculate_buffer_offset(ctx, var, false); + } + + LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) + { + if (!buffer->size) + continue; + + write_fx_4_buffer(buffer, fx); + ++fx->buffer_count; + } +} + static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) { struct vkd3d_bytecode_buffer buffer = { 0 }; - struct fx_write_context fx; uint32_t size_offset, size; + struct fx_write_context fx;
fx_write_context_init(ctx, &fx_4_ops, &fx);
put_u32(&fx.unstructured, 0); /* Empty string placeholder. */
- /* TODO: buffers */ + write_buffers(&fx); /* TODO: objects */ /* TODO: shared buffers */ /* TODO: shared objects */ @@ -389,9 +733,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) write_techniques(ctx->globals, &fx);
put_u32(&buffer, ctx->profile->minor_version == 0 ? 0xfeff1001 : 0xfeff1011); /* Version. */ - put_u32(&buffer, 0); /* Buffer count. */ - put_u32(&buffer, 0); /* Variable count. */ - put_u32(&buffer, 0); /* Object count. */ + put_u32(&buffer, fx.buffer_count); /* Buffer count. */ + put_u32(&buffer, fx.numeric_variable_count); /* Numeric variable count. */ + put_u32(&buffer, 0); /* Object variable count. */ put_u32(&buffer, 0); /* Pool buffer count. */ put_u32(&buffer, 0); /* Pool variable count. */ put_u32(&buffer, 0); /* Pool object count. */ @@ -434,23 +778,23 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) { struct vkd3d_bytecode_buffer buffer = { 0 }; - struct fx_write_context fx; uint32_t size_offset, size; + struct fx_write_context fx;
fx_write_context_init(ctx, &fx_4_ops, &fx);
put_u32(&fx.unstructured, 0); /* Empty string placeholder. */
- /* TODO: buffers */ + write_buffers(&fx); /* TODO: objects */ /* TODO: interface variables */
write_groups(&fx);
put_u32(&buffer, 0xfeff2001); /* Version. */ - put_u32(&buffer, 0); /* Buffer count. */ - put_u32(&buffer, 0); /* Variable count. */ - put_u32(&buffer, 0); /* Object count. */ + put_u32(&buffer, fx.buffer_count); /* Buffer count. */ + put_u32(&buffer, fx.numeric_variable_count); /* Numeric variable count. */ + put_u32(&buffer, 0); /* Object variable count. */ put_u32(&buffer, 0); /* Pool buffer count. */ put_u32(&buffer, 0); /* Pool variable count. */ put_u32(&buffer, 0); /* Pool object count. */ diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 58188ce6f..747f52fb8 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1146,6 +1146,11 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) } }
+static inline bool hlsl_var_has_buffer_offset_register_reservation(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) +{ + return var->reg_reservation.reg_type == 'c' && var->buffer == ctx->globals_buffer; +} + char *hlsl_sprintf_alloc(struct hlsl_ctx *ctx, const char *fmt, ...) VKD3D_PRINTF_FUNC(2, 3);
const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op); @@ -1313,6 +1318,8 @@ bool hlsl_type_is_resource(const struct hlsl_type *type); unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);
+void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, bool register_reservation); + const struct hlsl_type *hlsl_get_multiarray_element_type(const struct hlsl_type *type); unsigned int hlsl_get_multiarray_size(const struct hlsl_type *type);
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 6ad60e4c6..64652e230 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -4302,7 +4302,7 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 return NULL; }
-static void calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, bool register_reservation) +void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, bool register_reservation) { unsigned int var_reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; enum hlsl_type_class var_class = var->data_type->class; @@ -4416,11 +4416,6 @@ static void validate_buffer_offsets(struct hlsl_ctx *ctx) } }
-static bool var_has_buffer_offset_register_reservation(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) -{ - return var->reg_reservation.reg_type == 'c' && var->buffer == ctx->globals_buffer; -} - static void allocate_buffers(struct hlsl_ctx *ctx) { struct hlsl_buffer *buffer; @@ -4441,8 +4436,8 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (!var->is_uniform || hlsl_type_is_resource(var->data_type)) continue;
- if (var_has_buffer_offset_register_reservation(ctx, var)) - calculate_buffer_offset(ctx, var, true); + if (hlsl_var_has_buffer_offset_register_reservation(ctx, var)) + hlsl_calculate_buffer_offset(ctx, var, true); }
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) @@ -4450,8 +4445,8 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (!var->is_uniform || hlsl_type_is_resource(var->data_type)) continue;
- if (!var_has_buffer_offset_register_reservation(ctx, var)) - calculate_buffer_offset(ctx, var, false); + if (!hlsl_var_has_buffer_offset_register_reservation(ctx, var)) + hlsl_calculate_buffer_offset(ctx, var, false); }
validate_buffer_offsets(ctx);
From: Nikolay Sivov nsivov@codeweavers.com
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com --- libs/vkd3d-shader/fx.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/fx.c b/libs/vkd3d-shader/fx.c index 35f59473f..2b4ada36c 100644 --- a/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d-shader/fx.c @@ -718,8 +718,8 @@ static void write_buffers(struct fx_write_context *fx) static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) { struct vkd3d_bytecode_buffer buffer = { 0 }; - uint32_t size_offset, size; struct fx_write_context fx; + uint32_t size_offset;
fx_write_context_init(ctx, &fx_4_ops, &fx);
@@ -752,11 +752,10 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, 0); /* Shader count. */ put_u32(&buffer, 0); /* Inline shader count. */
- size = align(fx.unstructured.size, 4); - set_u32(&buffer, size_offset, size); + set_u32(&buffer, size_offset, fx.unstructured.size);
bytecode_put_bytes(&buffer, fx.unstructured.data, fx.unstructured.size); - bytecode_put_bytes(&buffer, fx.structured.data, fx.structured.size); + bytecode_put_bytes_unaligned(&buffer, fx.structured.data, fx.structured.size);
vkd3d_free(fx.unstructured.data); vkd3d_free(fx.structured.data); @@ -778,8 +777,8 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) { struct vkd3d_bytecode_buffer buffer = { 0 }; - uint32_t size_offset, size; struct fx_write_context fx; + uint32_t size_offset;
fx_write_context_init(ctx, &fx_4_ops, &fx);
@@ -816,11 +815,10 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, 0); /* Interface variable element count. */ put_u32(&buffer, 0); /* Class instance elements count. */
- size = align(fx.unstructured.size, 4); - set_u32(&buffer, size_offset, size); + set_u32(&buffer, size_offset, fx.unstructured.size);
bytecode_put_bytes(&buffer, fx.unstructured.data, fx.unstructured.size); - bytecode_put_bytes(&buffer, fx.structured.data, fx.structured.size); + bytecode_put_bytes_unaligned(&buffer, fx.structured.data, fx.structured.size);
vkd3d_free(fx.unstructured.data); vkd3d_free(fx.structured.data);
On Thu Feb 8 13:15:31 2024 +0000, Zebediah Figura wrote:
Couldn't bytecode_put_bytes() just call bytecode_align() and then call bytecode_put_bytes_unaligned()? Then we only need two functions instead of three.
This is done now.
On Thu Feb 8 08:22:34 2024 +0000, Nikolay Sivov wrote:
I see now why an additional field was easier: 'size' is set later on, and that involves extern_vars, which is not populated initially, and that in turn depends on uniform_copy(), which, if separated, depends on the presence of an entry point. Still, it's better to use the same field everywhere, so I'll see how I can separate this in a way that makes sense. E.g. there is no need to handle function parameters for effects; I don't think it's possible to use them there and have a $Params block written out as a result.
I pushed something for this. It does introduce some duplication. To avoid it, _emit() has to change to set buffer "size" and update variable offsets without adding <temp> variables.
Do we really want to be defining a type rbtree like this? The existing code is explicitly set up to be usable by the backend in such a way—cf. the "bytecode_offset" member—and sm4 types (and I think also sm1) are similarly supposed to be deduplicated, we just don't because it doesn't actually matter.
I'm not sure the answer is "no", mind. There's something to be said for orthogonality, and if we're going to be shoving things through vsir then we should maybe try to avoid hlsl leaking through so much. [Although on the other hand, if we're shoving things through vsir then how *do* we build reflection data? That's an unsolved question.] But I'm not sure the answer is yes either.
Either way, do we really want it to be an rbtree? It doesn't seem likely we'll have enough types to matter, and rbtrees are broadly just more work to deal with than an array or list.
On Thu Feb 8 21:30:56 2024 +0000, Zebediah Figura wrote:
Do we really want to be defining a type rbtree like this? The existing code is explicitly set up to be usable by the backend in such a way—cf. the "bytecode_offset" member—and sm4 types (and I think also sm1) are similarly supposed to be deduplicated, we just don't because it doesn't actually matter. I'm not sure the answer is "no", mind. There's something to be said for orthogonality, and if we're going to be shoving things through vsir then we should maybe try to avoid hlsl leaking through so much. [Although on the other hand, if we're shoving things through vsir then how *do* we build reflection data? That's an unsolved question.] But I'm not sure the answer is yes either. Either way, do we really want it to be an rbtree? It doesn't seem likely we'll have enough types to matter, and rbtrees are broadly just more work to deal with than an array or list.
I don't see your point. Why not have it as an rbtree? We have it like that for the compiler. There is one compare function, init/destroy, and that's it. Regarding duplicated types, what I want is a matching binary, because it's much easier to compare that way. There should be no problem getting the metadata parts to match; the shaders will obviously differ.
I don't see your point. Why not have it as an rbtree? We have it like that for the compiler. There is one compare function, init/destroy, and that's it.
I'm not saying it's a lot more work, but it's still a bit more work. You have to define two callbacks, for instance, which you don't have to do with a simpler data structure. I don't think it's wrong to say that *any* use of a more complex data structure in place of a simpler one deserves justification.
The compiler uses an rbtree, but well, it was like that when I got here. The compiler also defines a lot more builtin types, and even then I'm not sure it's enough that an rbtree is *actually* an improvement.
On Fri Feb 9 18:43:16 2024 +0000, Zebediah Figura wrote:
I don't see your point. Why not have it as an rbtree? We have it like that for the compiler. There is one compare function, init/destroy, and that's it.
I'm not saying it's a lot more work, but it's still a bit more work. You have to define two callbacks, for instance, which you don't have to do with a simpler data structure. I don't think it's wrong to say that *any* use of a more complex data structure in place of a simpler one deserves justification. The compiler uses an rbtree, but well, it was like that when I got here. The compiler also defines a lot more builtin types, and even then I'm not sure it's enough that an rbtree is *actually* an improvement.
Switching to a list is no problem on my side. For strings the number of elements is normally larger, because every type name, string constant, and variable name ends up there, but that is not necessarily relevant for performance either, until we have actual numbers.
On Fri Feb 9 19:07:12 2024 +0000, Nikolay Sivov wrote:
Switching to a list is no problem on my side. For strings the number of elements is normally larger, because every type name, string constant, and variable name ends up there, but that is not necessarily relevant for performance either, until we have actual numbers.
Trees are also used for parameters in d3dx9 effects, and for types in d3d10 ones, so I didn't think twice.