For clarity.
Signed-off-by: Matteo Bruni mbruni@codeweavers.com Signed-off-by: Zebediah Figura zfigura@codeweavers.com Signed-off-by: Henri Verbeet hverbeet@codeweavers.com --- v2: no change
libs/vkd3d-shader/hlsl.c | 3 +++ libs/vkd3d-shader/hlsl.h | 2 +- libs/vkd3d-shader/hlsl_codegen.c | 7 ------- 3 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 0b8c660c..bc593f82 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1664,6 +1664,9 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct hlsl_profile_info * if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, hlsl_strdup(ctx, "$Globals"), NULL, ctx->location))) return false; + if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, + hlsl_strdup(ctx, "$Params"), NULL, ctx->location))) + return false; ctx->cur_buffer = ctx->globals_buffer;
return true; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index b62e7818..e0045acf 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -456,7 +456,7 @@ struct hlsl_ctx struct list extern_vars;
struct list buffers; - struct hlsl_buffer *cur_buffer, *globals_buffer; + struct hlsl_buffer *cur_buffer, *globals_buffer, *params_buffer; struct list types; struct rb_tree functions; const struct hlsl_ir_function_decl *cur_function; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 6336ddfc..9afa590a 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1150,13 +1150,6 @@ static void allocate_buffers(struct hlsl_ctx *ctx) hlsl_strdup(ctx, "$Params"), NULL, ctx->location))) return;
- /* The $Globals and $Params buffers should be allocated first, before all - * explicit buffers. */ - list_remove(&params_buffer->entry); - list_add_head(&ctx->buffers, &params_buffer->entry); - list_remove(&ctx->globals_buffer->entry); - list_add_head(&ctx->buffers, &ctx->globals_buffer->entry); - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform)
And change the way we handle alignment.
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- v2: use uint8_t; add align_buffer and get_buffer_size helpers; fix the uniform table size calculation
libs/vkd3d-shader/hlsl.h | 4 +- libs/vkd3d-shader/hlsl_codegen.c | 104 +++++++++++++++---------------- 2 files changed, 51 insertions(+), 57 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index e0045acf..54273ac6 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -126,7 +126,7 @@ struct hlsl_type } e;
unsigned int reg_size; - unsigned int bytecode_offset; + size_t bytecode_offset; };
struct hlsl_semantic @@ -144,7 +144,7 @@ struct hlsl_struct_field struct hlsl_semantic semantic; unsigned int reg_offset;
- unsigned int name_bytecode_offset; + size_t name_bytecode_offset; };
struct hlsl_reg diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 9afa590a..8e842bd1 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1328,72 +1328,62 @@ static struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct bytecode_buffer { struct hlsl_ctx *ctx; - uint32_t *data; - size_t count, size; + uint8_t *data; + size_t size, capacity; int status; };
-/* Returns the token index. */ -static unsigned int put_dword(struct bytecode_buffer *buffer, uint32_t value) +static size_t put_bytes(struct bytecode_buffer *buffer, const void *bytes, size_t size, size_t alignment) { - unsigned int index = buffer->count; + size_t prev_size = buffer->size; + size_t offset = align(prev_size, alignment);
if (buffer->status) - return index; + return offset;
- if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->size, - buffer->count + 1, sizeof(*buffer->data))) + if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->capacity, offset + size, 1)) { buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; - return index; + return offset; } - buffer->data[buffer->count++] = value; + memset(buffer->data + prev_size, 0xab, offset - prev_size); + memcpy(buffer->data + offset, bytes, size); + buffer->size = offset + size; + return offset; +}
- return index; +static size_t put_dword(struct bytecode_buffer *buffer, uint32_t value) +{ + return put_bytes(buffer, &value, sizeof(value), sizeof(value)); }
-/* Returns the token index. */ -static unsigned int put_float(struct bytecode_buffer *buffer, float value) +static size_t put_float(struct bytecode_buffer *buffer, float value) { - union - { - float f; - uint32_t u; - } u; - u.f = value; - return put_dword(buffer, u.u); + return put_bytes(buffer, &value, sizeof(value), sizeof(value)); }
-static void set_dword(struct bytecode_buffer *buffer, unsigned int index, uint32_t value) +static void set_dword(struct bytecode_buffer *buffer, size_t offset, uint32_t value) { if (buffer->status) return;
- assert(index < buffer->count); - buffer->data[index] = value; + assert(offset + sizeof(value) <= buffer->size); + memcpy(buffer->data + offset, &value, sizeof(value)); }
-/* Returns the token index. */ -static unsigned int put_string(struct bytecode_buffer *buffer, const char *str) +static size_t put_string(struct bytecode_buffer *buffer, const char *string) { - unsigned int index = buffer->count; - size_t len = strlen(str) + 1; - unsigned int token_count = (len + 3) / sizeof(*buffer->data); - - if (buffer->status) - return index; + return put_bytes(buffer, string, strlen(string) + 1, 1); +}
- if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->size, - buffer->count + token_count, sizeof(*buffer->data))) - { - buffer->status = E_OUTOFMEMORY; - return index; - } +static size_t align_buffer(struct bytecode_buffer *buffer, uint32_t alignment) +{ + return put_bytes(buffer, NULL, 0, alignment); +}
- buffer->data[buffer->count + token_count - 1] = 0xabababab; - memcpy(buffer->data + buffer->count, str, len); - buffer->count += token_count; - return index; +static size_t get_buffer_size(struct bytecode_buffer *buffer) +{ + return buffer->size; }
static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) @@ -1509,9 +1499,10 @@ static unsigned int get_array_size(const struct hlsl_type *type) static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) { const struct hlsl_type *array_type = get_array_type(type); - unsigned int fields_offset = 0, field_count = 0; unsigned int array_size = get_array_size(type); struct hlsl_struct_field *field; + unsigned int field_count = 0; + size_t fields_offset = 0;
if (type->bytecode_offset) return; @@ -1524,12 +1515,12 @@ static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *typ write_sm1_type(buffer, field->type, ctab_start); }
- fields_offset = (buffer->count - ctab_start) * sizeof(*buffer->data); + fields_offset = get_buffer_size(buffer) - ctab_start;
LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry) { - put_dword(buffer, (field->name_bytecode_offset - ctab_start) * sizeof(*buffer->data)); - put_dword(buffer, (field->type->bytecode_offset - ctab_start) * sizeof(*buffer->data)); + put_dword(buffer, field->name_bytecode_offset - ctab_start); + put_dword(buffer, field->type->bytecode_offset - ctab_start); ++field_count; } } @@ -1571,7 +1562,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, struct hlsl_ir_function_decl *entry_func) { - unsigned int ctab_start, vars_start, size_offset, creator_offset, offset; + size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; unsigned int uniform_count = 0; struct hlsl_ir_var *var;
@@ -1601,7 +1592,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buf sm1_sort_externs(ctx);
size_offset = put_dword(buffer, 0); - put_dword(buffer, MAKEFOURCC('C','T','A','B')); + ctab_offset = put_dword(buffer, MAKEFOURCC('C','T','A','B'));
ctab_start = put_dword(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); creator_offset = put_dword(buffer, 0); @@ -1611,7 +1602,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buf put_dword(buffer, 0); /* FIXME: flags */ put_dword(buffer, 0); /* FIXME: target string */
- vars_start = buffer->count; + vars_start = get_buffer_size(buffer);
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -1631,20 +1622,23 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buf { if (!var->semantic.name && var->reg.allocated) { - set_dword(buffer, vars_start + (uniform_count * 5), (buffer->count - ctab_start) * sizeof(*buffer->data)); - put_string(buffer, var->name); + size_t var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); + size_t name_offset; + + name_offset = put_string(buffer, var->name); + set_dword(buffer, var_offset, name_offset - ctab_start);
write_sm1_type(buffer, var->data_type, ctab_start); - set_dword(buffer, vars_start + (uniform_count * 5) + 3, - (var->data_type->bytecode_offset - ctab_start) * sizeof(*buffer->data)); + set_dword(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); ++uniform_count; } }
offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); - set_dword(buffer, creator_offset, (offset - ctab_start) * sizeof(*buffer->data)); + set_dword(buffer, creator_offset, offset - ctab_start);
- set_dword(buffer, size_offset, D3DSIO_COMMENT | ((buffer->count - (ctab_start - 1)) << 16)); + ctab_end = align_buffer(buffer, sizeof(uint32_t)); + set_dword(buffer, size_offset, D3DSIO_COMMENT | (((ctab_end - ctab_offset) / sizeof(uint32_t)) << 16)); }
static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) @@ -2082,7 +2076,7 @@ static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl * if (!(ret = buffer.status)) { out->code = buffer.data; - out->size = buffer.count * sizeof(*buffer.data); + out->size = buffer.size; } return ret; }
On Thu, Jul 1, 2021 at 4:55 AM Zebediah Figura zfigura@codeweavers.com wrote:
And change the way we handle alignment.
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
v2: use uint8_t; add align_buffer and get_buffer_size helpers; fix the uniform table size calculation
libs/vkd3d-shader/hlsl.h | 4 +- libs/vkd3d-shader/hlsl_codegen.c | 104 +++++++++++++++---------------- 2 files changed, 51 insertions(+), 57 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index e0045acf..54273ac6 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -126,7 +126,7 @@ struct hlsl_type } e;
unsigned int reg_size;
- unsigned int bytecode_offset;
+ size_t bytecode_offset;
};
struct hlsl_semantic @@ -144,7 +144,7 @@ struct hlsl_struct_field struct hlsl_semantic semantic; unsigned int reg_offset;
- unsigned int name_bytecode_offset;
+ size_t name_bytecode_offset;
};
struct hlsl_reg diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 9afa590a..8e842bd1 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1328,72 +1328,62 @@ static struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct bytecode_buffer { struct hlsl_ctx *ctx;
- uint32_t *data;
- size_t count, size;
+ uint8_t *data;
+ size_t size, capacity; int status;
};
-/* Returns the token index. */ -static unsigned int put_dword(struct bytecode_buffer *buffer, uint32_t value) +static size_t put_bytes(struct bytecode_buffer *buffer, const void *bytes, size_t size, size_t alignment) {
- unsigned int index = buffer->count;
size_t prev_size = buffer->size;
size_t offset = align(prev_size, alignment);
if (buffer->status)
return index;
return offset;
- if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->size,
buffer->count + 1, sizeof(*buffer->data)))
- if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->capacity, offset + size, 1)) { buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
return index;
}return offset;
- buffer->data[buffer->count++] = value;
- memset(buffer->data + prev_size, 0xab, offset - prev_size);
- memcpy(buffer->data + offset, bytes, size);
- buffer->size = offset + size;
- return offset;
+}
- return index;
+static size_t put_dword(struct bytecode_buffer *buffer, uint32_t value) +{
- return put_bytes(buffer, &value, sizeof(value), sizeof(value));
}
-/* Returns the token index. */ -static unsigned int put_float(struct bytecode_buffer *buffer, float value) +static size_t put_float(struct bytecode_buffer *buffer, float value) {
- union
- {
float f;
uint32_t u;
- } u;
- u.f = value;
- return put_dword(buffer, u.u);
+ return put_bytes(buffer, &value, sizeof(value), sizeof(value));
}
-static void set_dword(struct bytecode_buffer *buffer, unsigned int index, uint32_t value) +static void set_dword(struct bytecode_buffer *buffer, size_t offset, uint32_t value) { if (buffer->status) return;
- assert(index < buffer->count);
- buffer->data[index] = value;
+ assert(offset + sizeof(value) <= buffer->size);
+ memcpy(buffer->data + offset, &value, sizeof(value));
}
-/* Returns the token index. */ -static unsigned int put_string(struct bytecode_buffer *buffer, const char *str) +static size_t put_string(struct bytecode_buffer *buffer, const char *string) {
- unsigned int index = buffer->count;
- size_t len = strlen(str) + 1;
- unsigned int token_count = (len + 3) / sizeof(*buffer->data);
- if (buffer->status)
return index;
- return put_bytes(buffer, string, strlen(string) + 1, 1);
+}
- if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->size,
buffer->count + token_count, sizeof(*buffer->data)))
- {
buffer->status = E_OUTOFMEMORY;
return index;
- }
+static size_t align_buffer(struct bytecode_buffer *buffer, uint32_t alignment) +{
- return put_bytes(buffer, NULL, 0, alignment);
+}
- buffer->data[buffer->count + token_count - 1] = 0xabababab;
- memcpy(buffer->data + buffer->count, str, len);
- buffer->count += token_count;
- return index;
+static size_t get_buffer_size(struct bytecode_buffer *buffer) +{
- return buffer->size;
}
static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) @@ -1509,9 +1499,10 @@ static unsigned int get_array_size(const struct hlsl_type *type) static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) { const struct hlsl_type *array_type = get_array_type(type);
- unsigned int fields_offset = 0, field_count = 0; unsigned int array_size = get_array_size(type); struct hlsl_struct_field *field;
unsigned int field_count = 0;
size_t fields_offset = 0;
if (type->bytecode_offset) return;
@@ -1524,12 +1515,12 @@ static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *typ write_sm1_type(buffer, field->type, ctab_start); }
fields_offset = (buffer->count - ctab_start) * sizeof(*buffer->data);
fields_offset = get_buffer_size(buffer) - ctab_start;
This isn't quite what I was expecting when I proposed the helper. You access the current buffer offset whenever you want to reference it afterwards, either in some range computation or to overwrite the buffer in that location with the final value. In all cases (I believe?) what you actually want is the aligned offset, i.e. the location where you're going to start writing whatever is going to come next in the buffer. Which is why, in principle, every access to the current buffer offset needs to take the alignment (specifically of the next thing that's going into the buffer, whenever it might make a difference) into consideration.
Renaming align_buffer to get_buffer_offset() (and getting rid of get_buffer_size()) would do the trick, except it seems a bit ugly to have a get_ function modify the buffer. So it's probably better to only align the returned offset there without touching the buffer. That means though that you still need to explicitly align the buffer after the very last thing you're going to write (i.e. the current use of align_buffer()). Unless...
I'm not sure off the top of my head but it might be the case that the alignment is a property of the buffer (e.g. always 4 for SM1 DXBC). In that case it might make sense to store the alignment right in struct bytecode_buffer instead and, probably, align after writing the data in put_bytes() instead of before.
On 7/1/21 4:29 AM, Matteo Bruni wrote:
On Thu, Jul 1, 2021 at 4:55 AM Zebediah Figura zfigura@codeweavers.com wrote:
And change the way we handle alignment.
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
v2: use uint8_t; add align_buffer and get_buffer_size helpers; fix the uniform table size calculation
libs/vkd3d-shader/hlsl.h | 4 +- libs/vkd3d-shader/hlsl_codegen.c | 104 +++++++++++++++---------------- 2 files changed, 51 insertions(+), 57 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index e0045acf..54273ac6 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -126,7 +126,7 @@ struct hlsl_type } e;
unsigned int reg_size;
- unsigned int bytecode_offset;
+ size_t bytecode_offset; };
struct hlsl_semantic
@@ -144,7 +144,7 @@ struct hlsl_struct_field struct hlsl_semantic semantic; unsigned int reg_offset;
- unsigned int name_bytecode_offset;
+ size_t name_bytecode_offset; };
struct hlsl_reg
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 9afa590a..8e842bd1 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1328,72 +1328,62 @@ static struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const struct bytecode_buffer { struct hlsl_ctx *ctx;
- uint32_t *data;
- size_t count, size;
+ uint8_t *data;
+ size_t size, capacity; int status; };
-/* Returns the token index. */ -static unsigned int put_dword(struct bytecode_buffer *buffer, uint32_t value) +static size_t put_bytes(struct bytecode_buffer *buffer, const void *bytes, size_t size, size_t alignment) {
- unsigned int index = buffer->count;
size_t prev_size = buffer->size;
size_t offset = align(prev_size, alignment);
if (buffer->status)
return index;
return offset;
- if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->size,
buffer->count + 1, sizeof(*buffer->data)))
- if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->capacity, offset + size, 1)) { buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
return index;
return offset; }
- buffer->data[buffer->count++] = value;
- memset(buffer->data + prev_size, 0xab, offset - prev_size);
- memcpy(buffer->data + offset, bytes, size);
- buffer->size = offset + size;
- return offset;
+}
- return index;
+static size_t put_dword(struct bytecode_buffer *buffer, uint32_t value) +{
- return put_bytes(buffer, &value, sizeof(value), sizeof(value)); }
-/* Returns the token index. */ -static unsigned int put_float(struct bytecode_buffer *buffer, float value) +static size_t put_float(struct bytecode_buffer *buffer, float value) {
- union
- {
float f;
uint32_t u;
- } u;
- u.f = value;
- return put_dword(buffer, u.u);
+ return put_bytes(buffer, &value, sizeof(value), sizeof(value)); }
-static void set_dword(struct bytecode_buffer *buffer, unsigned int index, uint32_t value) +static void set_dword(struct bytecode_buffer *buffer, size_t offset, uint32_t value) { if (buffer->status) return;
- assert(index < buffer->count);
- buffer->data[index] = value;
+ assert(offset + sizeof(value) <= buffer->size);
+ memcpy(buffer->data + offset, &value, sizeof(value)); }
-/* Returns the token index. */ -static unsigned int put_string(struct bytecode_buffer *buffer, const char *str) +static size_t put_string(struct bytecode_buffer *buffer, const char *string) {
- unsigned int index = buffer->count;
- size_t len = strlen(str) + 1;
- unsigned int token_count = (len + 3) / sizeof(*buffer->data);
- if (buffer->status)
return index;
- return put_bytes(buffer, string, strlen(string) + 1, 1);
+}
- if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->size,
buffer->count + token_count, sizeof(*buffer->data)))
- {
buffer->status = E_OUTOFMEMORY;
return index;
- }
+static size_t align_buffer(struct bytecode_buffer *buffer, uint32_t alignment) +{
- return put_bytes(buffer, NULL, 0, alignment);
+}
- buffer->data[buffer->count + token_count - 1] = 0xabababab;
- memcpy(buffer->data + buffer->count, str, len);
- buffer->count += token_count;
- return index;
+static size_t get_buffer_size(struct bytecode_buffer *buffer) +{
return buffer->size; }
static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor)
@@ -1509,9 +1499,10 @@ static unsigned int get_array_size(const struct hlsl_type *type) static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) { const struct hlsl_type *array_type = get_array_type(type);
- unsigned int fields_offset = 0, field_count = 0; unsigned int array_size = get_array_size(type); struct hlsl_struct_field *field;
unsigned int field_count = 0;
size_t fields_offset = 0;
if (type->bytecode_offset) return;
@@ -1524,12 +1515,12 @@ static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *typ write_sm1_type(buffer, field->type, ctab_start); }
fields_offset = (buffer->count - ctab_start) * sizeof(*buffer->data);
fields_offset = get_buffer_size(buffer) - ctab_start;
This isn't quite what I was expecting when I proposed the helper. You access the current buffer offset whenever you want to reference it afterwards, either in some range computation or to overwrite the buffer in that location with the final value. In all cases (I believe?) what you actually want is the aligned offset, i.e. the location where you're going to start writing whatever is going to come next in the buffer. Which is why, in principle, every access to the current buffer offset needs to take the alignment (specifically of the next thing that's going into the buffer, whenever it might make a difference) into consideration.
Renaming align_buffer to get_buffer_offset() (and getting rid of get_buffer_size()) would do the trick, except it seems a bit ugly to have a get_ function modify the buffer. So it's probably better to only align the returned offset there without touching the buffer. That means though that you still need to explicitly align the buffer after the very last thing you're going to write (i.e. the current use of align_buffer()). Unless...
I'm not sure off the top of my head but it might be the case that the alignment is a property of the buffer (e.g. always 4 for SM1 DXBC). In that case it might make sense to store the alignment right in struct bytecode_buffer instead and, probably, align after writing the data in put_bytes() instead of before.
That's essentially what we were doing before. The actual rule seems to be that alignment is always 4, *except* for strings, which as it turns out don't need to be aligned (in SM1 or SM4).
Most of SM1/SM4 deals with bytecode offsets anyway, which means that theoretically the native parser could handle unaligned offsets. Since almost everything is aligned anyway it's a moot point.
I guess I can dodge the issue by just leaving the alignment behaviour alone, though.
On Thu, Jul 1, 2021 at 6:08 PM Zebediah Figura (she/her) zfigura@codeweavers.com wrote:
On 7/1/21 4:29 AM, Matteo Bruni wrote:
On Thu, Jul 1, 2021 at 4:55 AM Zebediah Figura zfigura@codeweavers.com wrote:
@@ -1524,12 +1515,12 @@ static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *typ write_sm1_type(buffer, field->type, ctab_start); }
fields_offset = (buffer->count - ctab_start) * sizeof(*buffer->data);
fields_offset = get_buffer_size(buffer) - ctab_start;
This isn't quite what I was expecting when I proposed the helper. You access the current buffer offset whenever you want to reference it afterwards, either in some range computation or to overwrite the buffer in that location with the final value. In all cases (I believe?) what you actually want is the aligned offset, i.e. the location where you're going to start writing whatever is going to come next in the buffer. Which is why, in principle, every access to the current buffer offset needs to take the alignment (specifically of the next thing that's going into the buffer, whenever it might make a difference) into consideration.
Renaming align_buffer to get_buffer_offset() (and getting rid of get_buffer_size()) would do the trick, except it seems a bit ugly to have a get_ function modify the buffer. So it's probably better to only align the returned offset there without touching the buffer. That means though that you still need to explicitly align the buffer after the very last thing you're going to write (i.e. the current use of align_buffer()). Unless...
I'm not sure off the top of my head but it might be the case that the alignment is a property of the buffer (e.g. always 4 for SM1 DXBC). In that case it might make sense to store the alignment right in struct bytecode_buffer instead and, probably, align after writing the data in put_bytes() instead of before.
That's essentially what we were doing before. The actual rule seems to be that alignment is always 4, *except* for strings, which as it turns out don't need to be aligned (in SM1 or SM4).
Most of SM1/SM4 deals with bytecode offsets anyway, which means that theoretically the native parser could handle unaligned offsets. Since almost everything is aligned anyway it's a moot point.
I guess I can dodge the issue by just leaving the alignment behaviour alone, though.
Okay, I wasn't entirely sure that the string alignment change was intended. That means my last suggestion (i.e. my last paragraph above) is invalid. My other suggestions should still apply, so if at some point you want to have another shot at changing the alignment handling, I guess you could give them a try. No particular hurry of course.
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- v3: Get rid of the "vkd3d_" prefix from put_* and set_* functions; replace it with "bytecode_" in remaining functions. Because these functions are called often it's nice for their names to be terse, and I'd like to try to avoid polluting the "vkd3d" namespace for functions that aren't library exports.
libs/vkd3d-shader/hlsl_codegen.c | 168 ++++++++--------------- libs/vkd3d-shader/vkd3d_shader_main.c | 28 ++++ libs/vkd3d-shader/vkd3d_shader_private.h | 36 +++++ 3 files changed, 119 insertions(+), 113 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 8e842bd1..ef6beadf 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1325,67 +1325,6 @@ static struct hlsl_reg hlsl_reg_from_deref(const struct hlsl_deref *deref, const return ret; }
-struct bytecode_buffer -{ - struct hlsl_ctx *ctx; - uint8_t *data; - size_t size, capacity; - int status; -}; - -static size_t put_bytes(struct bytecode_buffer *buffer, const void *bytes, size_t size, size_t alignment) -{ - size_t prev_size = buffer->size; - size_t offset = align(prev_size, alignment); - - if (buffer->status) - return offset; - - if (!hlsl_array_reserve(buffer->ctx, (void **)&buffer->data, &buffer->capacity, offset + size, 1)) - { - buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; - return offset; - } - memset(buffer->data + prev_size, 0xab, offset - prev_size); - memcpy(buffer->data + offset, bytes, size); - buffer->size = offset + size; - return offset; -} - -static size_t put_dword(struct bytecode_buffer *buffer, uint32_t value) -{ - return put_bytes(buffer, &value, sizeof(value), sizeof(value)); -} - -static size_t put_float(struct bytecode_buffer *buffer, float value) -{ - return put_bytes(buffer, &value, sizeof(value), sizeof(value)); -} - -static void set_dword(struct bytecode_buffer *buffer, size_t offset, uint32_t value) -{ - if (buffer->status) - return; - - assert(offset + sizeof(value) <= buffer->size); - memcpy(buffer->data + offset, &value, sizeof(value)); -} - -static size_t put_string(struct bytecode_buffer *buffer, const char *string) -{ - return put_bytes(buffer, string, strlen(string) + 1, 1); -} - -static size_t align_buffer(struct bytecode_buffer *buffer, uint32_t alignment) -{ - return put_bytes(buffer, NULL, 0, alignment); -} - -static size_t get_buffer_size(struct bytecode_buffer *buffer) -{ - return buffer->size; -} - static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) { if (type == VKD3D_SHADER_TYPE_VERTEX) @@ -1496,7 +1435,7 @@ static unsigned int get_array_size(const struct hlsl_type *type) return 1; }
-static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) { const struct hlsl_type *array_type = get_array_type(type); unsigned int array_size = get_array_size(type); @@ -1515,20 +1454,20 @@ static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *typ write_sm1_type(buffer, field->type, ctab_start); }
- fields_offset = get_buffer_size(buffer) - ctab_start; + fields_offset = bytecode_get_size(buffer) - ctab_start;
LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry) { - put_dword(buffer, field->name_bytecode_offset - ctab_start); - put_dword(buffer, field->type->bytecode_offset - ctab_start); + put_u32(buffer, field->name_bytecode_offset - ctab_start); + put_u32(buffer, field->type->bytecode_offset - ctab_start); ++field_count; } }
- type->bytecode_offset = put_dword(buffer, sm1_class(type) | (sm1_base_type(type) << 16)); - put_dword(buffer, type->dimy | (type->dimx << 16)); - put_dword(buffer, array_size | (field_count << 16)); - put_dword(buffer, fields_offset); + type->bytecode_offset = put_u32(buffer, sm1_class(type) | (sm1_base_type(type) << 16)); + put_u32(buffer, type->dimy | (type->dimx << 16)); + put_u32(buffer, array_size | (field_count << 16)); + put_u32(buffer, fields_offset); }
static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) @@ -1559,7 +1498,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) list_move_tail(&ctx->extern_vars, &sorted); }
-static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, +static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_ir_function_decl *entry_func) { size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; @@ -1591,28 +1530,28 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buf
sm1_sort_externs(ctx);
- size_offset = put_dword(buffer, 0); - ctab_offset = put_dword(buffer, MAKEFOURCC('C','T','A','B')); + size_offset = put_u32(buffer, 0); + ctab_offset = put_u32(buffer, MAKEFOURCC('C','T','A','B'));
- ctab_start = put_dword(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); - creator_offset = put_dword(buffer, 0); - put_dword(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); - put_dword(buffer, uniform_count); - put_dword(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ - put_dword(buffer, 0); /* FIXME: flags */ - put_dword(buffer, 0); /* FIXME: target string */ + ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); + creator_offset = put_u32(buffer, 0); + put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, uniform_count); + put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ + put_u32(buffer, 0); /* FIXME: flags */ + put_u32(buffer, 0); /* FIXME: target string */
- vars_start = get_buffer_size(buffer); + vars_start = bytecode_get_size(buffer);
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (!var->semantic.name && var->reg.allocated) { - put_dword(buffer, 0); /* name */ - put_dword(buffer, D3DXRS_FLOAT4 | (var->reg.id << 16)); - put_dword(buffer, var->data_type->reg_size / 4); - put_dword(buffer, 0); /* type */ - put_dword(buffer, 0); /* FIXME: default value */ + put_u32(buffer, 0); /* name */ + put_u32(buffer, D3DXRS_FLOAT4 | (var->reg.id << 16)); + put_u32(buffer, var->data_type->reg_size / 4); + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ } }
@@ -1626,19 +1565,19 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buf size_t name_offset;
name_offset = put_string(buffer, var->name); - set_dword(buffer, var_offset, name_offset - ctab_start); + set_u32(buffer, var_offset, name_offset - ctab_start);
write_sm1_type(buffer, var->data_type, ctab_start); - set_dword(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); ++uniform_count; } }
offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); - set_dword(buffer, creator_offset, offset - ctab_start); + set_u32(buffer, creator_offset, offset - ctab_start);
- ctab_end = align_buffer(buffer, sizeof(uint32_t)); - set_dword(buffer, size_offset, D3DSIO_COMMENT | (((ctab_end - ctab_offset) / sizeof(uint32_t)) << 16)); + ctab_end = bytecode_align(buffer, sizeof(uint32_t)); + set_u32(buffer, size_offset, D3DSIO_COMMENT | (((ctab_end - ctab_offset) / sizeof(uint32_t)) << 16)); }
static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) @@ -1671,21 +1610,21 @@ struct sm1_instruction unsigned int has_dst; };
-static void write_sm1_dst_register(struct bytecode_buffer *buffer, const struct sm1_dst_register *reg) +static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) { assert(reg->writemask); - put_dword(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); }
-static void write_sm1_src_register(struct bytecode_buffer *buffer, +static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_src_register *reg, unsigned int dst_writemask) { unsigned int swizzle = map_swizzle(reg->swizzle, dst_writemask);
- put_dword(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (swizzle << 16) | reg->reg); + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (swizzle << 16) | reg->reg); }
-static void write_sm1_instruction(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, +static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct sm1_instruction *instr) { uint32_t token = instr->opcode; @@ -1693,7 +1632,7 @@ static void write_sm1_instruction(struct hlsl_ctx *ctx, struct bytecode_buffer *
if (ctx->profile->major_version > 1) token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_dword(buffer, token); + put_u32(buffer, token);
if (instr->has_dst) write_sm1_dst_register(buffer, &instr->dst); @@ -1702,7 +1641,7 @@ static void write_sm1_instruction(struct hlsl_ctx *ctx, struct bytecode_buffer * write_sm1_src_register(buffer, &instr->srcs[i], instr->dst.writemask); };
-static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, +static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) { @@ -1726,7 +1665,7 @@ static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct bytecode_buffer *bu write_sm1_instruction(ctx, buffer, &instr); }
-static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, +static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod) { @@ -1748,7 +1687,7 @@ static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct bytecode_buffer *buf write_sm1_instruction(ctx, buffer, &instr); }
-static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer) +static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) { unsigned int i, x;
@@ -1764,7 +1703,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct bytecode_buffer
if (ctx->profile->major_version > 1) token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_dword(buffer, token); + put_u32(buffer, token);
write_sm1_dst_register(buffer, &reg); for (x = 0; x < 4; ++x) @@ -1772,7 +1711,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct bytecode_buffer } }
-static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, +static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var, bool output) { struct sm1_dst_register reg = {0}; @@ -1796,18 +1735,18 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct bytecode_buffer token = D3DSIO_DCL; if (ctx->profile->major_version > 1) token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_dword(buffer, token); + put_u32(buffer, token);
token = (1u << 31); token |= usage << D3DSP_DCL_USAGE_SHIFT; token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_dword(buffer, token); + put_u32(buffer, token);
reg.writemask = (1 << var->data_type->dimx) - 1; write_sm1_dst_register(buffer, &reg); }
-static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer) +static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) { bool write_in = false, write_out = false; struct hlsl_ir_var *var; @@ -1828,7 +1767,8 @@ static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct bytecode_buffer } }
-static void write_sm1_constant(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) { const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); struct sm1_instruction sm1_instr = @@ -1851,7 +1791,7 @@ static void write_sm1_constant(struct hlsl_ctx *ctx, struct bytecode_buffer *buf write_sm1_instruction(ctx, buffer, &sm1_instr); }
-static void write_sm1_expr(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); struct hlsl_ir_node *arg1 = expr->operands[0].node; @@ -1901,7 +1841,7 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, } }
-static void write_sm1_load(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { const struct hlsl_ir_load *load = hlsl_ir_load(instr); const struct hlsl_reg reg = hlsl_reg_from_deref(&load->src, instr->data_type); @@ -1943,7 +1883,8 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, write_sm1_instruction(ctx, buffer, &sm1_instr); }
-static void write_sm1_store(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) { const struct hlsl_ir_store *store = hlsl_ir_store(instr); const struct hlsl_ir_node *rhs = store->rhs.node; @@ -1986,7 +1927,8 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer write_sm1_instruction(ctx, buffer, &sm1_instr); }
-static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, + const struct hlsl_ir_node *instr) { const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); const struct hlsl_ir_node *val = swizzle->val.node; @@ -2011,7 +1953,7 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct bytecode_buffer *buff write_sm1_instruction(ctx, buffer, &sm1_instr); }
-static void write_sm1_instructions(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, +static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_function_decl *entry_func) { const struct hlsl_ir_node *instr; @@ -2060,10 +2002,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct bytecode_buffer static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) { - struct bytecode_buffer buffer = {.ctx = ctx}; + struct vkd3d_bytecode_buffer buffer = {0}; int ret;
- put_dword(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
write_sm1_uniforms(ctx, &buffer, entry_func);
@@ -2071,7 +2013,7 @@ static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl * write_sm1_semantic_dcls(ctx, &buffer); write_sm1_instructions(ctx, &buffer, entry_func);
- put_dword(&buffer, D3DSIO_END); + put_u32(&buffer, D3DSIO_END);
if (!(ret = buffer.status)) { diff --git a/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d-shader/vkd3d_shader_main.c index 40d55b70..8b4f3533 100644 --- a/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d-shader/vkd3d_shader_main.c @@ -282,6 +282,34 @@ void vkd3d_shader_error(struct vkd3d_shader_message_context *context, const stru va_end(args); }
+size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, size_t size, size_t alignment) +{ + size_t prev_size = buffer->size; + size_t offset = align(prev_size, alignment); + + if (buffer->status) + return offset; + + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->capacity, offset + size, 1)) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return offset; + } + memset(buffer->data + prev_size, 0xab, offset - prev_size); + memcpy(buffer->data + offset, bytes, size); + buffer->size = offset + size; + return offset; +} + +void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value) +{ + if (buffer->status) + return; + + assert(offset + sizeof(value) <= buffer->size); + memcpy(buffer->data + offset, &value, sizeof(value)); +} + static void vkd3d_shader_dump_blob(const char *path, const char *prefix, const void *data, size_t size) { static int shader_id = 0; diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index c61b3773..87e5801a 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -899,6 +899,42 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function) DECLSPEC_HIDDEN; int vkd3d_string_buffer_vprintf(struct vkd3d_string_buffer *buffer, const char *format, va_list args) DECLSPEC_HIDDEN;
+struct vkd3d_bytecode_buffer +{ + char *data; + size_t size, capacity; + int status; +}; + +size_t bytecode_put_bytes(struct vkd3d_bytecode_buffer *buffer, const void *bytes, + size_t size, size_t alignment) DECLSPEC_HIDDEN; +void set_u32(struct vkd3d_bytecode_buffer *buffer, size_t offset, uint32_t value) DECLSPEC_HIDDEN; + +static inline size_t put_u32(struct vkd3d_bytecode_buffer *buffer, uint32_t value) +{ + return bytecode_put_bytes(buffer, &value, sizeof(value), sizeof(value)); +} + +static inline size_t put_float(struct vkd3d_bytecode_buffer *buffer, float value) +{ + return bytecode_put_bytes(buffer, &value, sizeof(value), sizeof(value)); +} + +static inline size_t put_string(struct vkd3d_bytecode_buffer *buffer, const char *string) +{ + return bytecode_put_bytes(buffer, string, strlen(string) + 1, 1); +} + +static inline size_t bytecode_align(struct vkd3d_bytecode_buffer *buffer, uint32_t alignment) +{ + return bytecode_put_bytes(buffer, NULL, 0, alignment); +} + +static inline size_t bytecode_get_size(struct vkd3d_bytecode_buffer *buffer) +{ + return buffer->size; +} + struct vkd3d_shader_location { const char *source_name;
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- libs/vkd3d-shader/dxbc.c | 374 +++++++++++---------------------------- 1 file changed, 105 insertions(+), 269 deletions(-)
diff --git a/libs/vkd3d-shader/dxbc.c b/libs/vkd3d-shader/dxbc.c index 70ab70aa..b9add9bf 100644 --- a/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d-shader/dxbc.c @@ -2897,86 +2897,38 @@ struct root_signature_writer_context { struct vkd3d_shader_message_context message_context;
- DWORD *data; - size_t position; - size_t capacity; + struct vkd3d_bytecode_buffer buffer;
size_t total_size_position; size_t chunk_position; -}; - -static bool write_dwords(struct root_signature_writer_context *context, - unsigned int count, DWORD d) -{ - unsigned int i; - - if (!vkd3d_array_reserve((void **)&context->data, &context->capacity, - context->position + count, sizeof(*context->data))) - return false; - for (i = 0; i < count; ++i) - context->data[context->position++] = d; - return true; -}
-static bool write_dword(struct root_signature_writer_context *context, DWORD d) -{ - return write_dwords(context, 1, d); -} - -static bool write_float(struct root_signature_writer_context *context, float f) -{ - union - { - float f; - DWORD d; - } u; - u.f = f; - return write_dword(context, u.d); -} + int status; +};
static size_t get_chunk_offset(struct root_signature_writer_context *context) { - return (context->position - context->chunk_position) * sizeof(DWORD); + return bytecode_get_size(&context->buffer) - context->chunk_position; }
-static int shader_write_root_signature_header(struct root_signature_writer_context *context) +static void shader_write_root_signature_header(struct root_signature_writer_context *context) { - if (!write_dword(context, TAG_DXBC)) - goto fail; + struct vkd3d_bytecode_buffer *buffer = &context->buffer; + unsigned int i;
+ put_u32(buffer, TAG_DXBC); /* The checksum is computed when all data is generated. */ - if (!write_dwords(context, 4, 0x00000000)) - goto fail; - - if (!write_dword(context, 0x00000001)) - goto fail; - - context->total_size_position = context->position; - if (!write_dword(context, 0xffffffff)) /* total size */ - goto fail; - - if (!write_dword(context, 1)) /* chunk count */ - goto fail; - - /* chunk offset */ - if (!write_dword(context, (context->position + 1) * sizeof(DWORD))) - goto fail; - - if (!write_dword(context, TAG_RTS0)) - goto fail; - if (!write_dword(context, 0xffffffff)) /* chunk size */ - goto fail; - context->chunk_position = context->position; - - return VKD3D_OK; - -fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature header."); - return VKD3D_ERROR_OUT_OF_MEMORY; -} - -static int shader_write_descriptor_ranges(struct root_signature_writer_context *context, + for (i = 0; i < 4; ++i) + put_u32(buffer, 0); + put_u32(buffer, 1); + context->total_size_position = put_u32(buffer, 0xffffffff); + put_u32(buffer, 1); /* chunk count */ + put_u32(buffer, bytecode_get_size(buffer) + sizeof(uint32_t)); /* chunk offset */ + put_u32(buffer, TAG_RTS0); + put_u32(buffer, 0xffffffff); + context->chunk_position = bytecode_get_size(buffer); +} + +static void shader_write_descriptor_ranges(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_root_descriptor_table *table) { const struct vkd3d_shader_descriptor_range *ranges = table->descriptor_ranges; @@ -2984,27 +2936,15 @@ static int shader_write_descriptor_ranges(struct root_signature_writer_context *
for (i = 0; i < table->descriptor_range_count; ++i) { - if (!write_dword(context, ranges[i].range_type)) - goto fail; - if (!write_dword(context, ranges[i].descriptor_count)) - goto fail; - if (!write_dword(context, ranges[i].base_shader_register)) - goto fail; - if (!write_dword(context, ranges[i].register_space)) - goto fail; - if (!write_dword(context, ranges[i].descriptor_table_offset)) - goto fail; + put_u32(buffer, ranges[i].range_type); + put_u32(buffer, ranges[i].descriptor_count); + put_u32(buffer, ranges[i].base_shader_register); + put_u32(buffer, ranges[i].register_space); + put_u32(buffer, ranges[i].descriptor_table_offset); } - - return VKD3D_OK; - -fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature descriptor ranges."); - return VKD3D_ERROR_OUT_OF_MEMORY; }
-static int shader_write_descriptor_ranges1(struct root_signature_writer_context *context, +static void shader_write_descriptor_ranges1(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_root_descriptor_table1 *table) { const struct vkd3d_shader_descriptor_range1 *ranges = table->descriptor_ranges; @@ -3012,175 +2952,113 @@ static int shader_write_descriptor_ranges1(struct root_signature_writer_context
for (i = 0; i < table->descriptor_range_count; ++i) { - if (!write_dword(context, ranges[i].range_type)) - goto fail; - if (!write_dword(context, ranges[i].descriptor_count)) - goto fail; - if (!write_dword(context, ranges[i].base_shader_register)) - goto fail; - if (!write_dword(context, ranges[i].register_space)) - goto fail; - if (!write_dword(context, ranges[i].flags)) - goto fail; - if (!write_dword(context, ranges[i].descriptor_table_offset)) - goto fail; + put_u32(buffer, ranges[i].range_type); + put_u32(buffer, ranges[i].descriptor_count); + put_u32(buffer, ranges[i].base_shader_register); + put_u32(buffer, ranges[i].register_space); + put_u32(buffer, ranges[i].flags); + put_u32(buffer, ranges[i].descriptor_table_offset); } - - return VKD3D_OK; - -fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature descriptor ranges."); - return VKD3D_ERROR_OUT_OF_MEMORY; }
-static int shader_write_descriptor_table(struct root_signature_writer_context *context, +static void shader_write_descriptor_table(struct root_signature_writer_context *context, const struct vkd3d_shader_root_descriptor_table *table) { - if (!write_dword(context, table->descriptor_range_count)) - goto fail; - if (!write_dword(context, get_chunk_offset(context) + sizeof(DWORD))) /* offset */ - goto fail; + struct vkd3d_bytecode_buffer *buffer = &context->buffer;
- return shader_write_descriptor_ranges(context, table); + put_u32(buffer, table->descriptor_range_count); + put_u32(buffer, get_chunk_offset(context) + sizeof(uint32_t)); /* offset */
-fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature root descriptor table."); - return VKD3D_ERROR_OUT_OF_MEMORY; + shader_write_descriptor_ranges(buffer, table); }
-static int shader_write_descriptor_table1(struct root_signature_writer_context *context, +static void shader_write_descriptor_table1(struct root_signature_writer_context *context, const struct vkd3d_shader_root_descriptor_table1 *table) { - if (!write_dword(context, table->descriptor_range_count)) - goto fail; - if (!write_dword(context, get_chunk_offset(context) + sizeof(DWORD))) /* offset */ - goto fail; + struct vkd3d_bytecode_buffer *buffer = &context->buffer;
- return shader_write_descriptor_ranges1(context, table); + put_u32(buffer, table->descriptor_range_count); + put_u32(buffer, get_chunk_offset(context) + sizeof(uint32_t)); /* offset */
-fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature root descriptor table."); - return VKD3D_ERROR_OUT_OF_MEMORY; + shader_write_descriptor_ranges1(buffer, table); }
-static int shader_write_root_constants(struct root_signature_writer_context *context, +static void shader_write_root_constants(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_root_constants *constants) { - if (!write_dword(context, constants->shader_register)) - goto fail; - if (!write_dword(context, constants->register_space)) - goto fail; - if (!write_dword(context, constants->value_count)) - goto fail; - - return VKD3D_OK; - -fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature root constants."); - return VKD3D_ERROR_OUT_OF_MEMORY; + put_u32(buffer, constants->shader_register); + put_u32(buffer, constants->register_space); + put_u32(buffer, constants->value_count); }
-static int shader_write_root_descriptor(struct root_signature_writer_context *context, +static void shader_write_root_descriptor(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_root_descriptor *descriptor) { - if (!write_dword(context, descriptor->shader_register)) - goto fail; - if (!write_dword(context, descriptor->register_space)) - goto fail; - - return VKD3D_OK; - -fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature root descriptor."); - return VKD3D_ERROR_OUT_OF_MEMORY; + put_u32(buffer, descriptor->shader_register); + put_u32(buffer, descriptor->register_space); }
-static int shader_write_root_descriptor1(struct root_signature_writer_context *context, +static void shader_write_root_descriptor1(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_root_descriptor1 *descriptor) { - if (!write_dword(context, descriptor->shader_register)) - goto fail; - if (!write_dword(context, descriptor->register_space)) - goto fail; - if (!write_dword(context, descriptor->flags)) - goto fail; - - return VKD3D_OK; - -fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature root descriptor."); - return VKD3D_ERROR_OUT_OF_MEMORY; + put_u32(buffer, descriptor->shader_register); + put_u32(buffer, descriptor->register_space); + put_u32(buffer, descriptor->flags); }
-static int shader_write_root_parameters(struct root_signature_writer_context *context, +static void shader_write_root_parameters(struct root_signature_writer_context *context, const struct vkd3d_shader_versioned_root_signature_desc *desc) { unsigned int parameter_count = versioned_root_signature_get_parameter_count(desc); + struct vkd3d_bytecode_buffer *buffer = &context->buffer; size_t parameters_position; unsigned int i; - int ret;
- parameters_position = context->position; + parameters_position = bytecode_get_size(buffer); for (i = 0; i < parameter_count; ++i) { - if (!write_dword(context, versioned_root_signature_get_parameter_type(desc, i))) - goto fail; - if (!write_dword(context, versioned_root_signature_get_parameter_shader_visibility(desc, i))) - goto fail; - if (!write_dword(context, 0xffffffff)) /* offset */ - goto fail; + put_u32(buffer, versioned_root_signature_get_parameter_type(desc, i)); + put_u32(buffer, versioned_root_signature_get_parameter_shader_visibility(desc, i)); + put_u32(buffer, 0xffffffff); /* offset */ }
for (i = 0; i < parameter_count; ++i) { - context->data[parameters_position + 3 * i + 2] = get_chunk_offset(context); /* offset */ + set_u32(buffer, parameters_position + ((3 * i + 2) * sizeof(uint32_t)), get_chunk_offset(context));
switch (versioned_root_signature_get_parameter_type(desc, i)) { case VKD3D_SHADER_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) - ret = shader_write_descriptor_table(context, &desc->u.v_1_0.parameters[i].u.descriptor_table); + shader_write_descriptor_table(context, &desc->u.v_1_0.parameters[i].u.descriptor_table); else - ret = shader_write_descriptor_table1(context, &desc->u.v_1_1.parameters[i].u.descriptor_table); + shader_write_descriptor_table1(context, &desc->u.v_1_1.parameters[i].u.descriptor_table); break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: - ret = shader_write_root_constants(context, versioned_root_signature_get_root_constants(desc, i)); + shader_write_root_constants(buffer, versioned_root_signature_get_root_constants(desc, i)); break; + case VKD3D_SHADER_ROOT_PARAMETER_TYPE_CBV: case VKD3D_SHADER_ROOT_PARAMETER_TYPE_SRV: case VKD3D_SHADER_ROOT_PARAMETER_TYPE_UAV: if (desc->version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_0) - ret = shader_write_root_descriptor(context, &desc->u.v_1_0.parameters[i].u.descriptor); + shader_write_root_descriptor(buffer, &desc->u.v_1_0.parameters[i].u.descriptor); else - ret = shader_write_root_descriptor1(context, &desc->u.v_1_1.parameters[i].u.descriptor); + shader_write_root_descriptor1(buffer, &desc->u.v_1_1.parameters[i].u.descriptor); break; + default: FIXME("Unrecognized type %#x.\n", versioned_root_signature_get_parameter_type(desc, i)); vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_INVALID_ROOT_PARAMETER_TYPE, "Invalid/unrecognised root signature root parameter type %#x.", versioned_root_signature_get_parameter_type(desc, i)); - return VKD3D_ERROR_INVALID_ARGUMENT; + context->status = VKD3D_ERROR_INVALID_ARGUMENT; } - - if (ret < 0) - return ret; } - - return VKD3D_OK; - -fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature root 
parameters."); - return VKD3D_ERROR_OUT_OF_MEMORY; }
-static int shader_write_static_samplers(struct root_signature_writer_context *context, +static void shader_write_static_samplers(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_versioned_root_signature_desc *desc) { const struct vkd3d_shader_static_sampler_desc *samplers = versioned_root_signature_get_static_samplers(desc); @@ -3188,75 +3066,39 @@ static int shader_write_static_samplers(struct root_signature_writer_context *co
for (i = 0; i < versioned_root_signature_get_static_sampler_count(desc); ++i) { - if (!write_dword(context, samplers[i].filter)) - goto fail; - if (!write_dword(context, samplers[i].address_u)) - goto fail; - if (!write_dword(context, samplers[i].address_v)) - goto fail; - if (!write_dword(context, samplers[i].address_w)) - goto fail; - if (!write_float(context, samplers[i].mip_lod_bias)) - goto fail; - if (!write_dword(context, samplers[i].max_anisotropy)) - goto fail; - if (!write_dword(context, samplers[i].comparison_func)) - goto fail; - if (!write_dword(context, samplers[i].border_colour)) - goto fail; - if (!write_float(context, samplers[i].min_lod)) - goto fail; - if (!write_float(context, samplers[i].max_lod)) - goto fail; - if (!write_dword(context, samplers[i].shader_register)) - goto fail; - if (!write_dword(context, samplers[i].register_space)) - goto fail; - if (!write_dword(context, samplers[i].shader_visibility)) - goto fail; + put_u32(buffer, samplers[i].filter); + put_u32(buffer, samplers[i].address_u); + put_u32(buffer, samplers[i].address_v); + put_u32(buffer, samplers[i].address_w); + put_float(buffer, samplers[i].mip_lod_bias); + put_u32(buffer, samplers[i].max_anisotropy); + put_u32(buffer, samplers[i].comparison_func); + put_u32(buffer, samplers[i].border_colour); + put_float(buffer, samplers[i].min_lod); + put_float(buffer, samplers[i].max_lod); + put_u32(buffer, samplers[i].shader_register); + put_u32(buffer, samplers[i].register_space); + put_u32(buffer, samplers[i].shader_visibility); } - - return VKD3D_OK; - -fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature static samplers."); - return VKD3D_ERROR_OUT_OF_MEMORY; }
-static int shader_write_root_signature(struct root_signature_writer_context *context, +static void shader_write_root_signature(struct root_signature_writer_context *context, const struct vkd3d_shader_versioned_root_signature_desc *desc) { + struct vkd3d_bytecode_buffer *buffer = &context->buffer; size_t samplers_offset_position; - int ret;
- if (!write_dword(context, desc->version)) - goto fail; + put_u32(buffer, desc->version); + put_u32(buffer, versioned_root_signature_get_parameter_count(desc)); + put_u32(buffer, get_chunk_offset(context) + 4 * sizeof(uint32_t)); /* offset */ + put_u32(buffer, versioned_root_signature_get_static_sampler_count(desc)); + samplers_offset_position = put_u32(buffer, 0xffffffff); + put_u32(buffer, versioned_root_signature_get_flags(desc));
- if (!write_dword(context, versioned_root_signature_get_parameter_count(desc))) - goto fail; - if (!write_dword(context, get_chunk_offset(context) + 4 * sizeof(DWORD))) /* offset */ - goto fail; + shader_write_root_parameters(context, desc);
- if (!write_dword(context, versioned_root_signature_get_static_sampler_count(desc))) - goto fail; - samplers_offset_position = context->position; - if (!write_dword(context, 0xffffffff)) /* offset */ - goto fail; - - if (!write_dword(context, versioned_root_signature_get_flags(desc))) - goto fail; - - if ((ret = shader_write_root_parameters(context, desc)) < 0) - return ret; - - context->data[samplers_offset_position] = get_chunk_offset(context); - return shader_write_static_samplers(context, desc); - -fail: - vkd3d_shader_error(&context->message_context, NULL, VKD3D_SHADER_ERROR_RS_OUT_OF_MEMORY, - "Out of memory while writing root signature."); - return VKD3D_ERROR_OUT_OF_MEMORY; + set_u32(buffer, samplers_offset_position, get_chunk_offset(context)); + shader_write_static_samplers(buffer, desc); }
static int validate_descriptor_table_v_1_0(const struct vkd3d_shader_root_descriptor_table *descriptor_table, @@ -3373,6 +3215,7 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro struct root_signature_writer_context context; size_t total_size, chunk_size; uint32_t checksum[4]; + unsigned int i; int ret;
TRACE("root_signature %p, dxbc %p, messages %p.\n", root_signature, dxbc, messages); @@ -3397,30 +3240,23 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro goto done;
memset(dxbc, 0, sizeof(*dxbc)); - if ((ret = shader_write_root_signature_header(&context)) < 0) - { - vkd3d_free(context.data); - goto done; - } - - if ((ret = shader_write_root_signature(&context, root_signature)) < 0) - { - vkd3d_free(context.data); - goto done; - } + shader_write_root_signature_header(&context); + shader_write_root_signature(&context, root_signature);
- total_size = context.position * sizeof(DWORD); + total_size = bytecode_get_size(&context.buffer); chunk_size = get_chunk_offset(&context); - context.data[context.total_size_position] = total_size; - context.data[context.chunk_position - 1] = chunk_size; + set_u32(&context.buffer, context.total_size_position, total_size); + set_u32(&context.buffer, context.chunk_position - sizeof(uint32_t), chunk_size);
- dxbc->code = context.data; + dxbc->code = context.buffer.data; dxbc->size = total_size;
vkd3d_compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); - memcpy((uint32_t *)dxbc->code + 1, checksum, sizeof(checksum)); + for (i = 0; i < 4; ++i) + set_u32(&context.buffer, (i + 1) * sizeof(uint32_t), checksum[i]);
- ret = VKD3D_OK; + if (!(ret = context.buffer.status)) + ret = context.status;
done: vkd3d_shader_message_context_trace_messages(&context.message_context);
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- v2: no change.
libs/vkd3d-shader/dxbc.c | 453 +-------------------- libs/vkd3d-shader/sm4.h | 481 +++++++++++++++++++++++ libs/vkd3d-shader/vkd3d_shader_private.h | 13 - 3 files changed, 482 insertions(+), 465 deletions(-) create mode 100644 libs/vkd3d-shader/sm4.h
diff --git a/libs/vkd3d-shader/dxbc.c b/libs/vkd3d-shader/dxbc.c index b9add9bf..b1cb1dd4 100644 --- a/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d-shader/dxbc.c @@ -18,458 +18,7 @@ */
#include "vkd3d_shader_private.h" - -#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) - -#define VKD3D_SM4_MODIFIER_MASK 0x3fu - -#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 -#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) - -#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 -#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) - -#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 -#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) - -#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 -#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) -#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 -#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << VKD3D_SM4_AOFFIMMI_V_SHIFT) -#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 -#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) - -#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 -#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) - -#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 -#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) - -#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 -#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) - -#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 -#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) - -#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 -#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) - -#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 -#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) - -#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 -#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) - -#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 -#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) - -#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 -#define 
VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) - -#define VKD3D_SM5_PRECISE_SHIFT 19 -#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) - -#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 -#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) - -#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 -#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu - -#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 -#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) - -#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 -#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) - -#define VKD3D_SM5_TESSELLATOR_SHIFT 11 -#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) - -#define VKD3D_SM4_OPCODE_MASK 0xff - -#define VKD3D_SM4_REGISTER_MODIFIER (0x1u << 31) - -#define VKD3D_SM4_ADDRESSING_SHIFT2 28 -#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) - -#define VKD3D_SM4_ADDRESSING_SHIFT1 25 -#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) - -#define VKD3D_SM4_ADDRESSING_SHIFT0 22 -#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) - -#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 -#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) - -#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 -#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) - -#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 -#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) - -#define VKD3D_SM4_DIMENSION_SHIFT 0 -#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) - -#define VKD3D_SM4_WRITEMASK_SHIFT 4 -#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) - -#define VKD3D_SM4_SWIZZLE_SHIFT 4 -#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) - -#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) -#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 
0) & 0xf) - -#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 -#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 - -#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 - -#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) - -#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) - -enum vkd3d_sm4_opcode -{ - VKD3D_SM4_OP_ADD = 0x00, - VKD3D_SM4_OP_AND = 0x01, - VKD3D_SM4_OP_BREAK = 0x02, - VKD3D_SM4_OP_BREAKC = 0x03, - VKD3D_SM4_OP_CASE = 0x06, - VKD3D_SM4_OP_CONTINUE = 0x07, - VKD3D_SM4_OP_CONTINUEC = 0x08, - VKD3D_SM4_OP_CUT = 0x09, - VKD3D_SM4_OP_DEFAULT = 0x0a, - VKD3D_SM4_OP_DERIV_RTX = 0x0b, - VKD3D_SM4_OP_DERIV_RTY = 0x0c, - VKD3D_SM4_OP_DISCARD = 0x0d, - VKD3D_SM4_OP_DIV = 0x0e, - VKD3D_SM4_OP_DP2 = 0x0f, - VKD3D_SM4_OP_DP3 = 0x10, - VKD3D_SM4_OP_DP4 = 0x11, - VKD3D_SM4_OP_ELSE = 0x12, - VKD3D_SM4_OP_EMIT = 0x13, - VKD3D_SM4_OP_ENDIF = 0x15, - VKD3D_SM4_OP_ENDLOOP = 0x16, - VKD3D_SM4_OP_ENDSWITCH = 0x17, - VKD3D_SM4_OP_EQ = 0x18, - VKD3D_SM4_OP_EXP = 0x19, - VKD3D_SM4_OP_FRC = 0x1a, - VKD3D_SM4_OP_FTOI = 0x1b, - VKD3D_SM4_OP_FTOU = 0x1c, - VKD3D_SM4_OP_GE = 0x1d, - VKD3D_SM4_OP_IADD = 0x1e, - VKD3D_SM4_OP_IF = 0x1f, - VKD3D_SM4_OP_IEQ = 0x20, - VKD3D_SM4_OP_IGE = 0x21, - VKD3D_SM4_OP_ILT = 0x22, - VKD3D_SM4_OP_IMAD = 0x23, - VKD3D_SM4_OP_IMAX = 0x24, - VKD3D_SM4_OP_IMIN = 0x25, - VKD3D_SM4_OP_IMUL = 0x26, - VKD3D_SM4_OP_INE = 0x27, - VKD3D_SM4_OP_INEG = 0x28, - VKD3D_SM4_OP_ISHL = 0x29, - VKD3D_SM4_OP_ISHR = 0x2a, - VKD3D_SM4_OP_ITOF = 0x2b, - VKD3D_SM4_OP_LABEL = 0x2c, - VKD3D_SM4_OP_LD = 0x2d, - VKD3D_SM4_OP_LD2DMS = 0x2e, - VKD3D_SM4_OP_LOG = 0x2f, - VKD3D_SM4_OP_LOOP = 0x30, - VKD3D_SM4_OP_LT = 0x31, - VKD3D_SM4_OP_MAD = 0x32, - VKD3D_SM4_OP_MIN = 0x33, - VKD3D_SM4_OP_MAX = 0x34, - VKD3D_SM4_OP_SHADER_DATA = 0x35, - VKD3D_SM4_OP_MOV = 0x36, - VKD3D_SM4_OP_MOVC = 0x37, - VKD3D_SM4_OP_MUL = 0x38, - VKD3D_SM4_OP_NE = 0x39, - VKD3D_SM4_OP_NOP = 0x3a, - VKD3D_SM4_OP_NOT = 0x3b, - VKD3D_SM4_OP_OR = 0x3c, - VKD3D_SM4_OP_RESINFO = 0x3d, - VKD3D_SM4_OP_RET = 0x3e, - 
VKD3D_SM4_OP_RETC = 0x3f, - VKD3D_SM4_OP_ROUND_NE = 0x40, - VKD3D_SM4_OP_ROUND_NI = 0x41, - VKD3D_SM4_OP_ROUND_PI = 0x42, - VKD3D_SM4_OP_ROUND_Z = 0x43, - VKD3D_SM4_OP_RSQ = 0x44, - VKD3D_SM4_OP_SAMPLE = 0x45, - VKD3D_SM4_OP_SAMPLE_C = 0x46, - VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, - VKD3D_SM4_OP_SAMPLE_LOD = 0x48, - VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, - VKD3D_SM4_OP_SAMPLE_B = 0x4a, - VKD3D_SM4_OP_SQRT = 0x4b, - VKD3D_SM4_OP_SWITCH = 0x4c, - VKD3D_SM4_OP_SINCOS = 0x4d, - VKD3D_SM4_OP_UDIV = 0x4e, - VKD3D_SM4_OP_ULT = 0x4f, - VKD3D_SM4_OP_UGE = 0x50, - VKD3D_SM4_OP_UMUL = 0x51, - VKD3D_SM4_OP_UMAX = 0x53, - VKD3D_SM4_OP_UMIN = 0x54, - VKD3D_SM4_OP_USHR = 0x55, - VKD3D_SM4_OP_UTOF = 0x56, - VKD3D_SM4_OP_XOR = 0x57, - VKD3D_SM4_OP_DCL_RESOURCE = 0x58, - VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 0x59, - VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, - VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, - VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, - VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, - VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, - VKD3D_SM4_OP_DCL_INPUT = 0x5f, - VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, - VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, - VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, - VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, - VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, - VKD3D_SM4_OP_DCL_OUTPUT = 0x65, - VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, - VKD3D_SM4_OP_DCL_TEMPS = 0x68, - VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, - VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, - VKD3D_SM4_OP_LOD = 0x6c, - VKD3D_SM4_OP_GATHER4 = 0x6d, - VKD3D_SM4_OP_SAMPLE_POS = 0x6e, - VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, - VKD3D_SM5_OP_HS_DECLS = 0x71, - VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, - VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, - VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, - VKD3D_SM5_OP_EMIT_STREAM = 0x75, - VKD3D_SM5_OP_CUT_STREAM = 0x76, - VKD3D_SM5_OP_FCALL = 0x78, - VKD3D_SM5_OP_BUFINFO = 0x79, - VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, - VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, - VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, - VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, - VKD3D_SM5_OP_GATHER4_C = 
0x7e, - VKD3D_SM5_OP_GATHER4_PO = 0x7f, - VKD3D_SM5_OP_GATHER4_PO_C = 0x80, - VKD3D_SM5_OP_RCP = 0x81, - VKD3D_SM5_OP_F32TOF16 = 0x82, - VKD3D_SM5_OP_F16TOF32 = 0x83, - VKD3D_SM5_OP_COUNTBITS = 0x86, - VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, - VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, - VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, - VKD3D_SM5_OP_UBFE = 0x8a, - VKD3D_SM5_OP_IBFE = 0x8b, - VKD3D_SM5_OP_BFI = 0x8c, - VKD3D_SM5_OP_BFREV = 0x8d, - VKD3D_SM5_OP_SWAPC = 0x8e, - VKD3D_SM5_OP_DCL_STREAM = 0x8f, - VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, - VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, - VKD3D_SM5_OP_DCL_INTERFACE = 0x92, - VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, - VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, - VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, - VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, - VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, - VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, - VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, - VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, - VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, - VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, - VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, - VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, - VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, - VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, - VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, - VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, - VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, - VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, - VKD3D_SM5_OP_LD_RAW = 0xa5, - VKD3D_SM5_OP_STORE_RAW = 0xa6, - VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, - VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, - VKD3D_SM5_OP_ATOMIC_AND = 0xa9, - VKD3D_SM5_OP_ATOMIC_OR = 0xaa, - VKD3D_SM5_OP_ATOMIC_XOR = 0xab, - VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, - VKD3D_SM5_OP_ATOMIC_IADD = 0xad, - VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, - VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, - VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, - VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, - VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, - VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, - VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, - 
VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, - VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, - VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, - VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, - VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, - VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, - VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, - VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, - VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, - VKD3D_SM5_OP_SYNC = 0xbe, - VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, - VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, - VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, -}; - -enum vkd3d_sm4_instruction_modifier -{ - VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, - VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, - VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, -}; - -enum vkd3d_sm4_register_type -{ - VKD3D_SM4_RT_TEMP = 0x00, - VKD3D_SM4_RT_INPUT = 0x01, - VKD3D_SM4_RT_OUTPUT = 0x02, - VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, - VKD3D_SM4_RT_IMMCONST = 0x04, - VKD3D_SM4_RT_SAMPLER = 0x06, - VKD3D_SM4_RT_RESOURCE = 0x07, - VKD3D_SM4_RT_CONSTBUFFER = 0x08, - VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, - VKD3D_SM4_RT_PRIMID = 0x0b, - VKD3D_SM4_RT_DEPTHOUT = 0x0c, - VKD3D_SM4_RT_NULL = 0x0d, - VKD3D_SM4_RT_RASTERIZER = 0x0e, - VKD3D_SM4_RT_OMASK = 0x0f, - VKD3D_SM5_RT_STREAM = 0x10, - VKD3D_SM5_RT_FUNCTION_BODY = 0x11, - VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, - VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, - VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, - VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, - VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, - VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, - VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, - VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, - VKD3D_SM5_RT_UAV = 0x1e, - VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, - VKD3D_SM5_RT_THREAD_ID = 0x20, - VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, - VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, - VKD3D_SM5_RT_COVERAGE = 0x23, - VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, - VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, - VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, - VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, -}; - -enum vkd3d_sm4_register_modifier -{ - VKD3D_SM4_REGISTER_MODIFIER_NONE 
= 0x01, - VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x41, - VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x81, - VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0xc1, -}; - -enum vkd3d_sm4_output_primitive_type -{ - VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, - VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, - VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, -}; - -enum vkd3d_sm4_input_primitive_type -{ - VKD3D_SM4_INPUT_PT_POINT = 0x01, - VKD3D_SM4_INPUT_PT_LINE = 0x02, - VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, - VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, - VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, - VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, - VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, - VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, - VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, - VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, - VKD3D_SM5_INPUT_PT_PATCH6 = 0x0d, - VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, - VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, - VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, - VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, - VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, - VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, - VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, - VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, - VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, - VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, - VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, - VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, - VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, - VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, - VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, - VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, - VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, - VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, - VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, - VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, - VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, - VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, - VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, - VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, - VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, - VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, -}; - -enum vkd3d_sm4_swizzle_type -{ - VKD3D_SM4_SWIZZLE_NONE = 0x0, - VKD3D_SM4_SWIZZLE_VEC4 = 0x1, - VKD3D_SM4_SWIZZLE_SCALAR = 0x2, -}; - -enum vkd3d_sm4_dimension -{ - VKD3D_SM4_DIMENSION_NONE = 0x0, - VKD3D_SM4_DIMENSION_SCALAR = 0x1, - VKD3D_SM4_DIMENSION_VEC4 = 0x2, -}; - 
-enum vkd3d_sm4_resource_type -{ - VKD3D_SM4_RESOURCE_BUFFER = 0x1, - VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, - VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, - VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, - VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, - VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, - VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, - VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, - VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, - VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, - VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, - VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, -}; - -enum vkd3d_sm4_data_type -{ - VKD3D_SM4_DATA_UNORM = 0x1, - VKD3D_SM4_DATA_SNORM = 0x2, - VKD3D_SM4_DATA_INT = 0x3, - VKD3D_SM4_DATA_UINT = 0x4, - VKD3D_SM4_DATA_FLOAT = 0x5, - VKD3D_SM4_DATA_MIXED = 0x6, - VKD3D_SM4_DATA_DOUBLE = 0x7, - VKD3D_SM4_DATA_CONTINUED = 0x8, - VKD3D_SM4_DATA_UNUSED = 0x9, -}; - -enum vkd3d_sm4_sampler_mode -{ - VKD3D_SM4_SAMPLER_DEFAULT = 0x0, - VKD3D_SM4_SAMPLER_COMPARISON = 0x1, -}; - -enum vkd3d_sm4_shader_data_type -{ - VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, - VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, -}; +#include "sm4.h"
struct vkd3d_shader_src_param_entry { diff --git a/libs/vkd3d-shader/sm4.h b/libs/vkd3d-shader/sm4.h new file mode 100644 index 00000000..2eb89afb --- /dev/null +++ b/libs/vkd3d-shader/sm4.h @@ -0,0 +1,481 @@ +/* + * Copyright 2009 Henri Verbeet for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SM4_H +#define __VKD3D_SM4_H + +#define VKD3D_SM4_PS 0x0000u +#define VKD3D_SM4_VS 0x0001u +#define VKD3D_SM4_GS 0x0002u +#define VKD3D_SM5_HS 0x0003u +#define VKD3D_SM5_DS 0x0004u +#define VKD3D_SM5_CS 0x0005u + +#define VKD3D_SM4_INSTRUCTION_MODIFIER (0x1u << 31) + +#define VKD3D_SM4_MODIFIER_MASK 0x3fu + +#define VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_DATA_TYPE_MASK (0xffffu << VKD3D_SM5_MODIFIER_DATA_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT 6 +#define VKD3D_SM5_MODIFIER_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM5_MODIFIER_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT 11 +#define VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_MASK (0xfffu << VKD3D_SM5_MODIFIER_RESOURCE_STRIDE_SHIFT) + +#define VKD3D_SM4_AOFFIMMI_U_SHIFT 9 +#define VKD3D_SM4_AOFFIMMI_U_MASK (0xfu << VKD3D_SM4_AOFFIMMI_U_SHIFT) +#define VKD3D_SM4_AOFFIMMI_V_SHIFT 13 +#define VKD3D_SM4_AOFFIMMI_V_MASK (0xfu << 
VKD3D_SM4_AOFFIMMI_V_SHIFT) +#define VKD3D_SM4_AOFFIMMI_W_SHIFT 17 +#define VKD3D_SM4_AOFFIMMI_W_MASK (0xfu << VKD3D_SM4_AOFFIMMI_W_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT 24 +#define VKD3D_SM4_INSTRUCTION_LENGTH_MASK (0x1fu << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT) + +#define VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT 11 +#define VKD3D_SM4_INSTRUCTION_FLAGS_MASK (0x7u << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT) + +#define VKD3D_SM4_RESOURCE_TYPE_SHIFT 11 +#define VKD3D_SM4_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM4_RESOURCE_TYPE_SHIFT) + +#define VKD3D_SM4_PRIMITIVE_TYPE_SHIFT 11 +#define VKD3D_SM4_PRIMITIVE_TYPE_MASK (0x3fu << VKD3D_SM4_PRIMITIVE_TYPE_SHIFT) + +#define VKD3D_SM4_INDEX_TYPE_SHIFT 11 +#define VKD3D_SM4_INDEX_TYPE_MASK (0x1u << VKD3D_SM4_INDEX_TYPE_SHIFT) + +#define VKD3D_SM4_SAMPLER_MODE_SHIFT 11 +#define VKD3D_SM4_SAMPLER_MODE_MASK (0xfu << VKD3D_SM4_SAMPLER_MODE_SHIFT) + +#define VKD3D_SM4_SHADER_DATA_TYPE_SHIFT 11 +#define VKD3D_SM4_SHADER_DATA_TYPE_MASK (0xfu << VKD3D_SM4_SHADER_DATA_TYPE_SHIFT) + +#define VKD3D_SM4_INTERPOLATION_MODE_SHIFT 11 +#define VKD3D_SM4_INTERPOLATION_MODE_MASK (0xfu << VKD3D_SM4_INTERPOLATION_MODE_SHIFT) + +#define VKD3D_SM4_GLOBAL_FLAGS_SHIFT 11 +#define VKD3D_SM4_GLOBAL_FLAGS_MASK (0xffu << VKD3D_SM4_GLOBAL_FLAGS_SHIFT) + +#define VKD3D_SM5_PRECISE_SHIFT 19 +#define VKD3D_SM5_PRECISE_MASK (0xfu << VKD3D_SM5_PRECISE_SHIFT) + +#define VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT 11 +#define VKD3D_SM5_CONTROL_POINT_COUNT_MASK (0xffu << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT) + +#define VKD3D_SM5_FP_ARRAY_SIZE_SHIFT 16 +#define VKD3D_SM5_FP_TABLE_COUNT_MASK 0xffffu + +#define VKD3D_SM5_UAV_FLAGS_SHIFT 15 +#define VKD3D_SM5_UAV_FLAGS_MASK (0x1ffu << VKD3D_SM5_UAV_FLAGS_SHIFT) + +#define VKD3D_SM5_SYNC_FLAGS_SHIFT 11 +#define VKD3D_SM5_SYNC_FLAGS_MASK (0xffu << VKD3D_SM5_SYNC_FLAGS_SHIFT) + +#define VKD3D_SM5_TESSELLATOR_SHIFT 11 +#define VKD3D_SM5_TESSELLATOR_MASK (0xfu << VKD3D_SM5_TESSELLATOR_SHIFT) + +#define VKD3D_SM4_OPCODE_MASK 0xff + 
+#define VKD3D_SM4_REGISTER_MODIFIER (0x1u << 31) + +#define VKD3D_SM4_ADDRESSING_SHIFT2 28 +#define VKD3D_SM4_ADDRESSING_MASK2 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT2) + +#define VKD3D_SM4_ADDRESSING_SHIFT1 25 +#define VKD3D_SM4_ADDRESSING_MASK1 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT1) + +#define VKD3D_SM4_ADDRESSING_SHIFT0 22 +#define VKD3D_SM4_ADDRESSING_MASK0 (0x3u << VKD3D_SM4_ADDRESSING_SHIFT0) + +#define VKD3D_SM4_REGISTER_ORDER_SHIFT 20 +#define VKD3D_SM4_REGISTER_ORDER_MASK (0x3u << VKD3D_SM4_REGISTER_ORDER_SHIFT) + +#define VKD3D_SM4_REGISTER_TYPE_SHIFT 12 +#define VKD3D_SM4_REGISTER_TYPE_MASK (0xffu << VKD3D_SM4_REGISTER_TYPE_SHIFT) + +#define VKD3D_SM4_SWIZZLE_TYPE_SHIFT 2 +#define VKD3D_SM4_SWIZZLE_TYPE_MASK (0x3u << VKD3D_SM4_SWIZZLE_TYPE_SHIFT) + +#define VKD3D_SM4_DIMENSION_SHIFT 0 +#define VKD3D_SM4_DIMENSION_MASK (0x3u << VKD3D_SM4_DIMENSION_SHIFT) + +#define VKD3D_SM4_WRITEMASK_SHIFT 4 +#define VKD3D_SM4_WRITEMASK_MASK (0xfu << VKD3D_SM4_WRITEMASK_SHIFT) + +#define VKD3D_SM4_SWIZZLE_SHIFT 4 +#define VKD3D_SM4_SWIZZLE_MASK (0xffu << VKD3D_SM4_SWIZZLE_SHIFT) + +#define VKD3D_SM4_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM4_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define VKD3D_SM4_ADDRESSING_RELATIVE 0x2 +#define VKD3D_SM4_ADDRESSING_OFFSET 0x1 + +#define VKD3D_SM4_INSTRUCTION_FLAG_SATURATE 0x4 + +#define VKD3D_SM4_CONDITIONAL_NZ (0x1u << 18) + +#define VKD3D_SM4_TYPE_COMPONENT(com, i) (((com) >> (4 * (i))) & 0xfu) + +enum vkd3d_sm4_opcode +{ + VKD3D_SM4_OP_ADD = 0x00, + VKD3D_SM4_OP_AND = 0x01, + VKD3D_SM4_OP_BREAK = 0x02, + VKD3D_SM4_OP_BREAKC = 0x03, + VKD3D_SM4_OP_CASE = 0x06, + VKD3D_SM4_OP_CONTINUE = 0x07, + VKD3D_SM4_OP_CONTINUEC = 0x08, + VKD3D_SM4_OP_CUT = 0x09, + VKD3D_SM4_OP_DEFAULT = 0x0a, + VKD3D_SM4_OP_DERIV_RTX = 0x0b, + VKD3D_SM4_OP_DERIV_RTY = 0x0c, + VKD3D_SM4_OP_DISCARD = 0x0d, + VKD3D_SM4_OP_DIV = 0x0e, + VKD3D_SM4_OP_DP2 = 0x0f, + VKD3D_SM4_OP_DP3 = 0x10, + VKD3D_SM4_OP_DP4 = 0x11, + VKD3D_SM4_OP_ELSE 
= 0x12, + VKD3D_SM4_OP_EMIT = 0x13, + VKD3D_SM4_OP_ENDIF = 0x15, + VKD3D_SM4_OP_ENDLOOP = 0x16, + VKD3D_SM4_OP_ENDSWITCH = 0x17, + VKD3D_SM4_OP_EQ = 0x18, + VKD3D_SM4_OP_EXP = 0x19, + VKD3D_SM4_OP_FRC = 0x1a, + VKD3D_SM4_OP_FTOI = 0x1b, + VKD3D_SM4_OP_FTOU = 0x1c, + VKD3D_SM4_OP_GE = 0x1d, + VKD3D_SM4_OP_IADD = 0x1e, + VKD3D_SM4_OP_IF = 0x1f, + VKD3D_SM4_OP_IEQ = 0x20, + VKD3D_SM4_OP_IGE = 0x21, + VKD3D_SM4_OP_ILT = 0x22, + VKD3D_SM4_OP_IMAD = 0x23, + VKD3D_SM4_OP_IMAX = 0x24, + VKD3D_SM4_OP_IMIN = 0x25, + VKD3D_SM4_OP_IMUL = 0x26, + VKD3D_SM4_OP_INE = 0x27, + VKD3D_SM4_OP_INEG = 0x28, + VKD3D_SM4_OP_ISHL = 0x29, + VKD3D_SM4_OP_ISHR = 0x2a, + VKD3D_SM4_OP_ITOF = 0x2b, + VKD3D_SM4_OP_LABEL = 0x2c, + VKD3D_SM4_OP_LD = 0x2d, + VKD3D_SM4_OP_LD2DMS = 0x2e, + VKD3D_SM4_OP_LOG = 0x2f, + VKD3D_SM4_OP_LOOP = 0x30, + VKD3D_SM4_OP_LT = 0x31, + VKD3D_SM4_OP_MAD = 0x32, + VKD3D_SM4_OP_MIN = 0x33, + VKD3D_SM4_OP_MAX = 0x34, + VKD3D_SM4_OP_SHADER_DATA = 0x35, + VKD3D_SM4_OP_MOV = 0x36, + VKD3D_SM4_OP_MOVC = 0x37, + VKD3D_SM4_OP_MUL = 0x38, + VKD3D_SM4_OP_NE = 0x39, + VKD3D_SM4_OP_NOP = 0x3a, + VKD3D_SM4_OP_NOT = 0x3b, + VKD3D_SM4_OP_OR = 0x3c, + VKD3D_SM4_OP_RESINFO = 0x3d, + VKD3D_SM4_OP_RET = 0x3e, + VKD3D_SM4_OP_RETC = 0x3f, + VKD3D_SM4_OP_ROUND_NE = 0x40, + VKD3D_SM4_OP_ROUND_NI = 0x41, + VKD3D_SM4_OP_ROUND_PI = 0x42, + VKD3D_SM4_OP_ROUND_Z = 0x43, + VKD3D_SM4_OP_RSQ = 0x44, + VKD3D_SM4_OP_SAMPLE = 0x45, + VKD3D_SM4_OP_SAMPLE_C = 0x46, + VKD3D_SM4_OP_SAMPLE_C_LZ = 0x47, + VKD3D_SM4_OP_SAMPLE_LOD = 0x48, + VKD3D_SM4_OP_SAMPLE_GRAD = 0x49, + VKD3D_SM4_OP_SAMPLE_B = 0x4a, + VKD3D_SM4_OP_SQRT = 0x4b, + VKD3D_SM4_OP_SWITCH = 0x4c, + VKD3D_SM4_OP_SINCOS = 0x4d, + VKD3D_SM4_OP_UDIV = 0x4e, + VKD3D_SM4_OP_ULT = 0x4f, + VKD3D_SM4_OP_UGE = 0x50, + VKD3D_SM4_OP_UMUL = 0x51, + VKD3D_SM4_OP_UMAX = 0x53, + VKD3D_SM4_OP_UMIN = 0x54, + VKD3D_SM4_OP_USHR = 0x55, + VKD3D_SM4_OP_UTOF = 0x56, + VKD3D_SM4_OP_XOR = 0x57, + VKD3D_SM4_OP_DCL_RESOURCE = 0x58, + VKD3D_SM4_OP_DCL_CONSTANT_BUFFER = 
0x59, + VKD3D_SM4_OP_DCL_SAMPLER = 0x5a, + VKD3D_SM4_OP_DCL_INDEX_RANGE = 0x5b, + VKD3D_SM4_OP_DCL_OUTPUT_TOPOLOGY = 0x5c, + VKD3D_SM4_OP_DCL_INPUT_PRIMITIVE = 0x5d, + VKD3D_SM4_OP_DCL_VERTICES_OUT = 0x5e, + VKD3D_SM4_OP_DCL_INPUT = 0x5f, + VKD3D_SM4_OP_DCL_INPUT_SGV = 0x60, + VKD3D_SM4_OP_DCL_INPUT_SIV = 0x61, + VKD3D_SM4_OP_DCL_INPUT_PS = 0x62, + VKD3D_SM4_OP_DCL_INPUT_PS_SGV = 0x63, + VKD3D_SM4_OP_DCL_INPUT_PS_SIV = 0x64, + VKD3D_SM4_OP_DCL_OUTPUT = 0x65, + VKD3D_SM4_OP_DCL_OUTPUT_SIV = 0x67, + VKD3D_SM4_OP_DCL_TEMPS = 0x68, + VKD3D_SM4_OP_DCL_INDEXABLE_TEMP = 0x69, + VKD3D_SM4_OP_DCL_GLOBAL_FLAGS = 0x6a, + VKD3D_SM4_OP_LOD = 0x6c, + VKD3D_SM4_OP_GATHER4 = 0x6d, + VKD3D_SM4_OP_SAMPLE_POS = 0x6e, + VKD3D_SM4_OP_SAMPLE_INFO = 0x6f, + VKD3D_SM5_OP_HS_DECLS = 0x71, + VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE = 0x72, + VKD3D_SM5_OP_HS_FORK_PHASE = 0x73, + VKD3D_SM5_OP_HS_JOIN_PHASE = 0x74, + VKD3D_SM5_OP_EMIT_STREAM = 0x75, + VKD3D_SM5_OP_CUT_STREAM = 0x76, + VKD3D_SM5_OP_FCALL = 0x78, + VKD3D_SM5_OP_BUFINFO = 0x79, + VKD3D_SM5_OP_DERIV_RTX_COARSE = 0x7a, + VKD3D_SM5_OP_DERIV_RTX_FINE = 0x7b, + VKD3D_SM5_OP_DERIV_RTY_COARSE = 0x7c, + VKD3D_SM5_OP_DERIV_RTY_FINE = 0x7d, + VKD3D_SM5_OP_GATHER4_C = 0x7e, + VKD3D_SM5_OP_GATHER4_PO = 0x7f, + VKD3D_SM5_OP_GATHER4_PO_C = 0x80, + VKD3D_SM5_OP_RCP = 0x81, + VKD3D_SM5_OP_F32TOF16 = 0x82, + VKD3D_SM5_OP_F16TOF32 = 0x83, + VKD3D_SM5_OP_COUNTBITS = 0x86, + VKD3D_SM5_OP_FIRSTBIT_HI = 0x87, + VKD3D_SM5_OP_FIRSTBIT_LO = 0x88, + VKD3D_SM5_OP_FIRSTBIT_SHI = 0x89, + VKD3D_SM5_OP_UBFE = 0x8a, + VKD3D_SM5_OP_IBFE = 0x8b, + VKD3D_SM5_OP_BFI = 0x8c, + VKD3D_SM5_OP_BFREV = 0x8d, + VKD3D_SM5_OP_SWAPC = 0x8e, + VKD3D_SM5_OP_DCL_STREAM = 0x8f, + VKD3D_SM5_OP_DCL_FUNCTION_BODY = 0x90, + VKD3D_SM5_OP_DCL_FUNCTION_TABLE = 0x91, + VKD3D_SM5_OP_DCL_INTERFACE = 0x92, + VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT = 0x93, + VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT = 0x94, + VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN = 0x95, + 
VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING = 0x96, + VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE = 0x97, + VKD3D_SM5_OP_DCL_HS_MAX_TESSFACTOR = 0x98, + VKD3D_SM5_OP_DCL_HS_FORK_PHASE_INSTANCE_COUNT = 0x99, + VKD3D_SM5_OP_DCL_HS_JOIN_PHASE_INSTANCE_COUNT = 0x9a, + VKD3D_SM5_OP_DCL_THREAD_GROUP = 0x9b, + VKD3D_SM5_OP_DCL_UAV_TYPED = 0x9c, + VKD3D_SM5_OP_DCL_UAV_RAW = 0x9d, + VKD3D_SM5_OP_DCL_UAV_STRUCTURED = 0x9e, + VKD3D_SM5_OP_DCL_TGSM_RAW = 0x9f, + VKD3D_SM5_OP_DCL_TGSM_STRUCTURED = 0xa0, + VKD3D_SM5_OP_DCL_RESOURCE_RAW = 0xa1, + VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED = 0xa2, + VKD3D_SM5_OP_LD_UAV_TYPED = 0xa3, + VKD3D_SM5_OP_STORE_UAV_TYPED = 0xa4, + VKD3D_SM5_OP_LD_RAW = 0xa5, + VKD3D_SM5_OP_STORE_RAW = 0xa6, + VKD3D_SM5_OP_LD_STRUCTURED = 0xa7, + VKD3D_SM5_OP_STORE_STRUCTURED = 0xa8, + VKD3D_SM5_OP_ATOMIC_AND = 0xa9, + VKD3D_SM5_OP_ATOMIC_OR = 0xaa, + VKD3D_SM5_OP_ATOMIC_XOR = 0xab, + VKD3D_SM5_OP_ATOMIC_CMP_STORE = 0xac, + VKD3D_SM5_OP_ATOMIC_IADD = 0xad, + VKD3D_SM5_OP_ATOMIC_IMAX = 0xae, + VKD3D_SM5_OP_ATOMIC_IMIN = 0xaf, + VKD3D_SM5_OP_ATOMIC_UMAX = 0xb0, + VKD3D_SM5_OP_ATOMIC_UMIN = 0xb1, + VKD3D_SM5_OP_IMM_ATOMIC_ALLOC = 0xb2, + VKD3D_SM5_OP_IMM_ATOMIC_CONSUME = 0xb3, + VKD3D_SM5_OP_IMM_ATOMIC_IADD = 0xb4, + VKD3D_SM5_OP_IMM_ATOMIC_AND = 0xb5, + VKD3D_SM5_OP_IMM_ATOMIC_OR = 0xb6, + VKD3D_SM5_OP_IMM_ATOMIC_XOR = 0xb7, + VKD3D_SM5_OP_IMM_ATOMIC_EXCH = 0xb8, + VKD3D_SM5_OP_IMM_ATOMIC_CMP_EXCH = 0xb9, + VKD3D_SM5_OP_IMM_ATOMIC_IMAX = 0xba, + VKD3D_SM5_OP_IMM_ATOMIC_IMIN = 0xbb, + VKD3D_SM5_OP_IMM_ATOMIC_UMAX = 0xbc, + VKD3D_SM5_OP_IMM_ATOMIC_UMIN = 0xbd, + VKD3D_SM5_OP_SYNC = 0xbe, + VKD3D_SM5_OP_EVAL_SAMPLE_INDEX = 0xcc, + VKD3D_SM5_OP_EVAL_CENTROID = 0xcd, + VKD3D_SM5_OP_DCL_GS_INSTANCES = 0xce, +}; + +enum vkd3d_sm4_instruction_modifier +{ + VKD3D_SM4_MODIFIER_AOFFIMMI = 0x1, + VKD3D_SM5_MODIFIER_RESOURCE_TYPE = 0x2, + VKD3D_SM5_MODIFIER_DATA_TYPE = 0x3, +}; + +enum vkd3d_sm4_register_type +{ + VKD3D_SM4_RT_TEMP = 0x00, + VKD3D_SM4_RT_INPUT = 0x01, + 
VKD3D_SM4_RT_OUTPUT = 0x02, + VKD3D_SM4_RT_INDEXABLE_TEMP = 0x03, + VKD3D_SM4_RT_IMMCONST = 0x04, + VKD3D_SM4_RT_SAMPLER = 0x06, + VKD3D_SM4_RT_RESOURCE = 0x07, + VKD3D_SM4_RT_CONSTBUFFER = 0x08, + VKD3D_SM4_RT_IMMCONSTBUFFER = 0x09, + VKD3D_SM4_RT_PRIMID = 0x0b, + VKD3D_SM4_RT_DEPTHOUT = 0x0c, + VKD3D_SM4_RT_NULL = 0x0d, + VKD3D_SM4_RT_RASTERIZER = 0x0e, + VKD3D_SM4_RT_OMASK = 0x0f, + VKD3D_SM5_RT_STREAM = 0x10, + VKD3D_SM5_RT_FUNCTION_BODY = 0x11, + VKD3D_SM5_RT_FUNCTION_POINTER = 0x13, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID = 0x16, + VKD3D_SM5_RT_FORK_INSTANCE_ID = 0x17, + VKD3D_SM5_RT_JOIN_INSTANCE_ID = 0x18, + VKD3D_SM5_RT_INPUT_CONTROL_POINT = 0x19, + VKD3D_SM5_RT_OUTPUT_CONTROL_POINT = 0x1a, + VKD3D_SM5_RT_PATCH_CONSTANT_DATA = 0x1b, + VKD3D_SM5_RT_DOMAIN_LOCATION = 0x1c, + VKD3D_SM5_RT_UAV = 0x1e, + VKD3D_SM5_RT_SHARED_MEMORY = 0x1f, + VKD3D_SM5_RT_THREAD_ID = 0x20, + VKD3D_SM5_RT_THREAD_GROUP_ID = 0x21, + VKD3D_SM5_RT_LOCAL_THREAD_ID = 0x22, + VKD3D_SM5_RT_COVERAGE = 0x23, + VKD3D_SM5_RT_LOCAL_THREAD_INDEX = 0x24, + VKD3D_SM5_RT_GS_INSTANCE_ID = 0x25, + VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, + VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, +}; + +enum vkd3d_sm4_register_modifier +{ + VKD3D_SM4_REGISTER_MODIFIER_NONE = 0x01, + VKD3D_SM4_REGISTER_MODIFIER_NEGATE = 0x41, + VKD3D_SM4_REGISTER_MODIFIER_ABS = 0x81, + VKD3D_SM4_REGISTER_MODIFIER_ABS_NEGATE = 0xc1, +}; + +enum vkd3d_sm4_output_primitive_type +{ + VKD3D_SM4_OUTPUT_PT_POINTLIST = 0x1, + VKD3D_SM4_OUTPUT_PT_LINESTRIP = 0x3, + VKD3D_SM4_OUTPUT_PT_TRIANGLESTRIP = 0x5, +}; + +enum vkd3d_sm4_input_primitive_type +{ + VKD3D_SM4_INPUT_PT_POINT = 0x01, + VKD3D_SM4_INPUT_PT_LINE = 0x02, + VKD3D_SM4_INPUT_PT_TRIANGLE = 0x03, + VKD3D_SM4_INPUT_PT_LINEADJ = 0x06, + VKD3D_SM4_INPUT_PT_TRIANGLEADJ = 0x07, + VKD3D_SM5_INPUT_PT_PATCH1 = 0x08, + VKD3D_SM5_INPUT_PT_PATCH2 = 0x09, + VKD3D_SM5_INPUT_PT_PATCH3 = 0x0a, + VKD3D_SM5_INPUT_PT_PATCH4 = 0x0b, + VKD3D_SM5_INPUT_PT_PATCH5 = 0x0c, + VKD3D_SM5_INPUT_PT_PATCH6 = 
0x0d, + VKD3D_SM5_INPUT_PT_PATCH7 = 0x0e, + VKD3D_SM5_INPUT_PT_PATCH8 = 0x0f, + VKD3D_SM5_INPUT_PT_PATCH9 = 0x10, + VKD3D_SM5_INPUT_PT_PATCH10 = 0x11, + VKD3D_SM5_INPUT_PT_PATCH11 = 0x12, + VKD3D_SM5_INPUT_PT_PATCH12 = 0x13, + VKD3D_SM5_INPUT_PT_PATCH13 = 0x14, + VKD3D_SM5_INPUT_PT_PATCH14 = 0x15, + VKD3D_SM5_INPUT_PT_PATCH15 = 0x16, + VKD3D_SM5_INPUT_PT_PATCH16 = 0x17, + VKD3D_SM5_INPUT_PT_PATCH17 = 0x18, + VKD3D_SM5_INPUT_PT_PATCH18 = 0x19, + VKD3D_SM5_INPUT_PT_PATCH19 = 0x1a, + VKD3D_SM5_INPUT_PT_PATCH20 = 0x1b, + VKD3D_SM5_INPUT_PT_PATCH21 = 0x1c, + VKD3D_SM5_INPUT_PT_PATCH22 = 0x1d, + VKD3D_SM5_INPUT_PT_PATCH23 = 0x1e, + VKD3D_SM5_INPUT_PT_PATCH24 = 0x1f, + VKD3D_SM5_INPUT_PT_PATCH25 = 0x20, + VKD3D_SM5_INPUT_PT_PATCH26 = 0x21, + VKD3D_SM5_INPUT_PT_PATCH27 = 0x22, + VKD3D_SM5_INPUT_PT_PATCH28 = 0x23, + VKD3D_SM5_INPUT_PT_PATCH29 = 0x24, + VKD3D_SM5_INPUT_PT_PATCH30 = 0x25, + VKD3D_SM5_INPUT_PT_PATCH31 = 0x26, + VKD3D_SM5_INPUT_PT_PATCH32 = 0x27, +}; + +enum vkd3d_sm4_swizzle_type +{ + VKD3D_SM4_SWIZZLE_NONE = 0x0, + VKD3D_SM4_SWIZZLE_VEC4 = 0x1, + VKD3D_SM4_SWIZZLE_SCALAR = 0x2, +}; + +enum vkd3d_sm4_dimension +{ + VKD3D_SM4_DIMENSION_NONE = 0x0, + VKD3D_SM4_DIMENSION_SCALAR = 0x1, + VKD3D_SM4_DIMENSION_VEC4 = 0x2, +}; + +enum vkd3d_sm4_resource_type +{ + VKD3D_SM4_RESOURCE_BUFFER = 0x1, + VKD3D_SM4_RESOURCE_TEXTURE_1D = 0x2, + VKD3D_SM4_RESOURCE_TEXTURE_2D = 0x3, + VKD3D_SM4_RESOURCE_TEXTURE_2DMS = 0x4, + VKD3D_SM4_RESOURCE_TEXTURE_3D = 0x5, + VKD3D_SM4_RESOURCE_TEXTURE_CUBE = 0x6, + VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY = 0x7, + VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY = 0x8, + VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY = 0x9, + VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY = 0xa, + VKD3D_SM4_RESOURCE_RAW_BUFFER = 0xb, + VKD3D_SM4_RESOURCE_STRUCTURED_BUFFER = 0xc, +}; + +enum vkd3d_sm4_data_type +{ + VKD3D_SM4_DATA_UNORM = 0x1, + VKD3D_SM4_DATA_SNORM = 0x2, + VKD3D_SM4_DATA_INT = 0x3, + VKD3D_SM4_DATA_UINT = 0x4, + VKD3D_SM4_DATA_FLOAT = 0x5, + VKD3D_SM4_DATA_MIXED = 0x6, + 
VKD3D_SM4_DATA_DOUBLE = 0x7, + VKD3D_SM4_DATA_CONTINUED = 0x8, + VKD3D_SM4_DATA_UNUSED = 0x9, +}; + +enum vkd3d_sm4_sampler_mode +{ + VKD3D_SM4_SAMPLER_DEFAULT = 0x0, + VKD3D_SM4_SAMPLER_COMPARISON = 0x1, +}; + +enum vkd3d_sm4_shader_data_type +{ + VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER = 0x3, + VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4, +}; + +#endif /* __VKD3D_SM4_H */ diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 87e5801a..642dd13b 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -543,19 +543,6 @@ enum vkd3d_shader_conditional_op VKD3D_SHADER_CONDITIONAL_OP_Z = 1 };
-#define VKD3D_SM1_VS 0xfffeu -#define VKD3D_SM1_PS 0xffffu -#define VKD3D_SM4_PS 0x0000u -#define VKD3D_SM4_VS 0x0001u -#define VKD3D_SM4_GS 0x0002u -#define VKD3D_SM5_HS 0x0003u -#define VKD3D_SM5_DS 0x0004u -#define VKD3D_SM5_CS 0x0005u - -/* Shader version tokens, and shader end tokens */ -#define VKD3DPS_VERSION(major, minor) ((VKD3D_SM1_PS << 16) | ((major) << 8) | (minor)) -#define VKD3DVS_VERSION(major, minor) ((VKD3D_SM1_VS << 16) | ((major) << 8) | (minor)) - #define MAX_IMMEDIATE_CONSTANT_BUFFER_SIZE 4096 #define MAX_REG_OUTPUT 32
On Thu, Jul 1, 2021 at 5:10 AM Zebediah Figura zfigura@codeweavers.com wrote:
For clarity.
Signed-off-by: Matteo Bruni mbruni@codeweavers.com Signed-off-by: Zebediah Figura zfigura@codeweavers.com Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
v2: no change
libs/vkd3d-shader/hlsl.c | 3 +++ libs/vkd3d-shader/hlsl.h | 2 +- libs/vkd3d-shader/hlsl_codegen.c | 7 ------- 3 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 0b8c660c..bc593f82 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1664,6 +1664,9 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct hlsl_profile_info * if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, hlsl_strdup(ctx, "$Globals"), NULL, ctx->location))) return false;
+ if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT,
+ hlsl_strdup(ctx, "$Params"), NULL, ctx->location)))
+ return false;
ctx->cur_buffer = ctx->globals_buffer;
return true;
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index b62e7818..e0045acf 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -456,7 +456,7 @@ struct hlsl_ctx struct list extern_vars;
struct list buffers;
- struct hlsl_buffer *cur_buffer, *globals_buffer;
+ struct hlsl_buffer *cur_buffer, *globals_buffer, *params_buffer; struct list types; struct rb_tree functions; const struct hlsl_ir_function_decl *cur_function;
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 6336ddfc..9afa590a 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1150,13 +1150,6 @@ static void allocate_buffers(struct hlsl_ctx *ctx) hlsl_strdup(ctx, "$Params"), NULL, ctx->location))) return;
- /* The $Globals and $Params buffers should be allocated first, before all
- * explicit buffers. */
- list_remove(&params_buffer->entry);
- list_add_head(&ctx->buffers, &params_buffer->entry);
- list_remove(&ctx->globals_buffer->entry);
- list_add_head(&ctx->buffers, &ctx->globals_buffer->entry);
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform)
-- 2.32.0
On second thought, I withdraw my sign-off. It's only partially visible here, but it seems we're still (re)creating the $Params buffer right above this removed hunk.