Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index e254cede..81e60101 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -717,7 +717,7 @@ static struct hlsl_reg allocate_range(struct liveness *liveness,
static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) { - if (type->reg_size > 4) + if (type->reg_size > 1) return vkd3d_dbg_sprintf("%c%u-%c%u", class, reg.id, class, reg.id + type->reg_size - 1); return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask));
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- Makefile.am | 1 + include/vkd3d_d3d9types.h | 121 +++++++++++++++++++++++++++++++ libs/vkd3d-shader/hlsl.c | 2 +- libs/vkd3d-shader/hlsl.h | 3 +- libs/vkd3d-shader/hlsl_codegen.c | 55 +++++++++++++- 5 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 include/vkd3d_d3d9types.h
diff --git a/Makefile.am b/Makefile.am index a79ad02f..2ab2233a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -19,6 +19,7 @@ vkd3d_public_headers = \ include/vkd3d.h \ include/vkd3d_d3d12.h \ include/vkd3d_d3d12sdklayers.h \ + include/vkd3d_d3d9types.h \ include/vkd3d_d3dcommon.h \ include/vkd3d_d3dcompiler.h \ include/vkd3d_dxgibase.h \ diff --git a/include/vkd3d_d3d9types.h b/include/vkd3d_d3d9types.h new file mode 100644 index 00000000..de2ba471 --- /dev/null +++ b/include/vkd3d_d3d9types.h @@ -0,0 +1,121 @@ +/* + * Copyright 2002-2003 Jason Edmeades + * Copyright 2002-2003 Raphael Junqueira + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_D3D9TYPES_H +#define __VKD3D_D3D9TYPES_H +#ifndef _d3d9TYPES_H_ + +#define D3DPS_VERSION(major, minor) (0xffff0000 | ((major) << 8) | (minor)) +#define D3DVS_VERSION(major, minor) (0xfffe0000 | ((major) << 8) | (minor)) + +typedef enum _D3DSHADER_INSTRUCTION_OPCODE_TYPE +{ + D3DSIO_NOP = 0x00, + D3DSIO_MOV = 0x01, + D3DSIO_ADD = 0x02, + D3DSIO_SUB = 0x03, + D3DSIO_MAD = 0x04, + D3DSIO_MUL = 0x05, + D3DSIO_RCP = 0x06, + D3DSIO_RSQ = 0x07, + D3DSIO_DP3 = 0x08, + D3DSIO_DP4 = 0x09, + D3DSIO_MIN = 0x0a, + D3DSIO_MAX = 0x0b, + D3DSIO_SLT = 0x0c, + D3DSIO_SGE = 0x0d, + D3DSIO_EXP = 0x0e, + D3DSIO_LOG = 0x0f, + D3DSIO_LIT = 0x10, + D3DSIO_DST = 0x11, + D3DSIO_LRP = 0x12, + D3DSIO_FRC = 0x13, + D3DSIO_M4x4 = 0x14, + D3DSIO_M4x3 = 0x15, + D3DSIO_M3x4 = 0x16, + D3DSIO_M3x3 = 0x17, + D3DSIO_M3x2 = 0x18, + D3DSIO_CALL = 0x19, + D3DSIO_CALLNZ = 0x1a, + D3DSIO_LOOP = 0x1b, + D3DSIO_RET = 0x1c, + D3DSIO_ENDLOOP = 0x1d, + D3DSIO_LABEL = 0x1e, + D3DSIO_DCL = 0x1f, + D3DSIO_POW = 0x20, + D3DSIO_CRS = 0x21, + D3DSIO_SGN = 0x22, + D3DSIO_ABS = 0x23, + D3DSIO_NRM = 0x24, + D3DSIO_SINCOS = 0x25, + D3DSIO_REP = 0x26, + D3DSIO_ENDREP = 0x27, + D3DSIO_IF = 0x28, + D3DSIO_IFC = 0x29, + D3DSIO_ELSE = 0x2a, + D3DSIO_ENDIF = 0x2b, + D3DSIO_BREAK = 0x2c, + D3DSIO_BREAKC = 0x2d, + D3DSIO_MOVA = 0x2e, + D3DSIO_DEFB = 0x2f, + D3DSIO_DEFI = 0x30, + + D3DSIO_TEXCOORD = 0x40, + D3DSIO_TEXKILL = 0x41, + D3DSIO_TEX = 0x42, + D3DSIO_TEXBEM = 0x43, + D3DSIO_TEXBEML = 0x44, + D3DSIO_TEXREG2AR = 0x45, + D3DSIO_TEXREG2GB = 0x46, + D3DSIO_TEXM3x2PAD = 0x47, + D3DSIO_TEXM3x2TEX = 0x48, + D3DSIO_TEXM3x3PAD = 0x49, + D3DSIO_TEXM3x3TEX = 0x4a, + D3DSIO_TEXM3x3DIFF = 0x4b, + D3DSIO_TEXM3x3SPEC = 0x4c, + D3DSIO_TEXM3x3VSPEC = 0x4d, + D3DSIO_EXPP = 0x4e, + D3DSIO_LOGP = 0x4f, + D3DSIO_CND = 0x50, + D3DSIO_DEF = 0x51, + D3DSIO_TEXREG2RGB = 0x52, + D3DSIO_TEXDP3TEX = 0x53, + D3DSIO_TEXM3x2DEPTH = 0x54, + D3DSIO_TEXDP3 = 0x55, + D3DSIO_TEXM3x3 = 0x56, + D3DSIO_TEXDEPTH = 0x57, + D3DSIO_CMP = 0x58, + D3DSIO_BEM = 0x59, + D3DSIO_DP2ADD = 0x5a, + D3DSIO_DSX = 0x5b, + D3DSIO_DSY = 0x5c, + D3DSIO_TEXLDD = 0x5d, + D3DSIO_SETP = 0x5e, + D3DSIO_TEXLDL = 0x5f, + D3DSIO_BREAKP = 0x60, + + D3DSIO_PHASE = 0xfffd, + D3DSIO_COMMENT = 0xfffe, + D3DSIO_END = 0xffff, + + D3DSIO_FORCE_DWORD = 0x7fffffff, +} D3DSHADER_INSTRUCTION_OPCODE_TYPE; + +#endif /* _d3d9TYPES_H_ */ +#endif /* __VKD3D_D3D9TYPES_H */ diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index d797aa10..10105eaf 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1671,7 +1671,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d hlsl_error(&ctx, entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point "%s" is missing a return value semantic.", entry_point);
- ret = hlsl_emit_dxbc(&ctx, entry_func); + ret = hlsl_emit_dxbc(&ctx, entry_func, dxbc);
hlsl_ctx_cleanup(&ctx); return ret; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 923eff55..8493f749 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -530,7 +530,8 @@ bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var
void hlsl_dump_function(const struct hlsl_ir_function_decl *func) DECLSPEC_HIDDEN;
-int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) DECLSPEC_HIDDEN; +int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + struct vkd3d_shader_code *out) DECLSPEC_HIDDEN;
void hlsl_free_instr(struct hlsl_ir_node *node) DECLSPEC_HIDDEN; void hlsl_free_instr_list(struct list *list) DECLSPEC_HIDDEN; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 81e60101..9ebd1490 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -20,6 +20,7 @@
#include "hlsl.h" #include <stdio.h> +#include "vkd3d_d3d9types.h"
/* Split uniforms into two variables representing the constant and temp * registers, and copy the former to the latter, so that writes to uniforms @@ -873,7 +874,53 @@ static void allocate_temp_registers(struct hlsl_ir_function_decl *entry_func) allocate_temp_registers_recurse(entry_func->body, &liveness); }
-int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +struct bytecode_buffer +{ + uint32_t *data; + size_t count, size; + int status; +}; + +static void put_dword(struct bytecode_buffer *buffer, uint32_t value) +{ + if (buffer->status) + return; + + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->size, buffer->count + 1, sizeof(*buffer->data))) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + buffer->data[buffer->count++] = value; +} + +static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) +{ + if (type == VKD3D_SHADER_TYPE_VERTEX) + return D3DVS_VERSION(major, minor); + else + return D3DPS_VERSION(major, minor); +} + +static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + struct vkd3d_shader_code *out) +{ + struct bytecode_buffer buffer = {0}; + int ret; + + put_dword(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + + put_dword(&buffer, D3DSIO_END); + + if (!(ret = buffer.status)) + { + out->code = buffer.data; + out->size = buffer.count * sizeof(uint32_t); + } + return ret; +} + +int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) { struct hlsl_ir_var *var;
@@ -916,5 +963,9 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
if (ctx->failed) return VKD3D_ERROR_INVALID_SHADER; - return VKD3D_ERROR_NOT_IMPLEMENTED; + + if (ctx->profile->major_version < 4) + return write_sm1_shader(ctx, entry_func, out); + else + return VKD3D_ERROR_NOT_IMPLEMENTED; }
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
On Fri, 16 Apr 2021 at 02:04, Zebediah Figura zfigura@codeweavers.com wrote:
+static void put_dword(struct bytecode_buffer *buffer, uint32_t value) +{
- if (buffer->status)
return;
- if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->size, buffer->count + 1, sizeof(*buffer->data)))
- {
buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
return;
- }
- buffer->data[buffer->count++] = value;
+}
I'll leave it to Matteo and you to decide how much you care, but for consistency, perhaps "emit_u32()"?
On 4/20/21 9:30 AM, Henri Verbeet wrote:
On Fri, 16 Apr 2021 at 02:04, Zebediah Figura zfigura@codeweavers.com wrote:
+static void put_dword(struct bytecode_buffer *buffer, uint32_t value) +{
- if (buffer->status)
return;
- if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->size, buffer->count + 1, sizeof(*buffer->data)))
- {
buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
return;
- }
- buffer->data[buffer->count++] = value;
+}
I'll leave it to Matteo and you to decide how much you care, but for consistency, perhaps "emit_u32()"?
I'm fine with this as well.
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- libs/vkd3d-shader/hlsl.c | 1 + libs/vkd3d-shader/hlsl.h | 3 ++- libs/vkd3d-shader/hlsl_codegen.c | 22 ++++++++++++---------- 3 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 10105eaf..50b40d65 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1578,6 +1578,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct hlsl_profile_info * rb_init(&ctx->functions, compare_function_rb);
list_init(&ctx->static_initializers); + list_init(&ctx->extern_vars);
return true; } diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 8493f749..3a0bf38d 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -213,7 +213,7 @@ struct hlsl_ir_var const char *name; const char *semantic; const struct hlsl_reg_reservation *reg_reservation; - struct list scope_entry, param_entry; + struct list scope_entry, param_entry, extern_entry;
unsigned int first_write, last_read; struct hlsl_reg reg; @@ -418,6 +418,7 @@ struct hlsl_ctx struct hlsl_scope *cur_scope; struct hlsl_scope *globals; struct list scopes; + struct list extern_vars;
struct list types; struct rb_tree functions; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 9ebd1490..12d5988e 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -45,7 +45,8 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru return; } vkd3d_string_buffer_release(&ctx->string_buffers, name); - list_add_head(&ctx->globals->vars, &const_var->scope_entry); + list_add_before(&var->scope_entry, &const_var->scope_entry); + list_add_tail(&ctx->extern_vars, &const_var->extern_entry); var->is_uniform = 0; const_var->is_uniform = 1;
@@ -87,7 +88,8 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct } vkd3d_string_buffer_release(&ctx->string_buffers, name); varying->is_input_varying = 1; - list_add_head(&ctx->globals->vars, &varying->scope_entry); + list_add_before(&var->scope_entry, &varying->scope_entry); + list_add_tail(&ctx->extern_vars, &varying->extern_entry);
if (!(load = hlsl_new_var_load(varying, var->loc))) { @@ -164,7 +166,8 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct } vkd3d_string_buffer_release(&ctx->string_buffers, name); varying->is_output_varying = 1; - list_add_head(&ctx->globals->vars, &varying->scope_entry); + list_add_before(&var->scope_entry, &varying->scope_entry); + list_add_tail(&ctx->extern_vars, &varying->extern_entry);
if (!(offset = hlsl_new_uint_constant(ctx, field_offset * 4, var->loc))) { @@ -593,7 +596,7 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl var->first_write = var->last_read = 0; }
- LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform || var->is_input_varying) var->first_write = 1; @@ -601,11 +604,6 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl var->last_read = UINT_MAX; }
- LIST_FOR_EACH_ENTRY(var, entry_func->parameters, struct hlsl_ir_var, param_entry) - { - var->first_write = 1; - } - if (entry_func->return_var) entry_func->return_var->last_read = UINT_MAX;
@@ -846,7 +844,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi struct liveness liveness = {0}; struct hlsl_ir_var *var;
- LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform && var->last_read) { @@ -928,12 +926,16 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) { + if (var->data_type->type == HLSL_CLASS_OBJECT) + list_add_tail(&ctx->extern_vars, &var->extern_entry); if (var->is_uniform) prepend_uniform_copy(ctx, entry_func->body, var); }
LIST_FOR_EACH_ENTRY(var, entry_func->parameters, struct hlsl_ir_var, param_entry) { + if (var->data_type->type == HLSL_CLASS_OBJECT) + list_add_tail(&ctx->extern_vars, &var->extern_entry); if (var->is_uniform) prepend_uniform_copy(ctx, entry_func->body, var); if (var->is_input_varying)
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 12d5988e..25227057 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -25,39 +25,43 @@ /* Split uniforms into two variables representing the constant and temp * registers, and copy the former to the latter, so that writes to uniforms * work. */ -static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) +static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) { struct vkd3d_string_buffer *name; - struct hlsl_ir_var *const_var; + struct hlsl_ir_var *uniform; struct hlsl_ir_store *store; struct hlsl_ir_load *load;
- if (!(name = vkd3d_string_buffer_get(&ctx->string_buffers))) + /* Use the synthetic name for the temp, rather than the uniform, so that we + * can write the uniform name into the shader reflection data. */ + + if (!(uniform = hlsl_new_var(temp->name, temp->data_type, temp->loc, NULL, temp->reg_reservation))) { ctx->failed = true; return; } - vkd3d_string_buffer_printf(name, "<uniform-%s>", var->name); - if (!(const_var = hlsl_new_var(vkd3d_strdup(name->buffer), var->data_type, var->loc, NULL, var->reg_reservation))) + list_add_before(&temp->scope_entry, &uniform->scope_entry); + list_add_tail(&ctx->extern_vars, &uniform->extern_entry); + temp->is_uniform = 0; + uniform->is_uniform = 1; + + if (!(name = vkd3d_string_buffer_get(&ctx->string_buffers))) { - vkd3d_string_buffer_release(&ctx->string_buffers, name); ctx->failed = true; return; } + vkd3d_string_buffer_printf(name, "<temp-%s>", temp->name); + temp->name = vkd3d_strdup(name->buffer); vkd3d_string_buffer_release(&ctx->string_buffers, name); - list_add_before(&var->scope_entry, &const_var->scope_entry); - list_add_tail(&ctx->extern_vars, &const_var->extern_entry); - var->is_uniform = 0; - const_var->is_uniform = 1;
- if (!(load = hlsl_new_var_load(const_var, var->loc))) + if (!(load = hlsl_new_var_load(uniform, temp->loc))) { ctx->failed = true; return; } list_add_head(instrs, &load->node.entry);
- if (!(store = hlsl_new_simple_store(var, &load->node))) + if (!(store = hlsl_new_simple_store(temp, &load->node))) { ctx->failed = true; return;
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- I think a better way to handle this is to use a separate temp variable for each LOAD of the uniform, thus replacing each uniform LOAD with a STORE to a temp variable and a LOAD from that. That way we get less register pressure without having to complicate the register allocation or liveness computation algorithms. That is assuming that register pressure on the temporary registers is quite problematic for SM2 or even SM3 shaders since there are so few of them and there is no spilling to save the day.
Of course there is a complication with that, that is preserving "writes" to uniforms. That could be taken into account pretty simply with write tracking, but another option is to do the above optimization only when legacy mode is not set (AFAIU writing to uniforms / globals is only allowed on older d3dx9 / d3dcompiler versions).
Just food for thought. Also, ignore everything if you have something fancier already queued up.
On 4/20/21 3:15 AM, Matteo Bruni wrote:
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
I think a better way to handle this is to use a separate temp variable for each LOAD of the uniform, thus replacing each uniform LOAD with a STORE to a temp variable and a LOAD from that. That way we get less register pressure without having to complicate the register allocation or liveness computation algorithms. That is assuming that register pressure on the temporary registers is quite problematic for SM2 or even SM3 shaders since there are so few of them and there is no spilling to save the day.
Of course there is a complication with that, that is preserving "writes" to uniforms. That could be taken into account pretty simply with write tracking, but another option is to do the above optimization only when legacy mode is not set (AFAIU writing to uniforms / globals is only allowed on older d3dx9 / d3dcompiler versions).
Just food for thought. Also, ignore everything if you have something fancier already queued up.
It's a bit orthogonal to this patch, but yes, I see the point. It's not clear to me that write tracking isn't more complicated, though. Personally I'm inclined to leave it alone until it actually does become a problem.
(as a side note—I don't know if it makes sense to try to code register limits into the HLSL compiler (or validator). As I understand wined3d/vkd3d shader translation layers won't care, which removes much of the reason for us to. Not that wine should be considered vkd3d-shader's only customer, but...)
On Tue, Apr 20, 2021 at 5:30 PM Zebediah Figura (she/her) zfigura@codeweavers.com wrote:
On 4/20/21 3:15 AM, Matteo Bruni wrote:
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
I think a better way to handle this is to use a separate temp variable for each LOAD of the uniform, thus replacing each uniform LOAD with a STORE to a temp variable and a LOAD from that. That way we get less register pressure without having to complicate the register allocation or liveness computation algorithms. That is assuming that register pressure on the temporary registers is quite problematic for SM2 or even SM3 shaders since there are so few of them and there is no spilling to save the day.
Of course there is a complication with that, that is preserving "writes" to uniforms. That could be taken into account pretty simply with write tracking, but another option is to do the above optimization only when legacy mode is not set (AFAIU writing to uniforms / globals is only allowed on older d3dx9 / d3dcompiler versions).
Just food for thought. Also, ignore everything if you have something fancier already queued up.
It's a bit orthogonal to this patch, but yes, I see the point. It's not clear to me that write tracking isn't more complicated, though. Personally I'm inclined to leave it alone until it actually does become a problem.
That's certainly fine. I probably misspoke about write tracking, for some reason I was thinking we had a last_write thing already but we obviously don't :#
The point about "legacy mode" or something like that stands though and is more generally relevant. The HLSL compiler was heavily overhauled with the October 2006 SDK, among other things dropping ps_1_* output support, and the parser again with the March 2008 SDK, which made it quite a bit more strict. Not sure off the top of my head what DLL versions those correspond to (the former is probably around _32 / _33 time given that the option to use the older compiler is called D3DXSHADER_USE_LEGACY_D3DX9_31_DLL) but I can try to find out. For reference, the latter in particular is what made writing to globals invalid. Again it's not urgent by any means but it's also something to keep in mind.
(as a side note—I don't know if it makes sense to try to code register limits into the HLSL compiler (or validator). As I understand wined3d/vkd3d shader translation layers won't care, which removes much of the reason for us to. Not that wine should be considered vkd3d-shader's only customer, but...)
Yeah, sure, this should only matter on "native" d3d, at least for temporary variables. Not so much for uniforms or varyings though (but those have a different set of issues and solutions, e.g. you want to coalesce identical constants).
On 4/20/21 3:19 PM, Matteo Bruni wrote:
On Tue, Apr 20, 2021 at 5:30 PM Zebediah Figura (she/her) zfigura@codeweavers.com wrote:
On 4/20/21 3:15 AM, Matteo Bruni wrote:
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
I think a better way to handle this is to use a separate temp variable for each LOAD of the uniform, thus replacing each uniform LOAD with a STORE to a temp variable and a LOAD from that. That way we get less register pressure without having to complicate the register allocation or liveness computation algorithms. That is assuming that register pressure on the temporary registers is quite problematic for SM2 or even SM3 shaders since there are so few of them and there is no spilling to save the day.
Of course there is a complication with that, that is preserving "writes" to uniforms. That could be taken into account pretty simply with write tracking, but another option is to do the above optimization only when legacy mode is not set (AFAIU writing to uniforms / globals is only allowed on older d3dx9 / d3dcompiler versions).
Just food for thought. Also, ignore everything if you have something fancier already queued up.
It's a bit orthogonal to this patch, but yes, I see the point. It's not clear to me that write tracking isn't more complicated, though. Personally I'm inclined to leave it alone until it actually does become a problem.
That's certainly fine. I probably misspoke about write tracking, for some reason I was thinking we had a last_write thing already but we obviously don't :#
The point about "legacy mode" or something like that stands though and is more generally relevant. The HLSL compiler was heavily overhauled with the October 2006 SDK, among other things dropping ps_1_* output support, and the parser again with the March 2008 SDK, which made it quite a bit more strict. Not sure off the top of my head what DLL versions those correspond to (the former is probably around _32 / _33 time given that the option to use the older compiler is called D3DXSHADER_USE_LEGACY_D3DX9_31_DLL) but I can try to find out. For reference, the latter in particular is what made writing to globals invalid. Again it's not urgent by any means but it's also something to keep in mind.
You're right, I should have mentioned that. I think I checked varyings—which are always valid to both read from and write to—and then assumed that the same would be true of uniforms, which isn't the case.
Thus far I've avoided touching vkd3d options because API design is hard, but I can at least put it on the long-term list of things to look into...
(as a side note—I don't know if it makes sense to try to code register limits into the HLSL compiler (or validator). As I understand wined3d/vkd3d shader translation layers won't care, which removes much of the reason for us to. Not that wine should be considered vkd3d-shader's only customer, but...)
Yeah, sure, this should only matter on "native" d3d, at least for temporary variables. Not so much for uniforms or varyings though (but those have a different set of issues and solutions, e.g. you want to coalesce identical constants).
Indeed, I'll have to write some validation for those.
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- Makefile.am | 2 + include/.gitignore | 1 + include/vkd3d_d3d9types.h | 6 + include/vkd3d_d3dx9shader.idl | 76 +++++++++ libs/vkd3d-shader/hlsl.h | 7 +- libs/vkd3d-shader/hlsl.y | 1 + libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++- 7 files changed, 344 insertions(+), 3 deletions(-) create mode 100644 include/vkd3d_d3dx9shader.idl
diff --git a/Makefile.am b/Makefile.am index 2ab2233a..cbf75ad8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -7,6 +7,7 @@ widl_headers = \ include/vkd3d_d3d12.h \ include/vkd3d_d3d12sdklayers.h \ include/vkd3d_d3dcommon.h \ + include/vkd3d_d3dx9shader.h \ include/vkd3d_dxgi.h \ include/vkd3d_dxgi1_2.h \ include/vkd3d_dxgi1_3.h \ @@ -22,6 +23,7 @@ vkd3d_public_headers = \ include/vkd3d_d3d9types.h \ include/vkd3d_d3dcommon.h \ include/vkd3d_d3dcompiler.h \ + include/vkd3d_d3dx9shader.h \ include/vkd3d_dxgibase.h \ include/vkd3d_dxgiformat.h \ include/vkd3d_shader.h \ diff --git a/include/.gitignore b/include/.gitignore index 3c711da0..37b101d4 100644 --- a/include/.gitignore +++ b/include/.gitignore @@ -5,6 +5,7 @@ stamp-h1 vkd3d_d3d12.h vkd3d_d3d12sdklayers.h vkd3d_d3dcommon.h +vkd3d_d3dx9shader.h vkd3d_dxgi.h vkd3d_dxgi1_2.h vkd3d_dxgi1_3.h diff --git a/include/vkd3d_d3d9types.h b/include/vkd3d_d3d9types.h index de2ba471..7a8c15f4 100644 --- a/include/vkd3d_d3d9types.h +++ b/include/vkd3d_d3d9types.h @@ -21,6 +21,12 @@ #define __VKD3D_D3D9TYPES_H #ifndef _d3d9TYPES_H_
+#ifndef MAKEFOURCC +#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ + ((DWORD)(BYTE)(ch0) | ((DWORD)(BYTE)(ch1) << 8) | \ + ((DWORD)(BYTE)(ch2) << 16) | ((DWORD)(BYTE)(ch3) << 24 )) +#endif + #define D3DPS_VERSION(major, minor) (0xffff0000 | ((major) << 8) | (minor)) #define D3DVS_VERSION(major, minor) (0xfffe0000 | ((major) << 8) | (minor))
diff --git a/include/vkd3d_d3dx9shader.idl b/include/vkd3d_d3dx9shader.idl new file mode 100644 index 00000000..4000ff29 --- /dev/null +++ b/include/vkd3d_d3dx9shader.idl @@ -0,0 +1,76 @@ +/* + * Copyright 2008 Luis Busquets + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +import "vkd3d_windows.h"; +import "vkd3d_d3d9types.h"; + +typedef enum _D3DXREGISTER_SET +{ + D3DXRS_BOOL, + D3DXRS_INT4, + D3DXRS_FLOAT4, + D3DXRS_SAMPLER, + D3DXRS_FORCE_DWORD = 0x7fffffff, +} D3DXREGISTER_SET; + +typedef enum D3DXPARAMETER_CLASS +{ + D3DXPC_SCALAR, + D3DXPC_VECTOR, + D3DXPC_MATRIX_ROWS, + D3DXPC_MATRIX_COLUMNS, + D3DXPC_OBJECT, + D3DXPC_STRUCT, + D3DXPC_FORCE_DWORD = 0x7fffffff, +} D3DXPARAMETER_CLASS; + +typedef enum D3DXPARAMETER_TYPE +{ + D3DXPT_VOID, + D3DXPT_BOOL, + D3DXPT_INT, + D3DXPT_FLOAT, + D3DXPT_STRING, + D3DXPT_TEXTURE, + D3DXPT_TEXTURE1D, + D3DXPT_TEXTURE2D, + D3DXPT_TEXTURE3D, + D3DXPT_TEXTURECUBE, + D3DXPT_SAMPLER, + D3DXPT_SAMPLER1D, + D3DXPT_SAMPLER2D, + D3DXPT_SAMPLER3D, + D3DXPT_SAMPLERCUBE, + D3DXPT_PIXELSHADER, + D3DXPT_VERTEXSHADER, + D3DXPT_PIXELFRAGMENT, + D3DXPT_VERTEXFRAGMENT, + D3DXPT_UNSUPPORTED, + D3DXPT_FORCE_DWORD = 0x7fffffff, +} D3DXPARAMETER_TYPE; + +typedef struct _D3DXSHADER_CONSTANTTABLE +{ + DWORD Size; + DWORD Creator; + DWORD Version; + DWORD Constants; + DWORD ConstantInfo; + DWORD Flags; + DWORD Target; +} D3DXSHADER_CONSTANTTABLE; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 3a0bf38d..3dc7a26b 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -115,7 +115,6 @@ struct hlsl_type unsigned int modifiers; unsigned int dimx; unsigned int dimy; - unsigned int reg_size; union { struct list *elements; @@ -125,6 +124,9 @@ struct hlsl_type unsigned int elements_count; } array; } e; + + unsigned int reg_size; + unsigned int bytecode_offset; };
struct hlsl_struct_field @@ -134,7 +136,9 @@ struct hlsl_struct_field struct hlsl_type *type; const char *name; const char *semantic; + unsigned int reg_offset; + unsigned int name_offset; };
struct hlsl_reg @@ -221,6 +225,7 @@ struct hlsl_ir_var uint32_t is_input_varying : 1; uint32_t is_output_varying : 1; uint32_t is_uniform : 1; + uint32_t is_param : 1; };
struct hlsl_ir_function diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 6216b100..8cd80533 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -765,6 +765,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct list *list,
if (!(var = hlsl_new_var(param->name, param->type, loc, param->semantic, param->reg_reservation))) return false; + var->is_param = 1;
if (param->type->type != HLSL_CLASS_OBJECT) { diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 25227057..73f8bdb0 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -20,7 +20,7 @@
#include "hlsl.h" #include <stdio.h> -#include "vkd3d_d3d9types.h" +#include "vkd3d_d3dx9shader.h"
/* Split uniforms into two variables representing the constant and temp * registers, and copy the former to the latter, so that writes to uniforms @@ -44,6 +44,7 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru list_add_tail(&ctx->extern_vars, &uniform->extern_entry); temp->is_uniform = 0; uniform->is_uniform = 1; + uniform->is_param = temp->is_param;
if (!(name = vkd3d_string_buffer_get(&ctx->string_buffers))) { @@ -92,6 +93,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct } vkd3d_string_buffer_release(&ctx->string_buffers, name); varying->is_input_varying = 1; + varying->is_param = var->is_param; list_add_before(&var->scope_entry, &varying->scope_entry); list_add_tail(&ctx->extern_vars, &varying->extern_entry);
@@ -170,6 +172,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct } vkd3d_string_buffer_release(&ctx->string_buffers, name); varying->is_output_varying = 1; + varying->is_param = var->is_param; list_add_before(&var->scope_entry, &varying->scope_entry); list_add_tail(&ctx->extern_vars, &varying->extern_entry);
@@ -896,6 +899,34 @@ static void put_dword(struct bytecode_buffer *buffer, uint32_t value) buffer->data[buffer->count++] = value; }
+static void set_dword(struct bytecode_buffer *buffer, unsigned int index, uint32_t value) +{ + if (buffer->status) + return; + + assert(index < buffer->count); + buffer->data[index] = value; +} + +static void put_string(struct bytecode_buffer *buffer, const char *str) +{ + size_t len = strlen(str) + 1; + unsigned int token_count = (len + 3) / sizeof(*buffer->data); + + if (buffer->status) + return; + + if (!vkd3d_array_reserve((void **)&buffer->data, &buffer->size, buffer->count + token_count, sizeof(*buffer->data))) + { + buffer->status = E_OUTOFMEMORY; + return; + } + + buffer->data[buffer->count + token_count - 1] = 0xabababab; + memcpy(buffer->data + buffer->count, str, len); + buffer->count += token_count; +} + static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) { if (type == VKD3D_SHADER_TYPE_VERTEX) @@ -904,6 +935,223 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns return D3DPS_VERSION(major, minor); }
+static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) +{ + switch (type->type) + { + case HLSL_CLASS_ARRAY: + return sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: + assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else + return D3DXPC_MATRIX_ROWS; + case HLSL_CLASS_OBJECT: + return D3DXPC_OBJECT; + case HLSL_CLASS_SCALAR: + return D3DXPC_SCALAR; + case HLSL_CLASS_STRUCT: + return D3DXPC_STRUCT; + case HLSL_CLASS_VECTOR: + return D3DXPC_VECTOR; + default: + ERR("Invalid class %#x.\n", type->type); + assert(0); + return 0; + } +} + +static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +{ + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + return D3DXPT_BOOL; + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + return D3DXPT_FLOAT; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + return D3DXPT_INT; + case HLSL_TYPE_PIXELSHADER: + return D3DXPT_PIXELSHADER; + case HLSL_TYPE_SAMPLER: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_SAMPLER1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_SAMPLER2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_SAMPLER3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_SAMPLERCUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_SAMPLER; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + } + break; + case HLSL_TYPE_STRING: + return D3DXPT_STRING; + case HLSL_TYPE_TEXTURE: + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: + return D3DXPT_TEXTURE1D; + case HLSL_SAMPLER_DIM_2D: + return D3DXPT_TEXTURE2D; + case HLSL_SAMPLER_DIM_3D: + return D3DXPT_TEXTURE3D; + case HLSL_SAMPLER_DIM_CUBE: + return D3DXPT_TEXTURECUBE; + case HLSL_SAMPLER_DIM_GENERIC: + return D3DXPT_TEXTURE; + default: + ERR("Invalid dimension %#x.\n", type->sampler_dim); + } + break; + case HLSL_TYPE_VERTEXSHADER: + return D3DXPT_VERTEXSHADER; + case HLSL_TYPE_VOID: + return D3DXPT_VOID; + default: + assert(0); + } + assert(0); + return 0; +} + +static const struct hlsl_type *get_array_type(const struct hlsl_type *type) +{ + if (type->type == HLSL_CLASS_ARRAY) + return get_array_type(type->e.array.type); + return type; +} + +static unsigned int get_array_size(const struct hlsl_type *type) +{ + if (type->type == HLSL_CLASS_ARRAY) + return get_array_size(type->e.array.type) * type->e.array.elements_count; + return 1; +} + +static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) +{ + const struct hlsl_type *array_type = get_array_type(type); + unsigned int fields_offset = 0, field_count = 0; + unsigned int array_size = get_array_size(type); + struct hlsl_struct_field *field; + + if (type->bytecode_offset) + return; + + if (array_type->type == HLSL_CLASS_STRUCT) + { + LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry) + { + field->name_offset = buffer->count; + put_string(buffer, field->name); + write_sm1_type(buffer, field->type, ctab_start); + } + + fields_offset = (buffer->count - ctab_start) * sizeof(*buffer->data); + + LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry) + { + put_dword(buffer, (field->name_offset - ctab_start) * sizeof(*buffer->data)); + put_dword(buffer, (field->type->bytecode_offset - ctab_start) * sizeof(*buffer->data)); + ++field_count; + } + } + + type->bytecode_offset = buffer->count; + put_dword(buffer, sm1_class(type) | (sm1_base_type(type) << 16)); + put_dword(buffer, type->dimy | (type->dimx << 16)); + put_dword(buffer, array_size | (field_count << 16)); + put_dword(buffer, fields_offset); +} + +static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, + struct hlsl_ir_function_decl *entry_func) +{ + unsigned int ctab_start, vars_start; + unsigned int uniform_count = 0; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->semantic && var->reg.allocated) + { + ++uniform_count; + + if (var->is_param && var->is_uniform) + { + struct vkd3d_string_buffer *name; + + if (!(name = vkd3d_string_buffer_get(&ctx->string_buffers))) + { + buffer->status = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + vkd3d_string_buffer_printf(name, "$%s", var->name); + vkd3d_free((char *)var->name); + var->name = vkd3d_strdup(name->buffer); + vkd3d_string_buffer_release(&ctx->string_buffers, name); + } + } + } + + put_dword(buffer, 0); /* COMMENT tag + size */ + put_dword(buffer, MAKEFOURCC('C','T','A','B')); + + ctab_start = buffer->count; + + put_dword(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* size of this header */ + put_dword(buffer, 0); /* creator */ + put_dword(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_dword(buffer, uniform_count); + put_dword(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */ + put_dword(buffer, 0); /* FIXME: flags */ + put_dword(buffer, 0); /* FIXME: target string */ + + vars_start = buffer->count; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->semantic && var->reg.allocated) + { + put_dword(buffer, 0); /* name */ + put_dword(buffer, D3DXRS_FLOAT4 | (var->reg.id << 16)); + put_dword(buffer, var->data_type->reg_size); + put_dword(buffer, 0); /* type */ + put_dword(buffer, 0); /* FIXME: default value */ + } + } + + uniform_count = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->semantic && var->reg.allocated) + { + set_dword(buffer, vars_start + (uniform_count * 5), (buffer->count - ctab_start) * sizeof(*buffer->data)); + put_string(buffer, var->name); + + write_sm1_type(buffer, var->data_type, ctab_start); + set_dword(buffer, vars_start + (uniform_count * 5) + 3, + (var->data_type->bytecode_offset - ctab_start) * sizeof(*buffer->data)); + ++uniform_count; + } + } + + set_dword(buffer, ctab_start + 1, (buffer->count - ctab_start) * sizeof(*buffer->data)); + put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); + + set_dword(buffer, ctab_start - 2, D3DSIO_COMMENT | ((buffer->count - (ctab_start - 1)) << 16)); +} + static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) { @@ -912,12 +1160,14 @@ static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *
put_dword(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
+ write_sm1_uniforms(ctx, &buffer, entry_func); + put_dword(&buffer, D3DSIO_END);
if (!(ret = buffer.status)) { out->code = buffer.data; - out->size = buffer.count * sizeof(uint32_t); + out->size = buffer.count * sizeof(*buffer.data); } return ret; }
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
On Fri, Apr 16, 2021 at 2:04 AM Zebediah Figura zfigura@codeweavers.com wrote:
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
Makefile.am | 2 + include/.gitignore | 1 + include/vkd3d_d3d9types.h | 6 + include/vkd3d_d3dx9shader.idl | 76 +++++++++ libs/vkd3d-shader/hlsl.h | 7 +- libs/vkd3d-shader/hlsl.y | 1 + libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++- 7 files changed, 344 insertions(+), 3 deletions(-) create mode 100644 include/vkd3d_d3dx9shader.idl
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 3a0bf38d..3dc7a26b 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -115,7 +115,6 @@ struct hlsl_type unsigned int modifiers; unsigned int dimx; unsigned int dimy;
- unsigned int reg_size; union { struct list *elements;
@@ -125,6 +124,9 @@ struct hlsl_type unsigned int elements_count; } array; } e;
- unsigned int reg_size;
- unsigned int bytecode_offset;
};
struct hlsl_struct_field @@ -134,7 +136,9 @@ struct hlsl_struct_field struct hlsl_type *type; const char *name; const char *semantic;
- unsigned int reg_offset;
- unsigned int name_offset;
Although these are indeed two offsets, they offset into very different things. I don't know that I have any good suggestions (name_bc_offset?) and I guess I'm not too disturbed by the current naming anyway, but I feel there is room for improvement.
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 25227057..73f8bdb0 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
- {
if (!var->semantic && var->reg.allocated)
{
put_dword(buffer, 0); /* name */
put_dword(buffer, D3DXRS_FLOAT4 | (var->reg.id << 16));
This is fine for now, but maybe a comment mentioning that we might not always want the float register set here would be useful. Relatedly, we might have the same variable allocated to multiple register sets (e.g. if it's used in a MOV and in a FOR) so in theory there needs to be a loop (and we need to support multiple register allocations per var, or something to the same effect). Again, no need to change the code right away, just something that might be worth a comment.
On 4/20/21 3:16 AM, Matteo Bruni wrote:
On Fri, Apr 16, 2021 at 2:04 AM Zebediah Figura zfigura@codeweavers.com wrote:
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
Makefile.am | 2 + include/.gitignore | 1 + include/vkd3d_d3d9types.h | 6 + include/vkd3d_d3dx9shader.idl | 76 +++++++++ libs/vkd3d-shader/hlsl.h | 7 +- libs/vkd3d-shader/hlsl.y | 1 + libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++- 7 files changed, 344 insertions(+), 3 deletions(-) create mode 100644 include/vkd3d_d3dx9shader.idl
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 3a0bf38d..3dc7a26b 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -115,7 +115,6 @@ struct hlsl_type unsigned int modifiers; unsigned int dimx; unsigned int dimy;
- unsigned int reg_size; union { struct list *elements;
@@ -125,6 +124,9 @@ struct hlsl_type unsigned int elements_count; } array; } e;
- unsigned int reg_size;
- unsigned int bytecode_offset;
};
struct hlsl_struct_field @@ -134,7 +136,9 @@ struct hlsl_struct_field struct hlsl_type *type; const char *name; const char *semantic;
- unsigned int reg_offset;
- unsigned int name_offset;
Although these are indeed two offsets, they offset into very different things. I don't know that I have any good suggestions (name_bc_offset?) and I guess I'm not too disturbed by the current naming anyway, but I feel there is room for improvement.
Yes, you're right; honestly the mistake here is grouping them together via whitespace I think, but of course the naming is what led me to do that. "name_bytecode_offset" strikes me as at least a better name than "name_offset".
I've vacillated for a while, also; it may make more sense not to store reg_offset + reg_size but instead to calculate them as needed. But I've tried that approach a couple times and the code doesn't look prettier :-(
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 25227057..73f8bdb0 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
- {
if (!var->semantic && var->reg.allocated)
{
put_dword(buffer, 0); /* name */
put_dword(buffer, D3DXRS_FLOAT4 | (var->reg.id << 16));
This is fine for now, but maybe a comment mentioning that we might not always want the float register set here would be useful. Relatedly, we might have the same variable allocated to multiple register sets (e.g. if it's used in a MOV and in a FOR) so in theory there needs to be a loop (and we need to support multiple register allocations per var, or something to the same effect). Again, no need to change the code right away, just something that might be worth a comment.
Fair enough. Actually I'd be inclined to handle this by creating multiple hlsl_ir_var objects.
On Tue, Apr 20, 2021 at 10:15 PM Zebediah Figura (she/her) zfigura@codeweavers.com wrote:
On 4/20/21 3:16 AM, Matteo Bruni wrote:
On Fri, Apr 16, 2021 at 2:04 AM Zebediah Figura zfigura@codeweavers.com wrote:
Signed-off-by: Zebediah Figura zfigura@codeweavers.com
Makefile.am | 2 + include/.gitignore | 1 + include/vkd3d_d3d9types.h | 6 + include/vkd3d_d3dx9shader.idl | 76 +++++++++ libs/vkd3d-shader/hlsl.h | 7 +- libs/vkd3d-shader/hlsl.y | 1 + libs/vkd3d-shader/hlsl_codegen.c | 254 ++++++++++++++++++++++++++++++- 7 files changed, 344 insertions(+), 3 deletions(-) create mode 100644 include/vkd3d_d3dx9shader.idl
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 3a0bf38d..3dc7a26b 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -115,7 +115,6 @@ struct hlsl_type unsigned int modifiers; unsigned int dimx; unsigned int dimy;
- unsigned int reg_size; union { struct list *elements;
@@ -125,6 +124,9 @@ struct hlsl_type unsigned int elements_count; } array; } e;
- unsigned int reg_size;
- unsigned int bytecode_offset;
};
struct hlsl_struct_field @@ -134,7 +136,9 @@ struct hlsl_struct_field struct hlsl_type *type; const char *name; const char *semantic;
- unsigned int reg_offset;
- unsigned int name_offset;
Although these are indeed two offsets, they offset into very different things. I don't know that I have any good suggestions (name_bc_offset?) and I guess I'm not too disturbed by the current naming anyway, but I feel there is room for improvement.
Yes, you're right; honestly the mistake here is grouping them together via whitespace I think, but of course the naming is what led me to do that. "name_bytecode_offset" strikes me as at least a better name than "name_offset".
I've vacillated for a while, also; it may make more sense not to store reg_offset + reg_size but instead to calculate them as needed. But I've tried that approach a couple times and the code doesn't look prettier :-(
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 25227057..73f8bdb0 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c
- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
- {
if (!var->semantic && var->reg.allocated)
{
put_dword(buffer, 0); /* name */
put_dword(buffer, D3DXRS_FLOAT4 | (var->reg.id << 16));
This is fine for now, but maybe a comment mentioning that we might not always want the float register set here would be useful. Relatedly, we might have the same variable allocated to multiple register sets (e.g. if it's used in a MOV and in a FOR) so in theory there needs to be a loop (and we need to support multiple register allocations per var, or something to the same effect). Again, no need to change the code right away, just something that might be worth a comment.
Fair enough. Actually I'd be inclined to handle this by creating multiple hlsl_ir_var objects.
That sounds better than my idea.
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
On Fri, 16 Apr 2021 at 02:04, Zebediah Figura zfigura@codeweavers.com wrote:
- ctab_start = buffer->count;
- put_dword(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* size of this header */
- put_dword(buffer, 0); /* creator */
...
- set_dword(buffer, ctab_start + 1, (buffer->count - ctab_start) * sizeof(*buffer->data));
- put_string(buffer, vkd3d_shader_get_version(NULL, NULL));
Just a thought, but it might be helpful for things like put_dword() and put_string() to return the offset at which they wrote something, so that you could instead do something like the following:
ctab_base = put_dword(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); creator_offset = put_dword(buffer, 0); ... offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); set_dword(buffer, creator_offset, (offset - ctab_base) * sizeof(*buffer->data));
On 4/20/21 9:30 AM, Henri Verbeet wrote:
On Fri, 16 Apr 2021 at 02:04, Zebediah Figura zfigura@codeweavers.com wrote:
- ctab_start = buffer->count;
- put_dword(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* size of this header */
- put_dword(buffer, 0); /* creator */
...
- set_dword(buffer, ctab_start + 1, (buffer->count - ctab_start) * sizeof(*buffer->data));
- put_string(buffer, vkd3d_shader_get_version(NULL, NULL));
Just a thought, but it might be helpful for things like put_dword() and put_string() to return the offset at which they wrote something, so that you could instead do something like the following:
ctab_base = put_dword(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); creator_offset = put_dword(buffer, 0); ... offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); set_dword(buffer, creator_offset, (offset - ctab_base) * sizeof(*buffer->data));
Thanks, I do like that idea.
Signed-off-by: Zebediah Figura zfigura@codeweavers.com --- libs/vkd3d-shader/hlsl_codegen.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 73f8bdb0..1b5e8a27 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1073,6 +1073,34 @@ static void write_sm1_type(struct bytecode_buffer *buffer, struct hlsl_type *typ put_dword(buffer, fields_offset); }
+static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +{ + struct hlsl_ir_var *var; + + list_remove(&to_sort->extern_entry); + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { + if (strcmp(to_sort->name, var->name) < 0) + { + list_add_before(&var->extern_entry, &to_sort->extern_entry); + return; + } + } + + list_add_tail(sorted, &to_sort->extern_entry); +} + +static void sm1_sort_externs(struct hlsl_ctx *ctx) +{ + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; + + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + sm1_sort_extern(&sorted, var); + list_move_tail(&ctx->extern_vars, &sorted); +} + static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, struct hlsl_ir_function_decl *entry_func) { @@ -1103,6 +1131,8 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buf } }
+ sm1_sort_externs(ctx); + put_dword(buffer, 0); /* COMMENT tag + size */ put_dword(buffer, MAKEFOURCC('C','T','A','B'));
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com