Signed-off-by: Zebediah Figura <zfigura@codeweavers.com> --- include/vkd3d_d3d9types.h | 41 +++++++++ libs/vkd3d-shader/hlsl_codegen.c | 137 +++++++++++++++++++++++++++---- 2 files changed, 164 insertions(+), 14 deletions(-)
diff --git a/include/vkd3d_d3d9types.h b/include/vkd3d_d3d9types.h index 7a8c15f4..ba7dcbfb 100644 --- a/include/vkd3d_d3d9types.h +++ b/include/vkd3d_d3d9types.h @@ -27,6 +27,19 @@ ((DWORD)(BYTE)(ch2) << 16) | ((DWORD)(BYTE)(ch3) << 24 )) #endif
+#define D3DSI_INSTLENGTH_SHIFT 24 /* bit position of the instruction length within an opcode token */ + +#define D3DSP_REGTYPE_SHIFT 28 /* register type bits 0-2 are stored at bits 28-30 of a parameter token */ +#define D3DSP_REGTYPE_SHIFT2 8 /* register type bits 3-4 are stored at bits 11-12 */ +#define D3DSP_REGTYPE_MASK (0x7 << D3DSP_REGTYPE_SHIFT) +#define D3DSP_REGTYPE_MASK2 0x00001800 + +#define D3DSP_WRITEMASK_0 0x00010000 /* one write-mask bit per component, bits 16-19 */ +#define D3DSP_WRITEMASK_1 0x00020000 +#define D3DSP_WRITEMASK_2 0x00040000 +#define D3DSP_WRITEMASK_3 0x00080000 +#define D3DSP_WRITEMASK_ALL 0x000f0000 /* all four component bits */ + #define D3DPS_VERSION(major, minor) (0xffff0000 | ((major) << 8) | (minor)) #define D3DVS_VERSION(major, minor) (0xfffe0000 | ((major) << 8) | (minor))
@@ -123,5 +136,33 @@ typedef enum _D3DSHADER_INSTRUCTION_OPCODE_TYPE D3DSIO_FORCE_DWORD = 0x7fffffff, } D3DSHADER_INSTRUCTION_OPCODE_TYPE;
+typedef enum _D3DSHADER_PARAM_REGISTER_TYPE /* register types as packed into the D3DSP_REGTYPE_* fields */ +{ + D3DSPR_TEMP = 0x00, + D3DSPR_INPUT = 0x01, + D3DSPR_CONST = 0x02, + D3DSPR_ADDR = 0x03, + D3DSPR_TEXTURE = 0x03, /* same value as D3DSPR_ADDR */ + D3DSPR_RASTOUT = 0x04, + D3DSPR_ATTROUT = 0x05, + D3DSPR_TEXCRDOUT = 0x06, + D3DSPR_OUTPUT = 0x06, /* same value as D3DSPR_TEXCRDOUT */ + D3DSPR_CONSTINT = 0x07, + D3DSPR_COLOROUT = 0x08, + D3DSPR_DEPTHOUT = 0x09, + D3DSPR_SAMPLER = 0x0a, + D3DSPR_CONST2 = 0x0b, + D3DSPR_CONST3 = 0x0c, + D3DSPR_CONST4 = 0x0d, + D3DSPR_CONSTBOOL = 0x0e, + D3DSPR_LOOP = 0x0f, + D3DSPR_TEMPFLOAT16 = 0x10, + D3DSPR_MISCTYPE = 0x11, + D3DSPR_LABEL = 0x12, + D3DSPR_PREDICATE = 0x13, + + D3DSPR_FORCE_DWORD = 0x7fffffff, /* NOTE(review): presumably only widens the enum to 32 bits - confirm */ +} D3DSHADER_PARAM_REGISTER_TYPE; + #endif /* _d3d9TYPES_H_ */ #endif /* __VKD3D_D3D9TYPES_H */ diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 0fd1f6bd..21ebcfc2 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -804,7 +804,19 @@ static void allocate_temp_registers_recurse(struct list *instrs, struct liveness } }
-static void allocate_const_registers_recurse(struct list *instrs, struct liveness *liveness) +struct vec4 /* the four float components of one constant register */ +{ + float f[4]; +}; + +struct constant_defs /* values of the literal constants, recorded while registers are allocated */ +{ + struct vec4 *values; /* indexed by constant register id */ + size_t count, size; /* count: registers to emit defs for; size: bookkeeping for vkd3d_array_reserve(), presumably the capacity */ +}; + +static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct liveness *liveness, + struct constant_defs *defs) { struct hlsl_ir_node *instr;
@@ -815,28 +827,83 @@ static void allocate_const_registers_recurse(struct list *instrs, struct livenes case HLSL_IR_CONSTANT: { struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + const struct hlsl_type *type = instr->data_type; + unsigned int reg_size = type->reg_size, x, y, i, writemask;
- if (instr->data_type->reg_size > 1) - constant->reg = allocate_range(liveness, 1, UINT_MAX, instr->data_type->reg_size); + if (reg_size > 1) + constant->reg = allocate_range(liveness, 1, UINT_MAX, reg_size); else - constant->reg = allocate_register(liveness, 1, UINT_MAX, instr->data_type->dimx); - TRACE("Allocated constant @%u to %s.\n", instr->index, - debug_register('c', constant->reg, instr->data_type)); + constant->reg = allocate_register(liveness, 1, UINT_MAX, type->dimx); + TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); + + if (!vkd3d_array_reserve((void **)&defs->values, &defs->size, + constant->reg.id + reg_size, sizeof(*defs->values))) /* room for every register this constant covers */ + { + ctx->failed = true; + return; + } /* NOTE(review): components outside the write mask are never written below; confirm the array starts zeroed */ + defs->count = defs->count > constant->reg.id + reg_size ? defs->count : constant->reg.id + reg_size; /* high-water mark: ids are not guaranteed to ascend */ + + assert(type->type <= HLSL_CLASS_LAST_NUMERIC); + + if (!(writemask = constant->reg.writemask)) + writemask = (1 << type->dimx) - 1; /* no mask recorded: use the first dimx components */ + + for (y = 0; y < type->dimy; ++y) + { + for (x = 0, i = 0; x < 4; ++x) /* NOTE(review): i restarts for each row y; confirm dimy > 1 cannot reach here */ + { + float f; + + if (!(writemask & (1 << x))) + continue; + + switch (type->base_type) + { + case HLSL_TYPE_BOOL: + f = constant->value.b[i++]; + break; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + f = constant->value.f[i++]; + break; + + case HLSL_TYPE_INT: + f = constant->value.i[i++]; + break; + + case HLSL_TYPE_UINT: + f = constant->value.u[i++]; + break; + + case HLSL_TYPE_DOUBLE: + FIXME("Double constant.\n"); + return; /* NOTE(review): leaves the whole pass, so later instructions get no register */ + + default: + assert(0); + return; + } + defs->values[constant->reg.id + y].f[x] = f; /* every base type is stored converted to float */ + } + } + break; }
case HLSL_IR_IF: { struct hlsl_ir_if *iff = hlsl_ir_if(instr); - allocate_const_registers_recurse(&iff->then_instrs, liveness); - allocate_const_registers_recurse(&iff->else_instrs, liveness); + allocate_const_registers_recurse(ctx, &iff->then_instrs, liveness, defs); + allocate_const_registers_recurse(ctx, &iff->else_instrs, liveness, defs); break; }
case HLSL_IR_LOOP: { struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); - allocate_const_registers_recurse(&loop->body, liveness); + allocate_const_registers_recurse(ctx, &loop->body, liveness, defs); break; }
@@ -846,11 +913,14 @@ static void allocate_const_registers_recurse(struct list *instrs, struct livenes } }
-static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +static struct constant_defs allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { + struct constant_defs defs = {0}; /* starts empty: no values, count 0 */ struct liveness liveness = {0}; struct hlsl_ir_var *var;
+ allocate_const_registers_recurse(ctx, entry_func->body, &liveness, &defs); /* literal constants are now allocated before the uniforms, from an empty liveness map */ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform && var->last_read) @@ -866,7 +936,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi } }
- allocate_const_registers_recurse(entry_func->body, &liveness); + return defs; /* returned by value; defs.values points at memory obtained through vkd3d_array_reserve() */ }
/* Simple greedy temporary register allocation pass that just assigns a unique @@ -1186,8 +1256,44 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct bytecode_buffer *buf set_dword(buffer, size_offset, D3DSIO_COMMENT | ((buffer->count - (ctab_start - 1)) << 16)); }
+static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) +{ /* the type is split: bits 0-2 go to bits 28-30, bits 3-4 go to bits 11-12 */ + return (((uint32_t)type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) + | (((uint32_t)type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); /* shift as uint32_t: types >= 8 would overflow a signed int at bit 28 */ +} + +static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct bytecode_buffer *buffer, struct constant_defs *defs) +{ /* emits one D3DSIO_DEF instruction per recorded constant register */ + unsigned int i, x; + + for (i = 0; i < defs->count; ++i) /* register ids 0 .. count - 1 */ + { + uint32_t token = D3DSIO_DEF; + + if (ctx->profile->major_version > 1) + token |= 5 << D3DSI_INSTLENGTH_SHIFT; /* five dwords follow the opcode: the register token and four values */ + put_dword(buffer, token); + + token = (1u << 31); /* NOTE(review): bit 31 presumably marks a parameter token - confirm */ + token |= sm1_encode_register_type(D3DSPR_CONST); + token |= D3DSP_WRITEMASK_ALL; + token |= i; /* register number in the low bits */ + put_dword(buffer, token); + for (x = 0; x < 4; ++x) + { + union + { + float f; + uint32_t u; + } u; /* reinterprets the float's bits as a dword */ + u.f = defs->values[i].f[x]; + put_dword(buffer, u.u); + } + } +} + static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - struct vkd3d_shader_code *out) + struct constant_defs *constant_defs, struct vkd3d_shader_code *out) { struct bytecode_buffer buffer = {0}; int ret; @@ -1196,6 +1302,8 @@ static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *
write_sm1_uniforms(ctx, &buffer, entry_func);
+ write_sm1_constant_defs(ctx, &buffer, constant_defs); + put_dword(&buffer, D3DSIO_END);
if (!(ret = buffer.status)) @@ -1208,6 +1316,7 @@ static int write_sm1_shader(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *
int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) { + struct constant_defs constant_defs = {0}; /* zeroed so that freeing is safe when no constants were allocated */ struct hlsl_ir_var *var;
list_move_head(entry_func->body, &ctx->static_initializers); @@ -1249,13 +1358,21 @@ int hlsl_emit_dxbc(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
allocate_temp_registers(entry_func); if (ctx->profile->major_version < 4) - allocate_const_registers(ctx, entry_func); + constant_defs = allocate_const_registers(ctx, entry_func);
if (ctx->failed) + { + vkd3d_free(constant_defs.values); /* allocation may have recorded values before failing */ return VKD3D_ERROR_INVALID_SHADER; + }
if (ctx->profile->major_version < 4) - return write_sm1_shader(ctx, entry_func, out); + { + int ret = write_sm1_shader(ctx, entry_func, &constant_defs, out); + + vkd3d_free(constant_defs.values); /* the recorded values are only needed while the bytecode is written */ + return ret; + } else return VKD3D_ERROR_NOT_IMPLEMENTED; }