On Mon, Nov 1, 2021 at 11:52 AM Nikolay Sivov nsivov@codeweavers.com wrote:
Signed-off-by: Nikolay Sivov nsivov@codeweavers.com
dlls/d3d10/effect.c | 556 +++++++++++++++++++++++++++++++++++++- dlls/d3d10/tests/effect.c | 114 ++++++++ 2 files changed, 669 insertions(+), 1 deletion(-)
diff --git a/dlls/d3d10/effect.c b/dlls/d3d10/effect.c index 78a3d0c386a..3ae2ad4f481 100644 --- a/dlls/d3d10/effect.c +++ b/dlls/d3d10/effect.c @@ -30,6 +30,9 @@ WINE_DEFAULT_DEBUG_CHANNEL(d3d10); ((DWORD)(ch2) << 16) | ((DWORD)(ch3) << 24 )) #define TAG_DXBC MAKE_TAG('D', 'X', 'B', 'C') #define TAG_FX10 MAKE_TAG('F', 'X', '1', '0') +#define TAG_FXLC MAKE_TAG('F', 'X', 'L', 'C') +#define TAG_CLI4 MAKE_TAG('C', 'L', 'I', '4') +#define TAG_CTAB MAKE_TAG('C', 'T', 'A', 'B')
#define D3D10_FX10_TYPE_COLUMN_SHIFT 11 #define D3D10_FX10_TYPE_COLUMN_MASK (0x7 << D3D10_FX10_TYPE_COLUMN_SHIFT) @@ -180,6 +183,89 @@ static enum d3d10_effect_container_type get_var_container_type(const struct d3d1 } }
+struct preshader_instr +{
- unsigned int comp_count : 16;
- unsigned int reserved : 4;
- unsigned int opcode : 11;
- unsigned int scalar : 1;
+};
I see now what's going on with this i.e. this is used to represent instructions as encoded in the bytecode but also after changing "their" opcode with "ours". Is that just to speed up execution?
I find this kind of setup a bit ugly. I do wonder what it would look like if this went all the way e.g. what about using this structure only for parsing and a separate "unpacked" struct preshader_instr for execution? Probably a fixed-size one, including the operands (and operands count) right in the same structure. It would be slightly more tricky at parse time (e.g. you won't know how much memory you'll need before parsing the whole preshader) while making the execution slightly more straightforward. Thinking about it, what I'm suggesting is probably not a good tradeoff and I'd be hard pressed to ask you to make this type of change anyway. I guess I'm mostly curious if you tried something like that (or otherwise something different) before settling on the current solution.
+enum preshader_op +{
- D3D10_PRESHADER_OP_FTOU = 0,
- D3D10_PRESHADER_OP_ADD,
- D3D10_PRESHADER_OP_LAST_OP,
D3D10_PRESHADER_OP_COUNT maybe? Also not great... Maybe just get rid of it and use ~0u or something for the sentinel value?
+};
+struct preshader_op_info +{
- unsigned short idx;
- unsigned short opcode;
- char name[8];
- pres_op_func func;
+};
I already mentioned that this is a bit confusing: idx is "our" opcode, while opcode is "dxbc"'s opcode. Just getting rid of idx, as you wrote in your previous reply, works for me.
+enum d3d10_reg_table_type +{
- D3D10_REG_TABLE_CONSTANTS = 1,
- D3D10_REG_TABLE_CB = 2,
- D3D10_REG_TABLE_RESULT = 4,
- D3D10_REG_TABLE_TEMP = 7,
- D3D10_REG_TABLE_LAST,
For this one D3D10_REG_TABLE_COUNT works.
+};
+struct d3d10_effect_preshader +{
- struct d3d10_reg_table reg_tables[D3D10_REG_TABLE_LAST];
- ID3D10Blob *code;
- struct d3d10_ctab_var *vars;
- unsigned int vars_count;
+};
+struct d3d10_preshader_parse_context +{
- struct d3d10_effect_preshader *preshader;
- struct d3d10_effect *effect;
+};
struct d3d10_effect_prop_dependency { unsigned int id; @@ -192,11 +278,117 @@ struct d3d10_effect_prop_dependency struct d3d10_effect_variable *v; unsigned int offset; } var;
struct
{
struct d3d10_effect_variable *v;
struct d3d10_effect_preshader index;
} u;} index_expr;
};
I still think that we can get rid of the "u" name for the union. It doesn't need to happen in this patch though.
+static HRESULT d3d10_effect_preshader_eval(struct d3d10_effect_preshader *p) +{
- unsigned int i, j, regt, offset, instr_count, input_count;
- const DWORD *ip = ID3D10Blob_GetBufferPointer(p->code);
- float *dst, *args[4], *retval;
- struct preshader_instr ins;
- dst = d3d10_effect_preshader_get_reg_ptr(p, D3D10_REG_TABLE_RESULT, 0);
- memset(dst, 0, sizeof(float) * p->reg_tables[D3D10_REG_TABLE_RESULT].count);
- /* Update constant buffer */
- dst = d3d10_effect_preshader_get_reg_ptr(p, D3D10_REG_TABLE_CB, 0);
- for (i = 0; i < p->vars_count; ++i)
- {
struct d3d10_ctab_var *v = &p->vars[i];
memcpy(dst + v->offset, v->v->buffer->u.buffer.local_buffer, v->length * sizeof(*dst))
- }
- instr_count = *ip++;
- for (i = 0; i < instr_count; ++i)
- {
*(DWORD *)&ins = *ip++;
input_count = *ip++;
if (input_count > ARRAY_SIZE(args))
{
FIXME("Unexpected argument count %u.\n", input_count);
return E_FAIL;
}
/* Arguments */
for (j = 0; j < input_count; ++j)
{
ip++; /* TODO: currently ignored field */
regt = *ip++;
offset = *ip++;
args[j] = d3d10_effect_preshader_get_reg_ptr(p, regt, offset);
}
ip++; /* TODO: currently ignored field */
regt = *ip++;
offset = *ip++;
retval = d3d10_effect_preshader_get_reg_ptr(p, regt, offset);
*retval = preshader_ops[ins.opcode].func(args, input_count);
- }
- return S_OK;
+}
Maybe add to the TODO comments an indication of what the ignored field is? If it makes sense.
static void d3d10_effect_clear_prop_dependencies(struct d3d10_effect_prop_dependencies *d) {
- unsigned int i;
- for (i = 0; i < d->count; ++i)
- {
struct d3d10_effect_prop_dependency *dep = &d->entries[i];
switch (dep->operation)
{
case D3D10_EOO_INDEX_EXPRESSION:
d3d10_effect_preshader_clear(&dep->u.index_expr.index);
break;
default:
;
}
- } heap_free(d->entries); memset(d, 0, sizeof(*d));
}
I think you can drop the default: entirely without triggering warnings.
@@ -546,6 +741,38 @@ static void d3d10_effect_update_dependent_props(struct d3d10_effect_prop_depende
break;
case D3D10_EOO_INDEX_EXPRESSION:
v = d->u.index_expr.v;
if (FAILED(hr = d3d10_effect_preshader_eval(&d->u.index_expr.index)))
{
WARN("Failed to evaluate index expression, hr %#x.\n", hr);
return;
}
variable_idx = *d->u.index_expr.index.reg_tables[D3D10_REG_TABLE_RESULT].u.dword;
if (variable_idx >= v->type->element_count)
{
WARN("Expression evaluated to invalid index value %u, array %s of size %u.\n",
variable_idx, debugstr_a(v->name), v->type->element_count);
return;
}
switch (property_info->type)
{
case D3D10_SVT_VERTEXSHADER:
case D3D10_SVT_PIXELSHADER:
case D3D10_SVT_GEOMETRYSHADER:
((void **)dst)[d->idx] = v;
*dst_index = variable_idx;
break;
default:
((void **)dst)[d->idx] = &v->elements[variable_idx];
}
break;
default: FIXME("Unsupported property update for %u.\n", d->operation); }
Again mostly wondering: does D3D10_EOO_INDEX_EXPRESSION apply to other object types, like textures? Similarly, have you seen any case where d->idx is not 0 for shaders? What does it even mean a non-zero idx in that case?
+static HRESULT parse_fx10_fxlc(void *ctx, const char *data, unsigned int data_size) +{
- struct d3d10_preshader_parse_context *context = ctx;
- struct d3d10_effect_preshader *p = context->preshader;
- uint32_t ins_count;
- const char *ptr;
- unsigned int i;
- HRESULT hr;
- if (data_size % sizeof(DWORD))
- {
WARN("FXLC size misaligned %u.\n", data_size);
return E_FAIL;
- }
- /* Parse through bytecode copy, so we can patch opcodes. */
- if (FAILED(hr = D3DCreateBlob(data_size, &p->code)))
return hr;
- memcpy(ID3D10Blob_GetBufferPointer(p->code), data, data_size);
- ptr = ID3D10Blob_GetBufferPointer(p->code);
Not a big deal, but you could move up this assignment to avoid duplicating the GetBufferPointer() call.
- read_dword(&ptr, &ins_count);
- TRACE("%u instructions.\n", ins_count);
- for (i = 0; i < ins_count; ++i)
- {
if (FAILED(hr = parse_fx10_preshader_instr(p, &ptr)))
{
WARN("Failed to parse instruction %u.\n", i);
return hr;
}
- }
- if (FAILED(hr = d3d10_reg_table_allocate(&p->reg_tables[D3D10_REG_TABLE_RESULT],
p->reg_tables[D3D10_REG_TABLE_RESULT].count))) return hr;
- if (FAILED(hr = d3d10_reg_table_allocate(&p->reg_tables[D3D10_REG_TABLE_TEMP],
p->reg_tables[D3D10_REG_TABLE_TEMP].count))) return hr;
All the calls to d3d10_reg_table_allocate() but one pass table->count as count value, which is then initialized again with the same value by the function. That works but it doesn't look particularly nice.
+static HRESULT parse_fx10_ctab(void *ctx, const char *data, unsigned int data_size) +{
- struct d3d10_preshader_parse_context *context = ctx;
- struct d3d10_effect_preshader *p = context->preshader;
- struct ctab_header
- {
DWORD size;
DWORD creator;
DWORD version;
DWORD constants;
DWORD constantinfo;
DWORD flags;
DWORD target;
- } header;
- struct ctab_const_info
- {
DWORD name;
WORD register_set;
WORD register_index;
WORD register_count;
WORD reserved;
DWORD typeinfo;
DWORD default_value;
- } *info;
- unsigned int i, cb_reg_count = 0;
- const char *ptr = data;
- const char *name;
- size_t name_len;
- HRESULT hr;
- if (data_size < sizeof(header))
- {
WARN("Invalid constant table size %u.\n", data_size);
return E_FAIL;
- }
- read_dword(&ptr, &header.size);
- read_dword(&ptr, &header.creator);
- read_dword(&ptr, &header.version);
- read_dword(&ptr, &header.constants);
- read_dword(&ptr, &header.constantinfo);
- read_dword(&ptr, &header.flags);
- read_dword(&ptr, &header.target);
- if (!require_space(header.constantinfo, header.constants, sizeof(*info) / sizeof(DWORD),
data_size))
I think you want plain sizeof(*info) there, without any division.
diff --git a/dlls/d3d10/tests/effect.c b/dlls/d3d10/tests/effect.c index da644d8d9db..c2ed9c6071e 100644 --- a/dlls/d3d10/tests/effect.c +++ b/dlls/d3d10/tests/effect.c @@ -7801,6 +7801,119 @@ static void test_effect_dynamic_numeric_field(void) ok(!refcount, "Device has %u references left.\n", refcount); }
+#if 0 +float4 g_var; +PixelShader ps_array[2]; +VertexShader vs_array[2]; +technique10 tech +{
- pass p0
- {
SetPixelShader( ps_array[g_var.z] );
SetVertexShader( vs_array[g_var.x + 0.1f] );
- }
+} +#endif +static DWORD fx_test_index_expression[] = +{
- 0x43425844, 0x83787296, 0xf866b7a9, 0x7b56930c, 0xdc9d061a, 0x00000001, 0x000003e9, 0x00000001,
- 0x00000024, 0x30315846, 0x000003bd, 0xfeff1001, 0x00000001, 0x00000001, 0x00000002, 0x00000000,
- 0x00000000, 0x00000000, 0x00000001, 0x000002cd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000, 0x6f6c4724,
- 0x736c6162, 0x6f6c6600, 0x00347461, 0x0000000d, 0x00000001, 0x00000000, 0x00000010, 0x00000010,
- 0x00000010, 0x0000210a, 0x61765f67, 0x69500072, 0x536c6578, 0x65646168, 0x00360072, 0x00020000,
- 0x00020000, 0x00000000, 0x00000000, 0x00000000, 0x00050000, 0x73700000, 0x7272615f, 0x56007961,
- 0x65747265, 0x61685378, 0x00726564, 0x00000067, 0x00000002, 0x00000002, 0x00000000, 0x00000000,
- 0x00000000, 0x00000006, 0x615f7376, 0x79617272, 0x63657400, 0x30700068, 0x0000ec00, 0x42584400,
- 0x990a9743, 0xd17834e6, 0x40de477e, 0xf476a79f, 0x00000101, 0x0000ec00, 0x00000300, 0x00002c00,
- 0x0000a800, 0x0000b400, 0x41544300, 0x00007442, 0x00001c00, 0x00004b00, 0x58040000, 0x00000146,
- 0x00001c00, 0x00010000, 0x00004800, 0x00003000, 0x00000200, 0x00000100, 0x00003800, 0x00000000,
- 0x765f6700, 0xab007261, 0x030001ab, 0x04000100, 0x00000100, 0x00000000, 0x00787400, 0x7263694d,
- 0x666f736f, 0x52282074, 0x4c482029, 0x53204c53, 0x65646168, 0x6f432072, 0x6c69706d, 0x31207265,
- 0x00312e30, 0x494c43ab, 0x00000434, 0x00000000, 0x4c584600, 0x00003043, 0x00000100, 0x30000100,
- 0x00000113, 0x00000000, 0x00000200, 0x00000200, 0x00000000, 0x00000400, 0x00000000, 0xf0f0f000,
- 0x0f0f0ff0, 0x00ffff0f, 0x00005e00, 0x0000a100, 0x00012800, 0x42584400, 0x78de2e43, 0x31414e7a,
- 0xf69158cd, 0x416c97b6, 0x00000192, 0x00012800, 0x00000300, 0x00002c00, 0x0000a800, 0x0000c400,
- 0x41544300, 0x00007442, 0x00001c00, 0x00004b00, 0x58040000, 0x00000146, 0x00001c00, 0x00010000,
- 0x00004800, 0x00003000, 0x00000200, 0x00000100, 0x00003800, 0x00000000, 0x765f6700, 0xab007261,
- 0x030001ab, 0x04000100, 0x00000100, 0x00000000, 0x00787400, 0x7263694d, 0x666f736f, 0x52282074,
- 0x4c482029, 0x53204c53, 0x65646168, 0x6f432072, 0x6c69706d, 0x31207265, 0x00312e30, 0x494c43ab,
- 0x00001434, 0x00000400, 0xcccccd00, 0x0000003d, 0x00000000, 0x00000000, 0x4c584600, 0x00005c43,
- 0x00000200, 0x40000100, 0x00000220, 0x00000000, 0x00000200, 0x00000000, 0x00000000, 0x00000100,
- 0x00000000, 0x00000000, 0x00000700, 0x00000000, 0x30000100, 0x00000113, 0x00000000, 0x00000700,
- 0x00000000, 0x00000000, 0x00000400, 0x00000000, 0xf0f0f000, 0x0f0f0ff0, 0x00ffff0f, 0x00009000,
- 0x00019900, 0x00000400, 0x00001000, 0x00000000, 0x00000100, 0xffffff00, 0x000000ff, 0x00003000,
- 0x00001400, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00005e00, 0x00004200,
- 0x00000000, 0xffffff00, 0x000000ff, 0x00000000, 0x00000000, 0x00009000, 0x00007400, 0x00000000,
- 0xffffff00, 0x000000ff, 0x00000000, 0x00000000, 0x00009900, 0x00000100, 0x00000000, 0x00009e00,
- 0x00000200, 0x00000000, 0x00000700, 0x00000000, 0x00000500, 0x00019100, 0x00000600, 0x00000000,
- 0x00000500, 0x0002c500, 0x00000000,
+};
+static void test_effect_index_expression(void) +{
- ID3D10EffectTechnique *tech;
- ID3D10EffectPass *pass;
- ID3D10Effect *effect;
- ID3D10Device *device;
- ULONG refcount;
- HRESULT hr;
- D3D10_PASS_SHADER_DESC shader_desc;
- ID3D10EffectVectorVariable *vector;
- ID3D10EffectVariable *v;
- float val[4];
Nitpick, order of the declarations. Nice test otherwise.