Convert all consecutive calls to d7_DrawPrimitive(TRIANGLE_FAN) into a single call to d7_DrawPrimitive(TRIANGLE_LIST) with all the vertices.
Note, it *increase* the number of vertices, but bandwith is much less costly than multiple calls.
Note, only a very precise subset of the calls get buffered in order to ensure that the disruption is minimal.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=33814
-- v17: ddraw: Also Buffer D3DPT_POINTLIST ddraw: Add a local buffer in d3d_device7_DrawPrimitive()
From: Steve Schnepp steve.schnepp@pwkf.org
Convert all consecutive calls to d7_DrawPrimitive() into a single call to wined3d.
The buffer is flushed upon every state change. It is also flushed when incompatible arguments are passed to d7_DrawPrimitive(), such as a changing fvf or incompatible primitive_type.
Note, the call does *increase* the number of data to transmit, but in a limited fashion as it leverages indexed wined3d primitives. And bandwith overhead is much less costly than multiple calls overhead.
Finally, only a subset of the calls get buffered for now in order to ensure that the disruption is minimal.
It is opt-in and enabled via WINEDEBUG="+ddraw_buffer"
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=33814 --- dlls/ddraw/ddraw_private.h | 15 ++++ dlls/ddraw/device.c | 177 ++++++++++++++++++++++++++++++++++++- 2 files changed, 190 insertions(+), 2 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 09e8133350b..2a322117202 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -317,6 +317,18 @@ DWORD ddraw_allocate_handle(struct ddraw_handle_table *t, void *object, enum ddr void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN; void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
+struct d3d_device_buffer { + DWORD fvf; + UINT stride; + DWORD buffer_indice_count; + DWORD buffer_vertex_count; + WORD *buffer_indices; + char *buffer_vertices; + + unsigned int idx_buffer_pos; + unsigned int vertex_buffer_pos; +}; + struct d3d_device { /* IUnknown */ @@ -369,6 +381,9 @@ struct d3d_device
struct wined3d_stateblock *recording, *state, *update_state; const struct wined3d_stateblock_state *stateblock_state; + + /* Vertices Buffer for squashing DrawPrimitive() calls before sending it to wined3d */ + struct d3d_device_buffer ddraw_device_buffer; };
HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_surface *target, IUnknown *rt_iface, diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index 1cfef5007d5..d3562d43e13 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -31,6 +31,11 @@
WINE_DEFAULT_DEBUG_CHANNEL(ddraw); WINE_DECLARE_DEBUG_CHANNEL(winediag); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_perf); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_buffer); + +static HRESULT ddraw_buffer_flush(struct d3d_device *device); +static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride);
/* The device ID */ const GUID IID_D3DDEVICE_WineD3D = { @@ -1592,6 +1597,8 @@ static HRESULT d3d_device7_EndScene(IDirect3DDevice7 *iface)
TRACE("iface %p.\n", iface);
+ ddraw_buffer_flush(device); + wined3d_mutex_lock(); hr = wined3d_device_end_scene(device->wined3d_device); wined3d_mutex_unlock(); @@ -2548,6 +2555,8 @@ static HRESULT d3d_device7_SetRenderState(IDirect3DDevice7 *iface,
TRACE("iface %p, state %#x, value %#lx.\n", iface, state, value);
+ ddraw_buffer_flush(device); + wined3d_mutex_lock(); /* Some render states need special care */ switch (state) @@ -3451,10 +3460,39 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
+ if (!TRACE_ON(ddraw_buffer)) goto old; + + hr = ddraw_buffer_add(device, primitive_type, fvf, vertices, vertex_count, flags, stride); + if (hr == D3D_OK) { + /* Buffered successfuly -> returning immediatly :-) */ + return D3D_OK; + } + + FIXME("iface %p, primitive_type %#x, fvf %#lx, vertices %p, vertex_count %lu, flags %#lx.\n", + iface, primitive_type, fvf, vertices, vertex_count, flags); + FIXME("cannot buffer, flushing first\n"); + + /* Cannot buffer, need to flush the rest, then process this one */ + hr = ddraw_buffer_flush(device); + + hr = ddraw_buffer_add(device, primitive_type, fvf, vertices, vertex_count, flags, stride); + if (hr == D3D_OK) { + /* Buffered successfuly -> returning immediatly :-) */ + return D3D_OK; + } + + FIXME("REALLY cannot buffer, skipping\n"); + + /* let's skip it */ + return hr; + +old: + wined3d_mutex_lock();
- if (FAILED(hr = wined3d_streaming_buffer_upload(device->wined3d_device, - &device->vertex_buffer, vertices, size, stride, &vb_pos))) + hr = wined3d_streaming_buffer_upload(device->wined3d_device, + &device->vertex_buffer, vertices, size, stride, &vb_pos); + if (FAILED(hr)) goto done;
hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, stride); @@ -4641,6 +4679,8 @@ static HRESULT d3d_device7_SetTexture(IDirect3DDevice7 *iface,
TRACE("iface %p, stage %lu, texture %p.\n", iface, stage, texture);
+ ddraw_buffer_flush(device); + if (surf && (surf->surface_desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE)) { if (surf->draw_texture) @@ -4925,6 +4965,8 @@ static HRESULT d3d_device7_SetTextureStageState(IDirect3DDevice7 *iface, TRACE("iface %p, stage %lu, state %#x, value %#lx.\n", iface, stage, state, value);
+ ddraw_buffer_flush(device); + if (state > D3DTSS_TEXTURETRANSFORMFLAGS) { WARN("Invalid state %#x passed.\n", state); @@ -6877,6 +6919,9 @@ static HRESULT d3d_device_init(struct d3d_device *device, struct ddraw *ddraw, c wined3d_streaming_buffer_init(&device->vertex_buffer, WINED3D_BIND_VERTEX_BUFFER); wined3d_streaming_buffer_init(&device->index_buffer, WINED3D_BIND_INDEX_BUFFER);
+ /* Initialize the ddraw triangle buffer. */ + device->ddraw_device_buffer.buffer_vertex_count = 0; + /* Render to the back buffer */ rtv = ddraw_surface_get_rendertarget_view(target); if (FAILED(hr = wined3d_device_context_set_rendertarget_views(device->immediate_context, 0, 1, &rtv, TRUE))) @@ -6963,3 +7008,131 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su
return D3D_OK; } + +/* + * DirectD3D vertex buffer helpers + * + * For now, it only supports : + * d3d_device.version == 7 + * primitive_type == D3DPT_TRIANGLEFAN + * flags == 0 + * vertex_count >= 3 + * + * Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST. + */ +static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { + int buffer_indice_count_initial = device->ddraw_device_buffer.buffer_vertex_count; + + if (primitive_type != D3DPT_TRIANGLEFAN) return WINED3DERR_NOTAVAILABLE; + if (flags) return WINED3DERR_NOTAVAILABLE; + + if (vertex_count < 3) { + WARN("vertex_count %lu lower than 3. not buffering", vertex_count); + return WINED3DERR_NOTAVAILABLE; + } + + if (! device->ddraw_device_buffer.buffer_vertex_count) { + /* New buffer, setting everything up */ + device->ddraw_device_buffer.fvf = fvf; + device->ddraw_device_buffer.stride = stride; + + /* We map & unmap directly. + * That way, we only reserve the space and other calls will have a new one. + * It should not happen, but let's be safe. + * + * We will fill it with following calls */ + wined3d_streaming_buffer_map(device->wined3d_device, &device->vertex_buffer, D3DMAXNUMVERTICES, stride, + &device->ddraw_device_buffer.vertex_buffer_pos, (void**) &device->ddraw_device_buffer.buffer_vertices); + wined3d_streaming_buffer_unmap(&device->vertex_buffer); + + wined3d_streaming_buffer_map(device->wined3d_device, &device->index_buffer, D3DMAXNUMVERTICES, sizeof(*device->ddraw_device_buffer.buffer_indices), + &device->ddraw_device_buffer.idx_buffer_pos, (void**) &device->ddraw_device_buffer.buffer_indices); + wined3d_streaming_buffer_unmap(&device->index_buffer); + } else if (fvf != device->ddraw_device_buffer.fvf) { + /* Not the same fvf as the buffered one. Cannot buffer more of those */ + TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->ddraw_device_buffer.fvf); + return WINED3DERR_NOTAVAILABLE; + } + + /* append all the vertices to the buffer */ + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride); + + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + + /* Create the index */ + + /* The first triangle is the same, therefore the indices are simply copied over */ + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + + /* Next triangles are recreated with : 2 next vertices then the 1rst one. + * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... */ + for (int idx = 3; idx < vertex_count; idx ++) { + /* Copy the 2 last ones */ + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count - 1; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = buffer_indice_count_initial; + device->ddraw_device_buffer.buffer_vertex_count++; + + TRACE_(ddraw_perf)("idx %d vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", idx, vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + } + + TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu max %d\n", + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count, D3DMAXNUMVERTICES); + + /* Buffered ! */ + return D3D_OK; +} + +/* Flushing the buffer if it isn't empty. + * + * It will delegate to a single call to wined3d with the correct parameters, + * and a (hopefully) huge list of triangles vertices. */ + +static HRESULT ddraw_buffer_flush_internal(struct d3d_device *device) { + HRESULT hr; + + TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + + /* Calling wined3d directly */ + wined3d_mutex_lock(); + + hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, device->ddraw_device_buffer.stride); + + if (FAILED(hr)) + goto done; + + wined3d_stateblock_set_index_buffer(device->state, device->index_buffer.buffer, WINED3DFMT_R16_UINT); + wined3d_stateblock_set_vertex_declaration(device->state, ddraw_find_decl(device->ddraw, device->ddraw_device_buffer.fvf)); + wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_TRIANGLELIST), 0); + wined3d_device_apply_stateblock(device->wined3d_device, device->state); + d3d_device_sync_surfaces(device); + + wined3d_device_context_draw_indexed(device->immediate_context, + device->ddraw_device_buffer.vertex_buffer_pos / device->ddraw_device_buffer.stride, + device->ddraw_device_buffer.idx_buffer_pos / sizeof(*device->ddraw_device_buffer.buffer_indices), + device->ddraw_device_buffer.buffer_indice_count, 0, 0); + + +done: + wined3d_mutex_unlock(); + + /* Reset the buffer */ + device->ddraw_device_buffer.buffer_vertex_count = 0; + device->ddraw_device_buffer.buffer_indice_count = 0; + + return hr; +} + +static HRESULT ddraw_buffer_flush(struct d3d_device *device) { + /* Nothing to do if it is empty */ + if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK; + + return ddraw_buffer_flush_internal(device); +}
From: Steve Schnepp steve.schnepp@pwkf.org
The D3DPT_POINTLIST are used to write blips on the minimap. *Each* point has its own DrawPrimitive(), which is very costly!
---
The implementation itself needs to be refactored with helper functions. Will amend the commit later, but i pushed early for feedback --- dlls/ddraw/ddraw_private.h | 1 + dlls/ddraw/device.c | 54 ++++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 19 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 2a322117202..020f1be1173 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -318,6 +318,7 @@ void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_h void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
struct d3d_device_buffer { + D3DPRIMITIVETYPE primitive_type; DWORD fvf; UINT stride; DWORD buffer_indice_count; diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index d3562d43e13..5fdc32c659a 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -7014,25 +7014,18 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su * * For now, it only supports : * d3d_device.version == 7 - * primitive_type == D3DPT_TRIANGLEFAN * flags == 0 - * vertex_count >= 3 * * Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST. */ static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { int buffer_indice_count_initial = device->ddraw_device_buffer.buffer_vertex_count;
- if (primitive_type != D3DPT_TRIANGLEFAN) return WINED3DERR_NOTAVAILABLE; - if (flags) return WINED3DERR_NOTAVAILABLE; - - if (vertex_count < 3) { - WARN("vertex_count %lu lower than 3. not buffering", vertex_count); - return WINED3DERR_NOTAVAILABLE; - } + if (flags) return WINED3DERR_NOTAVAILABLE;
if (! device->ddraw_device_buffer.buffer_vertex_count) { /* New buffer, setting everything up */ + device->ddraw_device_buffer.primitive_type = primitive_type; device->ddraw_device_buffer.fvf = fvf; device->ddraw_device_buffer.stride = stride;
@@ -7052,24 +7045,41 @@ static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE prim /* Not the same fvf as the buffered one. Cannot buffer more of those */ TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->ddraw_device_buffer.fvf); return WINED3DERR_NOTAVAILABLE; + } else if (primitive_type != device->ddraw_device_buffer.primitive_type) { + TRACE_(ddraw_perf)("Buffering failed due to mismatched primitive_type %d != buffer.primitive_type %d \n", primitive_type, device->ddraw_device_buffer.primitive_type); + return WINED3DERR_NOTAVAILABLE; }
/* append all the vertices to the buffer */ memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride); - - TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %05lu buffer_indice_count %05lu\n", vertex_count, stride, device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
/* Create the index */ + if (primitive_type == D3DPT_TRIANGLEFAN) goto fan; + if (primitive_type == D3DPT_POINTLIST) goto points; + + FIXME("primitive_type %#x not supported\n", primitive_type); + goto done; + +points: + for (int idx = 0; idx < vertex_count; idx ++) { + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + } + + goto done; + +fan: + if (vertex_count < 3) { + WARN("vertex_count %lu lower than 3. not buffering", vertex_count); + return WINED3DERR_NOTAVAILABLE; + }
/* The first triangle is the same, therefore the indices are simply copied over */ device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++;
- TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, - device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); - /* Next triangles are recreated with : 2 next vertices then the 1rst one. * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... */ for (int idx = 3; idx < vertex_count; idx ++) { @@ -7079,11 +7089,12 @@ static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE prim device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = buffer_indice_count_initial; device->ddraw_device_buffer.buffer_vertex_count++;
- TRACE_(ddraw_perf)("idx %d vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", idx, vertex_count, stride, - device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %05lu buffer_indice_count %05lu idx %d\n", vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count, idx); }
- TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu max %d\n", +done: + TRACE_(ddraw_perf)("buffer_vertex_count %05lu buffer_indice_count %05lu max %d\n", device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count, D3DMAXNUMVERTICES);
/* Buffered ! */ @@ -7098,7 +7109,7 @@ static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE prim static HRESULT ddraw_buffer_flush_internal(struct d3d_device *device) { HRESULT hr;
- TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + TRACE_(ddraw_perf)("primitive_type %#x buffer_vertex_count %05lu buffer_indice_count %05lu\n", device->ddraw_device_buffer.primitive_type, device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
/* Calling wined3d directly */ wined3d_mutex_lock(); @@ -7110,7 +7121,13 @@ static HRESULT ddraw_buffer_flush_internal(struct d3d_device *device) {
wined3d_stateblock_set_index_buffer(device->state, device->index_buffer.buffer, WINED3DFMT_R16_UINT); wined3d_stateblock_set_vertex_declaration(device->state, ddraw_find_decl(device->ddraw, device->ddraw_device_buffer.fvf)); + wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_TRIANGLELIST), 0); + + if (device->ddraw_device_buffer.primitive_type == D3DPT_POINTLIST) { + wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_POINTLIST), 0); + } + wined3d_device_apply_stateblock(device->wined3d_device, device->state); d3d_device_sync_surfaces(device);
@@ -7119,7 +7136,6 @@ static HRESULT ddraw_buffer_flush_internal(struct d3d_device *device) { device->ddraw_device_buffer.idx_buffer_pos / sizeof(*device->ddraw_device_buffer.buffer_indices), device->ddraw_device_buffer.buffer_indice_count, 0, 0);
- done: wined3d_mutex_unlock();
On Fri Feb 17 08:05:01 2023 +0000, Zebediah Figura wrote:
This is kind of difficult to review at the moment, since it's spread out across several commits. We avoid fixup commits or intermediate commits that change the approach in Wine, since they're hard to review, and relatedly hard to read when committed. In this case I believe commits 1, 2, 3, 4, 6 can and should all be squashed together.
Squashed 1, 2, 3, 4, 6 together.
Zebediah Figura (@zfigura) commented about dlls/ddraw/ddraw_private.h:
struct wined3d_stateblock *recording, *state, *update_state; const struct wined3d_stateblock_state *stateblock_state;
- /* Vertices Buffer for squashing DrawPrimitive() calls before sending it to wined3d */
"Vertex buffer", and please end it with a period.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
WINE_DEFAULT_DEBUG_CHANNEL(ddraw); WINE_DECLARE_DEBUG_CHANNEL(winediag); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_perf); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_buffer);
+static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface); +static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride);
We want to avoid forward declarations, so please move the functions before where they're used instead.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
TRACE("iface %p.\n", iface);
- // Flush the vertices buffer
- ddraw_buffer_flush_d7(iface);
I guess the intent here is to ensure that draws are flushed before a flip or blit, but I'm not confident this is actually enough. Given that EndScene() is a no-op I don't know if we know for sure that applications can be relied on to actually call it in the right places. Probably it would be better to actually explicitly flush at any such point.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
return D3D_OK;
}
+/*
- DirectD3D vertex buffer helpers
- For now, it only supports :
- d3d_device.version == 7
- primitive_type == D3DPT_TRIANGLEFAN
- fvf == 0x2c4
- flags == 0
- vertex_count >= 3
- Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST.
- */
+static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) {
Let's return "bool" here instead of "int".
Also, there's no need for the "d7" suffix; this isn't d3d7-specific.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
+/*
- DirectD3D vertex buffer helpers
- For now, it only supports :
- d3d_device.version == 7
- primitive_type == D3DPT_TRIANGLEFAN
- fvf == 0x2c4
- flags == 0
- vertex_count >= 3
- Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST.
- */
+static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) {
- struct d3d_device *device = impl_from_IDirect3DDevice7(iface);
- if (!TRACE_ON(ddraw_buffer)) return 0;
We avoid hiding things behind flags like this. We want this to be the default behaviour, and I don't see any reason for it not to be.
Zebediah Figura (@zfigura) commented about dlls/ddraw/ddraw_private.h:
void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN; void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
+struct d3d_device_buffer {
- D3DPRIMITIVETYPE primitive_type;
- DWORD fvf;
- UINT stride;
- DWORD buffer_indice_count;
- DWORD buffer_vertex_count;
- WORD *buffer_indices;
- char *buffer_vertices;
- unsigned int idx_buffer_pos;
- unsigned int vertex_buffer_pos;
+};
The naming here isn't great, although it is difficult to uniquely describe what we're doing ("vertex buffer" already having a meaning, and so on.) Perhaps "vertex_batch"? And we can drop the "d3d" and "ddraw" prefixes. Similarly for the struct members themselves, no need to prepend a redundant "buffer".
We shouldn't need the "buffer_indices" or "buffer_vertices" members; it should be possible to just make them into local variables.
"buffer_indice_count" should be "index_count".
Let's also avoid DWORD, and use standard C types like unsigned int here.
And, finally, please try to match the surrounding code with respect to things like spacing and brace placement. E.g. braces should go on a new line.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
- if (!TRACE_ON(ddraw_buffer)) goto old;
I don't think we want this. If we're going to do batching like this we want it to be on by default, and at that point I don't see a reason we'd ever want to turn it off. Since we're writing directly into the streaming buffer now I don't think we should have worse performance even for well-behaved applications.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
wined3d_streaming_buffer_init(&device->vertex_buffer, WINED3D_BIND_VERTEX_BUFFER); wined3d_streaming_buffer_init(&device->index_buffer, WINED3D_BIND_INDEX_BUFFER);
- /* Initialize the ddraw triangle buffer. */
- device->ddraw_device_buffer.buffer_vertex_count = 0;
No need for this; the device is already zero-initialized.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
return D3D_OK;
}
+/*
- DirectD3D vertex buffer helpers
- For now, it only supports :
- d3d_device.version == 7
- flags == 0
- Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST.
- */
+static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) {
- int buffer_indice_count_initial = device->ddraw_device_buffer.buffer_vertex_count;
- if (flags) return WINED3DERR_NOTAVAILABLE;
No need for this; flags are always ignored anyway.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
- FIXME("primitive_type %#x not supported\n", primitive_type);
- goto done;
+points:
- for (int idx = 0; idx < vertex_count; idx ++) {
device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++;
- }
- goto done;
+fan:
- if (vertex_count < 3) {
WARN("vertex_count %lu lower than 3. not buffering", vertex_count);
return WINED3DERR_NOTAVAILABLE;
- }
"Vertex count %lu is lower than 3; not buffering.\n"
(We try to make messages grammatical in Direct3D, but more importantly, it's missing a final newline.)
Somewhat relatedly, should we apply a maximum limit? We don't want to waste time rewriting vertices if the application is well-behaved.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
return D3D_OK;
}
+/*
- DirectD3D vertex buffer helpers
- For now, it only supports :
- d3d_device.version == 7
- flags == 0
- Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST.
- */
This isn't true (with respect to version) or particularly interesting (with respect to flags), and anyway I think the code tells us as much.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
- /* append all the vertices to the buffer */
- memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride);
- TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %05lu buffer_indice_count %05lu\n", vertex_count, stride,
device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
- /* Create the index */
- if (primitive_type == D3DPT_TRIANGLEFAN) goto fan;
- if (primitive_type == D3DPT_POINTLIST) goto points;
- FIXME("primitive_type %#x not supported\n", primitive_type);
- goto done;
+points:
- for (int idx = 0; idx < vertex_count; idx ++) {
device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++;
- }
"unsigned int", and the iterator needs to be declared outside the loop (some compilers we target can't handle iterators declared inside the loop).
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
* We will fill it with following calls */
wined3d_streaming_buffer_map(device->wined3d_device, &device->vertex_buffer, D3DMAXNUMVERTICES, stride,
&device->ddraw_device_buffer.vertex_buffer_pos, (void**) &device->ddraw_device_buffer.buffer_vertices);
wined3d_streaming_buffer_unmap(&device->vertex_buffer);
wined3d_streaming_buffer_map(device->wined3d_device, &device->index_buffer, D3DMAXNUMVERTICES, sizeof(*device->ddraw_device_buffer.buffer_indices),
&device->ddraw_device_buffer.idx_buffer_pos, (void**) &device->ddraw_device_buffer.buffer_indices);
wined3d_streaming_buffer_unmap(&device->index_buffer);
- } else if (fvf != device->ddraw_device_buffer.fvf) {
/* Not the same fvf as the buffered one. Cannot buffer more of those */
TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->ddraw_device_buffer.fvf);
return WINED3DERR_NOTAVAILABLE;
- } else if (primitive_type != device->ddraw_device_buffer.primitive_type) {
TRACE_(ddraw_perf)("Buffering failed due to mismatched primitive_type %d != buffer.primitive_type %d \n", primitive_type, device->ddraw_device_buffer.primitive_type);
return WINED3DERR_NOTAVAILABLE;
- }
Instead of returning false and retrying; I'd just flush here; that way you only need to call this function once, and it can't fail.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
wined3d_streaming_buffer_map(device->wined3d_device, &device->index_buffer, D3DMAXNUMVERTICES, sizeof(*device->ddraw_device_buffer.buffer_indices),
&device->ddraw_device_buffer.idx_buffer_pos, (void**) &device->ddraw_device_buffer.buffer_indices);
wined3d_streaming_buffer_unmap(&device->index_buffer);
- } else if (fvf != device->ddraw_device_buffer.fvf) {
/* Not the same fvf as the buffered one. Cannot buffer more of those */
TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->ddraw_device_buffer.fvf);
return WINED3DERR_NOTAVAILABLE;
- } else if (primitive_type != device->ddraw_device_buffer.primitive_type) {
TRACE_(ddraw_perf)("Buffering failed due to mismatched primitive_type %d != buffer.primitive_type %d \n", primitive_type, device->ddraw_device_buffer.primitive_type);
return WINED3DERR_NOTAVAILABLE;
- }
- /* append all the vertices to the buffer */
- memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride);
- TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %05lu buffer_indice_count %05lu\n", vertex_count, stride,
device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
I don't think we need this trace, but if we do, we want to use the existing d3d_perf channel. Same for the other ddraw_perf traces below.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
device->ddraw_device_buffer.primitive_type = primitive_type;
device->ddraw_device_buffer.fvf = fvf;
device->ddraw_device_buffer.stride = stride;
/* We map & unmap directly.
* That way, we only reserve the space and other calls will have a new one.
* It should not happen, but let's be safe.
*
* We will fill it with following calls */
wined3d_streaming_buffer_map(device->wined3d_device, &device->vertex_buffer, D3DMAXNUMVERTICES, stride,
&device->ddraw_device_buffer.vertex_buffer_pos, (void**) &device->ddraw_device_buffer.buffer_vertices);
wined3d_streaming_buffer_unmap(&device->vertex_buffer);
wined3d_streaming_buffer_map(device->wined3d_device, &device->index_buffer, D3DMAXNUMVERTICES, sizeof(*device->ddraw_device_buffer.buffer_indices),
&device->ddraw_device_buffer.idx_buffer_pos, (void**) &device->ddraw_device_buffer.buffer_indices);
wined3d_streaming_buffer_unmap(&device->index_buffer);
This isn't legal I'm afraid; you need to unmap *after* writing data to the buffer.
I also noticed a potential problem I hadn't considered when suggesting this approach: the streaming buffer is kind of a ring buffer, so if we run into the end, we'll need to flush before writing more data. Probably we should add an extra argument to wined3d_streaming_buffer_map() so it'll return an error if we're going to run over the end of the buffer.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
- } else if (primitive_type != device->ddraw_device_buffer.primitive_type) {
TRACE_(ddraw_perf)("Buffering failed due to mismatched primitive_type %d != buffer.primitive_type %d \n", primitive_type, device->ddraw_device_buffer.primitive_type);
return WINED3DERR_NOTAVAILABLE;
- }
- /* append all the vertices to the buffer */
- memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride);
- TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %05lu buffer_indice_count %05lu\n", vertex_count, stride,
device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
- /* Create the index */
- if (primitive_type == D3DPT_TRIANGLEFAN) goto fan;
- if (primitive_type == D3DPT_POINTLIST) goto points;
- FIXME("primitive_type %#x not supported\n", primitive_type);
- goto done;
Let's use helper functions instead of gotos, please.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
- It will delegate to a single call to wined3d with the correct parameters,
- and a (hopefully) huge list of triangles vertices. */
+static HRESULT ddraw_buffer_flush_internal(struct d3d_device *device) {
- HRESULT hr;
- TRACE_(ddraw_perf)("primitive_type %#x buffer_vertex_count %05lu buffer_indice_count %05lu\n", device->ddraw_device_buffer.primitive_type, device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
- /* Calling wined3d directly */
- wined3d_mutex_lock();
- hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, device->ddraw_device_buffer.stride);
- if (FAILED(hr))
goto done;
I wouldn't bother with the error check here; the function can only actually fail on invalid parameters.
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
+done:
- wined3d_mutex_unlock();
- /* Reset the buffer */
- device->ddraw_device_buffer.buffer_vertex_count = 0;
- device->ddraw_device_buffer.buffer_indice_count = 0;
- return hr;
+}
+static HRESULT ddraw_buffer_flush(struct d3d_device *device) {
- /* Nothing to do if it is empty */
- if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK;
- return ddraw_buffer_flush_internal(device);
+}
This function isn't doing anything, is it? I.e. we could just combine it with ddraw_buffer_flush_internal().
Zebediah Figura (@zfigura) commented about dlls/ddraw/device.c:
- wined3d_stateblock_set_index_buffer(device->state, device->index_buffer.buffer, WINED3DFMT_R16_UINT);
- wined3d_stateblock_set_vertex_declaration(device->state, ddraw_find_decl(device->ddraw, device->ddraw_device_buffer.fvf));
- wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_TRIANGLELIST), 0);
- if (device->ddraw_device_buffer.primitive_type == D3DPT_POINTLIST) {
- wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_POINTLIST), 0);
- }
- wined3d_device_apply_stateblock(device->wined3d_device, device->state);
- d3d_device_sync_surfaces(device);
- wined3d_device_context_draw_indexed(device->immediate_context,
device->ddraw_device_buffer.vertex_buffer_pos / device->ddraw_device_buffer.stride,
device->ddraw_device_buffer.idx_buffer_pos / sizeof(*device->ddraw_device_buffer.buffer_indices),
device->ddraw_device_buffer.buffer_indice_count, 0, 0);
Is it faster to use an indexed draw here? (Or at least not slower, I guess). I guess it probably takes up less space for triangle fans, at least, so it's probably a good idea nonethelesss...
Sorry for the slowness of review thus far.
One other general thing I notice is that we're still missing some flushes. We basically need to flush any time state is set on the stateblock or device, so we're missing at least the following:
* SetTransform() * MultiplyTransform() * setup_lighting() * SetViewport() * SetMaterial() * SetLight() * LightEnable() * SetClipPlane() * stateblock reset in ddraw_surface_create() for primary surfaces * reset in ddraw_set_cooperative_level() * d3d_device_set_render_target() * d3d_device_update_depth_stencil()
I also believe we need to flush before a flip, blit, clear, or download, since we might be drawing to the source surface in that case. That includes:
* Blt() * BltFast() * GetDC() * Lock() * Load()
We should be able to skip execute buffers at least, since they're not going through vertex batching.
add an extra argument to wined3d_streaming_buffer_map()
I didn't do that yet.
On Mon Feb 20 23:30:48 2023 +0000, Zebediah Figura wrote:
This function isn't doing anything, is it? I.e. we could just combine it with ddraw_buffer_flush_internal().
No, it was done for specific inlining. I changed the code to make it obvious.
On Mon Feb 20 23:30:48 2023 +0000, Zebediah Figura wrote:
Is it faster to use an indexed draw here? (Or at least not slower, I guess). I guess it probably takes up less space for triangle fans, at least, so it's probably a good idea nonethelesss...
I'm using indexed draws for 2 reasons : 1. less space for fans indeed 2. letting the gpu know it is exactly the same vertice, therefore avoiding any visual artifacts.