From: Steve Schnepp steve.schnepp@pwkf.org
Convert all consecutive calls to d7_DrawPrimitive(TRIANGLE_FAN) into a single call to d7_DrawPrimitive(TRIANGLE_LIST) with all the vertices.
Note: this *increases* the number of vertices, but bandwidth is much less costly than multiple calls.
Note: only a very precise subset of the calls gets buffered, in order to ensure that the disruption is minimal.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=33814 --- dlls/ddraw/ddraw_private.h | 10 ++++ dlls/ddraw/device.c | 111 ++++++++++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 2 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 09e8133350b..5c77e813311 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -317,6 +317,13 @@ DWORD ddraw_allocate_handle(struct ddraw_handle_table *t, void *object, enum ddr void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN; void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
+#define D3D_BUFFER_SIZE (2 * 1024 * 1024) /* 2 MiB buffer */ + +struct d3d_device_buffer { + DWORD buffer_vertex_count; + char buffer_vertices[D3D_BUFFER_SIZE]; +}; + struct d3d_device { /* IUnknown */ @@ -369,6 +376,9 @@ struct d3d_device
struct wined3d_stateblock *recording, *state, *update_state; const struct wined3d_stateblock_state *stateblock_state; + + /* Vertices Buffer for squashing DrawPrimitive() calls before sending it to wined3d */ + struct d3d_device_buffer ddraw_device_buffer; };
HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_surface *target, IUnknown *rt_iface, diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index 1cfef5007d5..0b057403d99 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -31,6 +31,11 @@
WINE_DEFAULT_DEBUG_CHANNEL(ddraw); WINE_DECLARE_DEBUG_CHANNEL(winediag); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_perf); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_buffer); + +static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface); +static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride);
/* The device ID */ const GUID IID_D3DDEVICE_WineD3D = { @@ -1592,6 +1597,9 @@ static HRESULT d3d_device7_EndScene(IDirect3DDevice7 *iface)
TRACE("iface %p.\n", iface);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + wined3d_mutex_lock(); hr = wined3d_device_end_scene(device->wined3d_device); wined3d_mutex_unlock(); @@ -2548,6 +2556,9 @@ static HRESULT d3d_device7_SetRenderState(IDirect3DDevice7 *iface,
TRACE("iface %p, state %#x, value %#lx.\n", iface, state, value);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + wined3d_mutex_lock(); /* Some render states need special care */ switch (state) @@ -3451,10 +3462,21 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
+ if (primitive_type == D3DPT_TRIANGLELIST) { + // We are currently flushing the buffer + } else { + if (ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride)) { + return D3D_OK; + } + // Cannot buffer, need to flush the rest, then process this one + ddraw_buffer_flush_d7(iface); + } + wined3d_mutex_lock();
- if (FAILED(hr = wined3d_streaming_buffer_upload(device->wined3d_device, - &device->vertex_buffer, vertices, size, stride, &vb_pos))) + hr = wined3d_streaming_buffer_upload(device->wined3d_device, + &device->vertex_buffer, vertices, size, stride, &vb_pos); + if (FAILED(hr)) goto done;
hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, stride); @@ -4641,6 +4663,9 @@ static HRESULT d3d_device7_SetTexture(IDirect3DDevice7 *iface,
TRACE("iface %p, stage %lu, texture %p.\n", iface, stage, texture);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + if (surf && (surf->surface_desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE)) { if (surf->draw_texture) @@ -4925,6 +4950,9 @@ static HRESULT d3d_device7_SetTextureStageState(IDirect3DDevice7 *iface, TRACE("iface %p, stage %lu, state %#x, value %#lx.\n", iface, stage, state, value);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + if (state > D3DTSS_TEXTURETRANSFORMFLAGS) { WARN("Invalid state %#x passed.\n", state); @@ -6963,3 +6991,82 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su
return D3D_OK; } + +/* + * DirectD3D vertex buffer helpers + * + * For now, it only supports : + * d3d_device.version == 7 + * primitive_type == D3DPT_TRIANGLEFAN + * fvf == 0x2c4 + * flags == 0 + * vertex_count >= 3 + * + * Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST. + */ +static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { + struct d3d_device *device = impl_from_IDirect3DDevice7(iface); + + if (!TRACE_ON(ddraw_buffer)) return 0; + if (primitive_type != D3DPT_TRIANGLEFAN) return 0; + if (fvf != 0x2c4) return 0; + if (flags) return 0; + + if (vertex_count < 3) { + WARN("vertex_count %lu lower than 3. not buffering"); + return 0; + } + + /* First triangle is simply copied over */ + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride * 3); + device->ddraw_device_buffer.buffer_vertex_count += 3; + TRACE_(ddraw_perf)("only %lu vertex count. buffering. buffer_vertex_count %d stride %d\n", vertex_count, device->ddraw_device_buffer.buffer_vertex_count, stride); + + /* Next triangles are recreated with : 2 next vertices then the 1rst one. + * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... 
*/ + for (int idx = 2; idx < vertex_count-1; idx ++) { + /* Copy the 2 last ones */ + char* next_vertice = vertices; + next_vertice += idx * stride; + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, next_vertice, stride * 2); + device->ddraw_device_buffer.buffer_vertex_count += 2; + + /* Copy the first again */ + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride); + device->ddraw_device_buffer.buffer_vertex_count += 1; + TRACE_(ddraw_perf)("idx %d buffer_vertex_count %lu stride %d\n", idx, device->ddraw_device_buffer.buffer_vertex_count, stride); + } + + /* Buffered ! */ + return 1; +} + +/* Flushing the buffer if it isn't empty. + * + * It will delegate to a single call to DrawPrimitive with the correct parameters, + * and a (hopefully) huge list of triangles vertices. */ + +static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { + struct d3d_device *device = impl_from_IDirect3DDevice7(iface); + + TRACE_(ddraw_perf)("buffer_vertex_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count); + + /* Nothing to do if it is empty */ + if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK; + + /* Delegate the call */ + { + HRESULT ret; + const DWORD from_flush_flags = 0xF0F0F0F0; // Using a specific flag to recognise & avoid recursion loops + + ret = d3d_device7_DrawPrimitive(iface, D3DPT_TRIANGLELIST, 0x2c4, device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, from_flush_flags); + + if (ret != D3D_OK) FIXME("error in call delegation %d", ret); + } + + /* Flush the buffer */ + device->ddraw_device_buffer.buffer_vertex_count = 0; + + /* The error isn't really useful as it is too late, so always returning */ + return D3D_OK; +}