Convert all consecutive calls to d7_DrawPrimitive(TRIANGLE_FAN) into a single call to d7_DrawPrimitive(TRIANGLE_LIST) with all the vertices.
Note: this *increases* the number of vertices, but the extra bandwidth is much less costly than multiple calls.
Note: only a very precise subset of the calls gets buffered, in order to ensure that the disruption is minimal.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=33814
-- v7: ddraw: avoid magic fvf number
From: Steve Schnepp steve.schnepp@pwkf.org
Convert all consecutive calls to d7_DrawPrimitive(TRIANGLE_FAN) into a single call to d7_DrawPrimitive(TRIANGLE_LIST) with all the vertices.
Note: this *increases* the number of vertices, but the extra bandwidth is much less costly than multiple calls.
Note: only a very precise subset of the calls gets buffered, in order to ensure that the disruption is minimal.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=33814 --- dlls/ddraw/ddraw_private.h | 10 ++++ dlls/ddraw/device.c | 111 ++++++++++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 2 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 09e8133350b..5c77e813311 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -317,6 +317,13 @@ DWORD ddraw_allocate_handle(struct ddraw_handle_table *t, void *object, enum ddr void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN; void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
+#define D3D_BUFFER_SIZE (2 * 1024 * 1024) /* 2 MiB buffer */ + +struct d3d_device_buffer { + DWORD buffer_vertex_count; + char buffer_vertices[D3D_BUFFER_SIZE]; +}; + struct d3d_device { /* IUnknown */ @@ -369,6 +376,9 @@ struct d3d_device
struct wined3d_stateblock *recording, *state, *update_state; const struct wined3d_stateblock_state *stateblock_state; + + /* Vertices Buffer for squashing DrawPrimitive() calls before sending it to wined3d */ + struct d3d_device_buffer ddraw_device_buffer; };
HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_surface *target, IUnknown *rt_iface, diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index 1cfef5007d5..e0a3b4ca5e6 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -31,6 +31,11 @@
WINE_DEFAULT_DEBUG_CHANNEL(ddraw); WINE_DECLARE_DEBUG_CHANNEL(winediag); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_perf); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_buffer); + +static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface); +static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride);
/* The device ID */ const GUID IID_D3DDEVICE_WineD3D = { @@ -1592,6 +1597,9 @@ static HRESULT d3d_device7_EndScene(IDirect3DDevice7 *iface)
TRACE("iface %p.\n", iface);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + wined3d_mutex_lock(); hr = wined3d_device_end_scene(device->wined3d_device); wined3d_mutex_unlock(); @@ -2548,6 +2556,9 @@ static HRESULT d3d_device7_SetRenderState(IDirect3DDevice7 *iface,
TRACE("iface %p, state %#x, value %#lx.\n", iface, state, value);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + wined3d_mutex_lock(); /* Some render states need special care */ switch (state) @@ -3451,10 +3462,21 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
+ if (primitive_type == D3DPT_TRIANGLELIST) { + // We are currently flushing the buffer + } else { + if (ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride)) { + return D3D_OK; + } + // Cannot buffer, need to flush the rest, then process this one + ddraw_buffer_flush_d7(iface); + } + wined3d_mutex_lock();
- if (FAILED(hr = wined3d_streaming_buffer_upload(device->wined3d_device, - &device->vertex_buffer, vertices, size, stride, &vb_pos))) + hr = wined3d_streaming_buffer_upload(device->wined3d_device, + &device->vertex_buffer, vertices, size, stride, &vb_pos); + if (FAILED(hr)) goto done;
hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, stride); @@ -4641,6 +4663,9 @@ static HRESULT d3d_device7_SetTexture(IDirect3DDevice7 *iface,
TRACE("iface %p, stage %lu, texture %p.\n", iface, stage, texture);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + if (surf && (surf->surface_desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE)) { if (surf->draw_texture) @@ -4925,6 +4950,9 @@ static HRESULT d3d_device7_SetTextureStageState(IDirect3DDevice7 *iface, TRACE("iface %p, stage %lu, state %#x, value %#lx.\n", iface, stage, state, value);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + if (state > D3DTSS_TEXTURETRANSFORMFLAGS) { WARN("Invalid state %#x passed.\n", state); @@ -6963,3 +6991,82 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su
return D3D_OK; } + +/* + * DirectD3D vertex buffer helpers + * + * For now, it only supports : + * d3d_device.version == 7 + * primitive_type == D3DPT_TRIANGLEFAN + * fvf == 0x2c4 + * flags == 0 + * vertex_count >= 3 + * + * Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST. + */ +static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { + struct d3d_device *device = impl_from_IDirect3DDevice7(iface); + + if (!TRACE_ON(ddraw_buffer)) return 0; + if (primitive_type != D3DPT_TRIANGLEFAN) return 0; + if (fvf != 0x2c4) return 0; + if (flags) return 0; + + if (vertex_count < 3) { + WARN("vertex_count %lu lower than 3. not buffering", vertex_count); + return 0; + } + + /* First triangle is simply copied over */ + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride * 3); + device->ddraw_device_buffer.buffer_vertex_count += 3; + TRACE_(ddraw_perf)("only %lu vertex count. buffering. buffer_vertex_count %lu stride %d\n", vertex_count, device->ddraw_device_buffer.buffer_vertex_count, stride); + + /* Next triangles are recreated with : 2 next vertices then the 1rst one. + * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... 
*/ + for (int idx = 2; idx < vertex_count-1; idx ++) { + /* Copy the 2 last ones */ + char* next_vertice = vertices; + next_vertice += idx * stride; + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, next_vertice, stride * 2); + device->ddraw_device_buffer.buffer_vertex_count += 2; + + /* Copy the first again */ + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride); + device->ddraw_device_buffer.buffer_vertex_count += 1; + TRACE_(ddraw_perf)("idx %d buffer_vertex_count %lu stride %d\n", idx, device->ddraw_device_buffer.buffer_vertex_count, stride); + } + + /* Buffered ! */ + return 1; +} + +/* Flushing the buffer if it isn't empty. + * + * It will delegate to a single call to DrawPrimitive with the correct parameters, + * and a (hopefully) huge list of triangles vertices. */ + +static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { + struct d3d_device *device = impl_from_IDirect3DDevice7(iface); + + TRACE_(ddraw_perf)("buffer_vertex_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count); + + /* Nothing to do if it is empty */ + if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK; + + /* Delegate the call */ + { + HRESULT ret; + const DWORD from_flush_flags = 0xF0F0F0F0; // Using a specific flag to recognise & avoid recursion loops + + ret = d3d_device7_DrawPrimitive(iface, D3DPT_TRIANGLELIST, 0x2c4, device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, from_flush_flags); + + if (ret != D3D_OK) FIXME("error in call delegation %ld", ret); + } + + /* Flush the buffer */ + device->ddraw_device_buffer.buffer_vertex_count = 0; + + /* The error isn't really useful as it is too late, so always returning */ + return D3D_OK; +}
From: Steve Schnepp steve.schnepp@pwkf.org
--- dlls/ddraw/ddraw_private.h | 2 + dlls/ddraw/device.c | 80 +++++++++++++++++++++++--------------- 2 files changed, 51 insertions(+), 31 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 5c77e813311..325ff22c1ae 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -320,7 +320,9 @@ void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_ha #define D3D_BUFFER_SIZE (2 * 1024 * 1024) /* 2 MiB buffer */
struct d3d_device_buffer { + DWORD buffer_indice_count; DWORD buffer_vertex_count; + WORD buffer_indices[D3DMAXNUMVERTICES]; char buffer_vertices[D3D_BUFFER_SIZE]; };
diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index e0a3b4ca5e6..ce5709c615f 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -35,7 +35,7 @@ WINE_DECLARE_DEBUG_CHANNEL(ddraw_perf); WINE_DECLARE_DEBUG_CHANNEL(ddraw_buffer);
static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface); -static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride); +static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride);
/* The device ID */ const GUID IID_D3DDEVICE_WineD3D = { @@ -3462,16 +3462,15 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
- if (primitive_type == D3DPT_TRIANGLELIST) { - // We are currently flushing the buffer - } else { - if (ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride)) { - return D3D_OK; - } - // Cannot buffer, need to flush the rest, then process this one - ddraw_buffer_flush_d7(iface); + hr = ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride); + if (hr == D3D_OK) { + /* Buffered successfuly -> returning immediatly :-) */ + return D3D_OK; }
+ // Cannot buffer, need to flush the rest, then process this one + ddraw_buffer_flush_d7(iface); + wined3d_mutex_lock();
hr = wined3d_streaming_buffer_upload(device->wined3d_device, @@ -6905,6 +6904,11 @@ static HRESULT d3d_device_init(struct d3d_device *device, struct ddraw *ddraw, c wined3d_streaming_buffer_init(&device->vertex_buffer, WINED3D_BIND_VERTEX_BUFFER); wined3d_streaming_buffer_init(&device->index_buffer, WINED3D_BIND_INDEX_BUFFER);
+ /* Initialize the ddraw triangle buffer. + * The 3 first vertices indexes will *always* be { 0, 1, 2 }. + */ + device->ddraw_device_buffer.buffer_vertex_count = 0; + /* Render to the back buffer */ rtv = ddraw_surface_get_rendertarget_view(target); if (FAILED(hr = wined3d_device_context_set_rendertarget_views(device->immediate_context, 0, 1, &rtv, TRUE))) @@ -7004,41 +7008,51 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su * * Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST. */ -static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { +static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { struct d3d_device *device = impl_from_IDirect3DDevice7(iface); + int buffer_indice_count_initial = device->ddraw_device_buffer.buffer_vertex_count;
- if (!TRACE_ON(ddraw_buffer)) return 0; - if (primitive_type != D3DPT_TRIANGLEFAN) return 0; - if (fvf != 0x2c4) return 0; - if (flags) return 0; + if (!TRACE_ON(ddraw_buffer)) return WINED3DERR_NOTAVAILABLE; + if (primitive_type != D3DPT_TRIANGLEFAN) return WINED3DERR_NOTAVAILABLE; + if (fvf != 0x2c4) return WINED3DERR_NOTAVAILABLE; + if (flags) return WINED3DERR_NOTAVAILABLE;
if (vertex_count < 3) { WARN("vertex_count %lu lower than 3. not buffering", vertex_count); - return 0; + return WINED3DERR_NOTAVAILABLE; }
- /* First triangle is simply copied over */ - memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride * 3); - device->ddraw_device_buffer.buffer_vertex_count += 3; - TRACE_(ddraw_perf)("only %lu vertex count. buffering. buffer_vertex_count %lu stride %d\n", vertex_count, device->ddraw_device_buffer.buffer_vertex_count, stride); + /* append all the vertices to the buffer */ + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride); + + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + + /* Create the index */ + + /* The first triangle is the same, therefore the indices are simply copied over */ + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
/* Next triangles are recreated with : 2 next vertices then the 1rst one. * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... */ for (int idx = 2; idx < vertex_count-1; idx ++) { /* Copy the 2 last ones */ - char* next_vertice = vertices; - next_vertice += idx * stride; - memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, next_vertice, stride * 2); - device->ddraw_device_buffer.buffer_vertex_count += 2; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count - 1; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = buffer_indice_count_initial; + device->ddraw_device_buffer.buffer_vertex_count++;
- /* Copy the first again */ - memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride); - device->ddraw_device_buffer.buffer_vertex_count += 1; - TRACE_(ddraw_perf)("idx %d buffer_vertex_count %lu stride %d\n", idx, device->ddraw_device_buffer.buffer_vertex_count, stride); + TRACE_(ddraw_perf)("idx %d vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", idx, vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); }
/* Buffered ! */ - return 1; + return D3D_OK; }
/* Flushing the buffer if it isn't empty. @@ -7049,7 +7063,7 @@ static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primiti static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { struct d3d_device *device = impl_from_IDirect3DDevice7(iface);
- TRACE_(ddraw_perf)("buffer_vertex_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count); + TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
/* Nothing to do if it is empty */ if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK; @@ -7059,13 +7073,17 @@ static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { HRESULT ret; const DWORD from_flush_flags = 0xF0F0F0F0; // Using a specific flag to recognise & avoid recursion loops
- ret = d3d_device7_DrawPrimitive(iface, D3DPT_TRIANGLELIST, 0x2c4, device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, from_flush_flags); + ret = d3d_device7_DrawIndexedPrimitive(iface, D3DPT_TRIANGLELIST, 0x2c4, + device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, + device->ddraw_device_buffer.buffer_indices, device->ddraw_device_buffer.buffer_indice_count, + from_flush_flags);
if (ret != D3D_OK) FIXME("error in call delegation %ld", ret); }
- /* Flush the buffer */ + /* Reset the buffer */ device->ddraw_device_buffer.buffer_vertex_count = 0; + device->ddraw_device_buffer.buffer_indice_count = 0;
/* The error isn't really useful as it is too late, so always returning */ return D3D_OK;
From: Steve Schnepp steve.schnepp@pwkf.org
--- dlls/ddraw/ddraw_private.h | 1 + dlls/ddraw/device.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 325ff22c1ae..8455b157522 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -320,6 +320,7 @@ void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_ha #define D3D_BUFFER_SIZE (2 * 1024 * 1024) /* 2 MiB buffer */
struct d3d_device_buffer { + DWORD fvf; DWORD buffer_indice_count; DWORD buffer_vertex_count; WORD buffer_indices[D3DMAXNUMVERTICES]; diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index ce5709c615f..ed1873d1594 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -7002,7 +7002,6 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su * For now, it only supports : * d3d_device.version == 7 * primitive_type == D3DPT_TRIANGLEFAN - * fvf == 0x2c4 * flags == 0 * vertex_count >= 3 * @@ -7014,7 +7013,6 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri
if (!TRACE_ON(ddraw_buffer)) return WINED3DERR_NOTAVAILABLE; if (primitive_type != D3DPT_TRIANGLEFAN) return WINED3DERR_NOTAVAILABLE; - if (fvf != 0x2c4) return WINED3DERR_NOTAVAILABLE; if (flags) return WINED3DERR_NOTAVAILABLE;
if (vertex_count < 3) { @@ -7022,6 +7020,18 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri return WINED3DERR_NOTAVAILABLE; }
+ if (device->ddraw_device_buffer.buffer_vertex_count) { + if (fvf != device->ddraw_device_buffer.fvf) { + /* Not the same fvf as the buffered one. Cannot buffer more of those */ + TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->ddraw_device_buffer.fvf); + return WINED3DERR_NOTAVAILABLE; + } + } else { + /* New buffer, setting fvf */ + device->ddraw_device_buffer.fvf = fvf; + } + + /* append all the vertices to the buffer */ memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride);
@@ -7073,7 +7083,7 @@ static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { HRESULT ret; const DWORD from_flush_flags = 0xF0F0F0F0; // Using a specific flag to recognise & avoid recursion loops
- ret = d3d_device7_DrawIndexedPrimitive(iface, D3DPT_TRIANGLELIST, 0x2c4, + ret = d3d_device7_DrawIndexedPrimitive(iface, D3DPT_TRIANGLELIST, device->ddraw_device_buffer.fvf, device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indices, device->ddraw_device_buffer.buffer_indice_count, from_flush_flags);