Convert all consecutive calls to d7_DrawPrimitive(TRIANGLE_FAN) into a single call to d7_DrawPrimitive(TRIANGLE_LIST) with all the vertices.
Note: this *increases* the number of vertices, but bandwidth is much less costly than multiple draw calls.
Note: only a very precise subset of the calls gets buffered, in order to ensure that the disruption is minimal.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=33814
-- v13: ddraw: Convert buffer API to use d3d_device struct ddraw: Also Buffer D3DPT_POINTLIST ddraw: directly buffering in the streaming buffer
From: Steve Schnepp steve.schnepp@pwkf.org
Convert all consecutive calls to d7_DrawPrimitive(TRIANGLE_FAN) into a single call to d7_DrawPrimitive(TRIANGLE_LIST) with all the vertices.
Note: this *increases* the number of vertices, but bandwidth is much less costly than multiple draw calls.
Note: only a very precise subset of the calls gets buffered, in order to ensure that the disruption is minimal.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=33814 --- dlls/ddraw/ddraw_private.h | 10 ++++ dlls/ddraw/device.c | 111 ++++++++++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 2 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 09e8133350b..5c77e813311 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -317,6 +317,13 @@ DWORD ddraw_allocate_handle(struct ddraw_handle_table *t, void *object, enum ddr void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN; void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
+#define D3D_BUFFER_SIZE (2 * 1024 * 1024) /* 2 MiB buffer */ + +struct d3d_device_buffer { + DWORD buffer_vertex_count; + char buffer_vertices[D3D_BUFFER_SIZE]; +}; + struct d3d_device { /* IUnknown */ @@ -369,6 +376,9 @@ struct d3d_device
struct wined3d_stateblock *recording, *state, *update_state; const struct wined3d_stateblock_state *stateblock_state; + + /* Vertices Buffer for squashing DrawPrimitive() calls before sending it to wined3d */ + struct d3d_device_buffer ddraw_device_buffer; };
HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_surface *target, IUnknown *rt_iface, diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index 1cfef5007d5..e0a3b4ca5e6 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -31,6 +31,11 @@
WINE_DEFAULT_DEBUG_CHANNEL(ddraw); WINE_DECLARE_DEBUG_CHANNEL(winediag); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_perf); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_buffer); + +static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface); +static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride);
/* The device ID */ const GUID IID_D3DDEVICE_WineD3D = { @@ -1592,6 +1597,9 @@ static HRESULT d3d_device7_EndScene(IDirect3DDevice7 *iface)
TRACE("iface %p.\n", iface);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + wined3d_mutex_lock(); hr = wined3d_device_end_scene(device->wined3d_device); wined3d_mutex_unlock(); @@ -2548,6 +2556,9 @@ static HRESULT d3d_device7_SetRenderState(IDirect3DDevice7 *iface,
TRACE("iface %p, state %#x, value %#lx.\n", iface, state, value);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + wined3d_mutex_lock(); /* Some render states need special care */ switch (state) @@ -3451,10 +3462,21 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
+ if (primitive_type == D3DPT_TRIANGLELIST) { + // We are currently flushing the buffer + } else { + if (ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride)) { + return D3D_OK; + } + // Cannot buffer, need to flush the rest, then process this one + ddraw_buffer_flush_d7(iface); + } + wined3d_mutex_lock();
- if (FAILED(hr = wined3d_streaming_buffer_upload(device->wined3d_device, - &device->vertex_buffer, vertices, size, stride, &vb_pos))) + hr = wined3d_streaming_buffer_upload(device->wined3d_device, + &device->vertex_buffer, vertices, size, stride, &vb_pos); + if (FAILED(hr)) goto done;
hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, stride); @@ -4641,6 +4663,9 @@ static HRESULT d3d_device7_SetTexture(IDirect3DDevice7 *iface,
TRACE("iface %p, stage %lu, texture %p.\n", iface, stage, texture);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + if (surf && (surf->surface_desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE)) { if (surf->draw_texture) @@ -4925,6 +4950,9 @@ static HRESULT d3d_device7_SetTextureStageState(IDirect3DDevice7 *iface, TRACE("iface %p, stage %lu, state %#x, value %#lx.\n", iface, stage, state, value);
+ // Flush the vertices buffer + ddraw_buffer_flush_d7(iface); + if (state > D3DTSS_TEXTURETRANSFORMFLAGS) { WARN("Invalid state %#x passed.\n", state); @@ -6963,3 +6991,82 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su
return D3D_OK; } + +/* + * DirectD3D vertex buffer helpers + * + * For now, it only supports : + * d3d_device.version == 7 + * primitive_type == D3DPT_TRIANGLEFAN + * fvf == 0x2c4 + * flags == 0 + * vertex_count >= 3 + * + * Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST. + */ +static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { + struct d3d_device *device = impl_from_IDirect3DDevice7(iface); + + if (!TRACE_ON(ddraw_buffer)) return 0; + if (primitive_type != D3DPT_TRIANGLEFAN) return 0; + if (fvf != 0x2c4) return 0; + if (flags) return 0; + + if (vertex_count < 3) { + WARN("vertex_count %lu lower than 3. not buffering", vertex_count); + return 0; + } + + /* First triangle is simply copied over */ + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride * 3); + device->ddraw_device_buffer.buffer_vertex_count += 3; + TRACE_(ddraw_perf)("only %lu vertex count. buffering. buffer_vertex_count %lu stride %d\n", vertex_count, device->ddraw_device_buffer.buffer_vertex_count, stride); + + /* Next triangles are recreated with : 2 next vertices then the 1rst one. + * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... 
*/ + for (int idx = 2; idx < vertex_count-1; idx ++) { + /* Copy the 2 last ones */ + char* next_vertice = vertices; + next_vertice += idx * stride; + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, next_vertice, stride * 2); + device->ddraw_device_buffer.buffer_vertex_count += 2; + + /* Copy the first again */ + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride); + device->ddraw_device_buffer.buffer_vertex_count += 1; + TRACE_(ddraw_perf)("idx %d buffer_vertex_count %lu stride %d\n", idx, device->ddraw_device_buffer.buffer_vertex_count, stride); + } + + /* Buffered ! */ + return 1; +} + +/* Flushing the buffer if it isn't empty. + * + * It will delegate to a single call to DrawPrimitive with the correct parameters, + * and a (hopefully) huge list of triangles vertices. */ + +static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { + struct d3d_device *device = impl_from_IDirect3DDevice7(iface); + + TRACE_(ddraw_perf)("buffer_vertex_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count); + + /* Nothing to do if it is empty */ + if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK; + + /* Delegate the call */ + { + HRESULT ret; + const DWORD from_flush_flags = 0xF0F0F0F0; // Using a specific flag to recognise & avoid recursion loops + + ret = d3d_device7_DrawPrimitive(iface, D3DPT_TRIANGLELIST, 0x2c4, device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, from_flush_flags); + + if (ret != D3D_OK) FIXME("error in call delegation %ld", ret); + } + + /* Flush the buffer */ + device->ddraw_device_buffer.buffer_vertex_count = 0; + + /* The error isn't really useful as it is too late, so always returning */ + return D3D_OK; +}
From: Steve Schnepp steve.schnepp@pwkf.org
--- dlls/ddraw/ddraw_private.h | 2 + dlls/ddraw/device.c | 80 +++++++++++++++++++++++--------------- 2 files changed, 51 insertions(+), 31 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 5c77e813311..325ff22c1ae 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -320,7 +320,9 @@ void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_ha #define D3D_BUFFER_SIZE (2 * 1024 * 1024) /* 2 MiB buffer */
struct d3d_device_buffer { + DWORD buffer_indice_count; DWORD buffer_vertex_count; + WORD buffer_indices[D3DMAXNUMVERTICES]; char buffer_vertices[D3D_BUFFER_SIZE]; };
diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index e0a3b4ca5e6..ce5709c615f 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -35,7 +35,7 @@ WINE_DECLARE_DEBUG_CHANNEL(ddraw_perf); WINE_DECLARE_DEBUG_CHANNEL(ddraw_buffer);
static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface); -static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride); +static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride);
/* The device ID */ const GUID IID_D3DDEVICE_WineD3D = { @@ -3462,16 +3462,15 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
- if (primitive_type == D3DPT_TRIANGLELIST) { - // We are currently flushing the buffer - } else { - if (ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride)) { - return D3D_OK; - } - // Cannot buffer, need to flush the rest, then process this one - ddraw_buffer_flush_d7(iface); + hr = ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride); + if (hr == D3D_OK) { + /* Buffered successfuly -> returning immediatly :-) */ + return D3D_OK; }
+ // Cannot buffer, need to flush the rest, then process this one + ddraw_buffer_flush_d7(iface); + wined3d_mutex_lock();
hr = wined3d_streaming_buffer_upload(device->wined3d_device, @@ -6905,6 +6904,11 @@ static HRESULT d3d_device_init(struct d3d_device *device, struct ddraw *ddraw, c wined3d_streaming_buffer_init(&device->vertex_buffer, WINED3D_BIND_VERTEX_BUFFER); wined3d_streaming_buffer_init(&device->index_buffer, WINED3D_BIND_INDEX_BUFFER);
+ /* Initialize the ddraw triangle buffer. + * The 3 first vertices indexes will *always* be { 0, 1, 2 }. + */ + device->ddraw_device_buffer.buffer_vertex_count = 0; + /* Render to the back buffer */ rtv = ddraw_surface_get_rendertarget_view(target); if (FAILED(hr = wined3d_device_context_set_rendertarget_views(device->immediate_context, 0, 1, &rtv, TRUE))) @@ -7004,41 +7008,51 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su * * Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST. */ -static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { +static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { struct d3d_device *device = impl_from_IDirect3DDevice7(iface); + int buffer_indice_count_initial = device->ddraw_device_buffer.buffer_vertex_count;
- if (!TRACE_ON(ddraw_buffer)) return 0; - if (primitive_type != D3DPT_TRIANGLEFAN) return 0; - if (fvf != 0x2c4) return 0; - if (flags) return 0; + if (!TRACE_ON(ddraw_buffer)) return WINED3DERR_NOTAVAILABLE; + if (primitive_type != D3DPT_TRIANGLEFAN) return WINED3DERR_NOTAVAILABLE; + if (fvf != 0x2c4) return WINED3DERR_NOTAVAILABLE; + if (flags) return WINED3DERR_NOTAVAILABLE;
if (vertex_count < 3) { WARN("vertex_count %lu lower than 3. not buffering", vertex_count); - return 0; + return WINED3DERR_NOTAVAILABLE; }
- /* First triangle is simply copied over */ - memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride * 3); - device->ddraw_device_buffer.buffer_vertex_count += 3; - TRACE_(ddraw_perf)("only %lu vertex count. buffering. buffer_vertex_count %lu stride %d\n", vertex_count, device->ddraw_device_buffer.buffer_vertex_count, stride); + /* append all the vertices to the buffer */ + memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride); + + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + + /* Create the index */ + + /* The first triangle is the same, therefore the indices are simply copied over */ + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
/* Next triangles are recreated with : 2 next vertices then the 1rst one. * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... */ for (int idx = 2; idx < vertex_count-1; idx ++) { /* Copy the 2 last ones */ - char* next_vertice = vertices; - next_vertice += idx * stride; - memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, next_vertice, stride * 2); - device->ddraw_device_buffer.buffer_vertex_count += 2; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count - 1; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count; + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = buffer_indice_count_initial; + device->ddraw_device_buffer.buffer_vertex_count++;
- /* Copy the first again */ - memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, stride); - device->ddraw_device_buffer.buffer_vertex_count += 1; - TRACE_(ddraw_perf)("idx %d buffer_vertex_count %lu stride %d\n", idx, device->ddraw_device_buffer.buffer_vertex_count, stride); + TRACE_(ddraw_perf)("idx %d vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", idx, vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); }
/* Buffered ! */ - return 1; + return D3D_OK; }
/* Flushing the buffer if it isn't empty. @@ -7049,7 +7063,7 @@ static int ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primiti static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { struct d3d_device *device = impl_from_IDirect3DDevice7(iface);
- TRACE_(ddraw_perf)("buffer_vertex_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count); + TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
/* Nothing to do if it is empty */ if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK; @@ -7059,13 +7073,17 @@ static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { HRESULT ret; const DWORD from_flush_flags = 0xF0F0F0F0; // Using a specific flag to recognise & avoid recursion loops
- ret = d3d_device7_DrawPrimitive(iface, D3DPT_TRIANGLELIST, 0x2c4, device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, from_flush_flags); + ret = d3d_device7_DrawIndexedPrimitive(iface, D3DPT_TRIANGLELIST, 0x2c4, + device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, + device->ddraw_device_buffer.buffer_indices, device->ddraw_device_buffer.buffer_indice_count, + from_flush_flags);
if (ret != D3D_OK) FIXME("error in call delegation %ld", ret); }
- /* Flush the buffer */ + /* Reset the buffer */ device->ddraw_device_buffer.buffer_vertex_count = 0; + device->ddraw_device_buffer.buffer_indice_count = 0;
/* The error isn't really useful as it is too late, so always returning */ return D3D_OK;
From: Steve Schnepp steve.schnepp@pwkf.org
--- dlls/ddraw/ddraw_private.h | 1 + dlls/ddraw/device.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 325ff22c1ae..8455b157522 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -320,6 +320,7 @@ void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_ha #define D3D_BUFFER_SIZE (2 * 1024 * 1024) /* 2 MiB buffer */
struct d3d_device_buffer { + DWORD fvf; DWORD buffer_indice_count; DWORD buffer_vertex_count; WORD buffer_indices[D3DMAXNUMVERTICES]; diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index ce5709c615f..ed1873d1594 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -7002,7 +7002,6 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su * For now, it only supports : * d3d_device.version == 7 * primitive_type == D3DPT_TRIANGLEFAN - * fvf == 0x2c4 * flags == 0 * vertex_count >= 3 * @@ -7014,7 +7013,6 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri
if (!TRACE_ON(ddraw_buffer)) return WINED3DERR_NOTAVAILABLE; if (primitive_type != D3DPT_TRIANGLEFAN) return WINED3DERR_NOTAVAILABLE; - if (fvf != 0x2c4) return WINED3DERR_NOTAVAILABLE; if (flags) return WINED3DERR_NOTAVAILABLE;
if (vertex_count < 3) { @@ -7022,6 +7020,18 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri return WINED3DERR_NOTAVAILABLE; }
+ if (device->ddraw_device_buffer.buffer_vertex_count) { + if (fvf != device->ddraw_device_buffer.fvf) { + /* Not the same fvf as the buffered one. Cannot buffer more of those */ + TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->ddraw_device_buffer.fvf); + return WINED3DERR_NOTAVAILABLE; + } + } else { + /* New buffer, setting fvf */ + device->ddraw_device_buffer.fvf = fvf; + } + + /* append all the vertices to the buffer */ memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride);
@@ -7073,7 +7083,7 @@ static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { HRESULT ret; const DWORD from_flush_flags = 0xF0F0F0F0; // Using a specific flag to recognise & avoid recursion loops
- ret = d3d_device7_DrawIndexedPrimitive(iface, D3DPT_TRIANGLELIST, 0x2c4, + ret = d3d_device7_DrawIndexedPrimitive(iface, D3DPT_TRIANGLELIST, device->ddraw_device_buffer.fvf, device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indices, device->ddraw_device_buffer.buffer_indice_count, from_flush_flags);
From: Steve Schnepp steve.schnepp@pwkf.org
--- dlls/ddraw/ddraw_private.h | 10 ++-- dlls/ddraw/device.c | 102 +++++++++++++++++++++++++++---------- 2 files changed, 80 insertions(+), 32 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 8455b157522..2a322117202 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -317,14 +317,16 @@ DWORD ddraw_allocate_handle(struct ddraw_handle_table *t, void *object, enum ddr void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN; void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
-#define D3D_BUFFER_SIZE (2 * 1024 * 1024) /* 2 MiB buffer */ - struct d3d_device_buffer { DWORD fvf; + UINT stride; DWORD buffer_indice_count; DWORD buffer_vertex_count; - WORD buffer_indices[D3DMAXNUMVERTICES]; - char buffer_vertices[D3D_BUFFER_SIZE]; + WORD *buffer_indices; + char *buffer_vertices; + + unsigned int idx_buffer_pos; + unsigned int vertex_buffer_pos; };
struct d3d_device diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index ed1873d1594..10ddeeded73 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -3462,14 +3462,33 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
+ if (!TRACE_ON(ddraw_buffer)) goto old; + hr = ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride); if (hr == D3D_OK) { /* Buffered successfuly -> returning immediatly :-) */ return D3D_OK; }
+ FIXME("iface %p, primitive_type %#x, fvf %#lx, vertices %p, vertex_count %lu, flags %#lx.\n", + iface, primitive_type, fvf, vertices, vertex_count, flags); + FIXME("cannot buffer, flushing first\n"); + // Cannot buffer, need to flush the rest, then process this one - ddraw_buffer_flush_d7(iface); + hr = ddraw_buffer_flush_d7(iface); + + hr = ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride); + if (hr == D3D_OK) { + /* Buffered successfuly -> returning immediatly :-) */ + return D3D_OK; + } + + FIXME("REALLY cannot buffer skip\n"); + + // let's skip ;) + return hr; + +old:
wined3d_mutex_lock();
@@ -7011,7 +7030,6 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri struct d3d_device *device = impl_from_IDirect3DDevice7(iface); int buffer_indice_count_initial = device->ddraw_device_buffer.buffer_vertex_count;
- if (!TRACE_ON(ddraw_buffer)) return WINED3DERR_NOTAVAILABLE; if (primitive_type != D3DPT_TRIANGLEFAN) return WINED3DERR_NOTAVAILABLE; if (flags) return WINED3DERR_NOTAVAILABLE;
@@ -7020,17 +7038,28 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri return WINED3DERR_NOTAVAILABLE; }
- if (device->ddraw_device_buffer.buffer_vertex_count) { - if (fvf != device->ddraw_device_buffer.fvf) { - /* Not the same fvf as the buffered one. Cannot buffer more of those */ - TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->ddraw_device_buffer.fvf); - return WINED3DERR_NOTAVAILABLE; - } - } else { - /* New buffer, setting fvf */ + if (! device->ddraw_device_buffer.buffer_vertex_count) { + /* New buffer, setting everything up */ device->ddraw_device_buffer.fvf = fvf; - } + device->ddraw_device_buffer.stride = stride; + + /* We map & unmap directly. + * That way, we only reserve the space and other calls will have a new one. + * It should not happen, but let's be safe. + * + * We will fill it with following calls */ + wined3d_streaming_buffer_map(device->wined3d_device, &device->vertex_buffer, D3DMAXNUMVERTICES, stride, + &device->ddraw_device_buffer.vertex_buffer_pos, (void**) &device->ddraw_device_buffer.buffer_vertices); + wined3d_streaming_buffer_unmap(&device->vertex_buffer);
+ wined3d_streaming_buffer_map(device->wined3d_device, &device->index_buffer, D3DMAXNUMVERTICES, sizeof(*device->ddraw_device_buffer.buffer_indices), + &device->ddraw_device_buffer.idx_buffer_pos, (void**) &device->ddraw_device_buffer.buffer_indices); + wined3d_streaming_buffer_unmap(&device->index_buffer); + } else if (fvf != device->ddraw_device_buffer.fvf) { + /* Not the same fvf as the buffered one. Cannot buffer more of those */ + TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->ddraw_device_buffer.fvf); + return WINED3DERR_NOTAVAILABLE; + }
/* append all the vertices to the buffer */ memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride); @@ -7050,7 +7079,7 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri
/* Next triangles are recreated with : 2 next vertices then the 1rst one. * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... */ - for (int idx = 2; idx < vertex_count-1; idx ++) { + for (int idx = 3; idx < vertex_count; idx ++) { /* Copy the 2 last ones */ device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count - 1; device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count; @@ -7061,6 +7090,9 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); }
+ TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu max %d\n", + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count, D3DMAXNUMVERTICES); + /* Buffered ! */ return D3D_OK; } @@ -7070,31 +7102,45 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri * It will delegate to a single call to DrawPrimitive with the correct parameters, * and a (hopefully) huge list of triangles vertices. */
-static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { - struct d3d_device *device = impl_from_IDirect3DDevice7(iface); +static HRESULT ddraw_buffer_flush_d7_internal(struct d3d_device *device) { + HRESULT hr;
TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
- /* Nothing to do if it is empty */ - if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK; + /* Calling wined3d directly */ + wined3d_mutex_lock();
- /* Delegate the call */ - { - HRESULT ret; - const DWORD from_flush_flags = 0xF0F0F0F0; // Using a specific flag to recognise & avoid recursion loops + hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, device->ddraw_device_buffer.stride);
- ret = d3d_device7_DrawIndexedPrimitive(iface, D3DPT_TRIANGLELIST, device->ddraw_device_buffer.fvf, - device->ddraw_device_buffer.buffer_vertices, device->ddraw_device_buffer.buffer_vertex_count, - device->ddraw_device_buffer.buffer_indices, device->ddraw_device_buffer.buffer_indice_count, - from_flush_flags); + if (FAILED(hr)) + goto done; + + wined3d_stateblock_set_index_buffer(device->state, device->index_buffer.buffer, WINED3DFMT_R16_UINT); + wined3d_stateblock_set_vertex_declaration(device->state, ddraw_find_decl(device->ddraw, device->ddraw_device_buffer.fvf)); + wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_TRIANGLELIST), 0); + wined3d_device_apply_stateblock(device->wined3d_device, device->state); + d3d_device_sync_surfaces(device); + + wined3d_device_context_draw_indexed(device->immediate_context, + device->ddraw_device_buffer.vertex_buffer_pos / device->ddraw_device_buffer.stride, + device->ddraw_device_buffer.idx_buffer_pos / sizeof(*device->ddraw_device_buffer.buffer_indices), + device->ddraw_device_buffer.buffer_indice_count, 0, 0);
- if (ret != D3D_OK) FIXME("error in call delegation %ld", ret); - }
+done: + wined3d_mutex_unlock(); /* Reset the buffer */ device->ddraw_device_buffer.buffer_vertex_count = 0; device->ddraw_device_buffer.buffer_indice_count = 0;
- /* The error isn't really useful as it is too late, so always returning */ - return D3D_OK; + return hr; +} + +static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { + struct d3d_device *device = impl_from_IDirect3DDevice7(iface); + + /* Nothing to do if it is empty */ + if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK; + + return ddraw_buffer_flush_d7_internal(device); }
From: Steve Schnepp steve.schnepp@pwkf.org
D3DPT_POINTLIST primitives are used to draw blips on the minimap. *Each* point gets its own DrawPrimitive() call, which is very costly!
---
The implementation itself needs to be refactored with helper functions. I will amend the commit later, but I pushed early for feedback. --- dlls/ddraw/ddraw_private.h | 1 + dlls/ddraw/device.c | 54 ++++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 19 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 2a322117202..020f1be1173 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -318,6 +318,7 @@ void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_h void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
struct d3d_device_buffer { + D3DPRIMITIVETYPE primitive_type; DWORD fvf; UINT stride; DWORD buffer_indice_count; diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index 10ddeeded73..6e6cd895a95 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -7020,9 +7020,7 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su * * For now, it only supports : * d3d_device.version == 7 - * primitive_type == D3DPT_TRIANGLEFAN * flags == 0 - * vertex_count >= 3 * * Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST. */ @@ -7030,16 +7028,11 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri struct d3d_device *device = impl_from_IDirect3DDevice7(iface); int buffer_indice_count_initial = device->ddraw_device_buffer.buffer_vertex_count;
- if (primitive_type != D3DPT_TRIANGLEFAN) return WINED3DERR_NOTAVAILABLE; - if (flags) return WINED3DERR_NOTAVAILABLE; - - if (vertex_count < 3) { - WARN("vertex_count %lu lower than 3. not buffering", vertex_count); - return WINED3DERR_NOTAVAILABLE; - } + if (flags) return WINED3DERR_NOTAVAILABLE;
if (! device->ddraw_device_buffer.buffer_vertex_count) { /* New buffer, setting everything up */ + device->ddraw_device_buffer.primitive_type = primitive_type; device->ddraw_device_buffer.fvf = fvf; device->ddraw_device_buffer.stride = stride;
@@ -7059,24 +7052,41 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri /* Not the same fvf as the buffered one. Cannot buffer more of those */ TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->ddraw_device_buffer.fvf); return WINED3DERR_NOTAVAILABLE; + } else if (primitive_type != device->ddraw_device_buffer.primitive_type) { + TRACE_(ddraw_perf)("Buffering failed due to mismatched primitive_type %d != buffer.primitive_type %d \n", primitive_type, device->ddraw_device_buffer.primitive_type); + return WINED3DERR_NOTAVAILABLE; }
/* append all the vertices to the buffer */ memcpy(device->ddraw_device_buffer.buffer_vertices + device->ddraw_device_buffer.buffer_vertex_count * stride, vertices, vertex_count * stride); - - TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %05lu buffer_indice_count %05lu\n", vertex_count, stride, device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
/* Create the index */ + if (primitive_type == D3DPT_TRIANGLEFAN) goto fan; + if (primitive_type == D3DPT_POINTLIST) goto points; + + FIXME("primitive_type %#x not supported\n", primitive_type); + goto done; + +points: + for (int idx = 0; idx < vertex_count; idx ++) { + device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; + } + + goto done; + +fan: + if (vertex_count < 3) { + WARN("vertex_count %lu lower than 3. not buffering", vertex_count); + return WINED3DERR_NOTAVAILABLE; + }
/* The first triangle is the same, therefore the indices are simply copied over */ device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++; device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = device->ddraw_device_buffer.buffer_vertex_count++;
- TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", vertex_count, stride, - device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); - /* Next triangles are recreated with : 2 next vertices then the 1rst one. * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... */ for (int idx = 3; idx < vertex_count; idx ++) { @@ -7086,11 +7096,12 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri device->ddraw_device_buffer.buffer_indices[device->ddraw_device_buffer.buffer_indice_count++] = buffer_indice_count_initial; device->ddraw_device_buffer.buffer_vertex_count++;
- TRACE_(ddraw_perf)("idx %d vertex count %lu stride %d buffer_vertex_count %lu buffer_indice_count %lu\n", idx, vertex_count, stride, - device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + TRACE_(ddraw_perf)("vertex count %lu stride %d buffer_vertex_count %05lu buffer_indice_count %05lu idx %d\n", vertex_count, stride, + device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count, idx); }
- TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu max %d\n", +done: + TRACE_(ddraw_perf)("buffer_vertex_count %05lu buffer_indice_count %05lu max %d\n", device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count, D3DMAXNUMVERTICES);
/* Buffered ! */ @@ -7105,7 +7116,7 @@ static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE pri static HRESULT ddraw_buffer_flush_d7_internal(struct d3d_device *device) { HRESULT hr;
- TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu\n", device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count); + TRACE_(ddraw_perf)("primitive_type %#x buffer_vertex_count %05lu buffer_indice_count %05lu\n", device->ddraw_device_buffer.primitive_type, device->ddraw_device_buffer.buffer_vertex_count, device->ddraw_device_buffer.buffer_indice_count);
/* Calling wined3d directly */ wined3d_mutex_lock(); @@ -7117,7 +7128,13 @@ static HRESULT ddraw_buffer_flush_d7_internal(struct d3d_device *device) {
wined3d_stateblock_set_index_buffer(device->state, device->index_buffer.buffer, WINED3DFMT_R16_UINT); wined3d_stateblock_set_vertex_declaration(device->state, ddraw_find_decl(device->ddraw, device->ddraw_device_buffer.fvf)); + wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_TRIANGLELIST), 0); + + if (device->ddraw_device_buffer.primitive_type == D3DPT_POINTLIST) { + wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_POINTLIST), 0); + } + wined3d_device_apply_stateblock(device->wined3d_device, device->state); d3d_device_sync_surfaces(device);
@@ -7126,7 +7143,6 @@ static HRESULT ddraw_buffer_flush_d7_internal(struct d3d_device *device) { device->ddraw_device_buffer.idx_buffer_pos / sizeof(*device->ddraw_device_buffer.buffer_indices), device->ddraw_device_buffer.buffer_indice_count, 0, 0);
- done: wined3d_mutex_unlock(); /* Reset the buffer */
From: Steve Schnepp steve.schnepp@pwkf.org
The buffer helpers have no dx7 specifics, so passing the d3d_device struct directly is simpler and more performant. --- dlls/ddraw/device.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-)
diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index 6e6cd895a95..9ce37d89b76 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -34,8 +34,8 @@ WINE_DECLARE_DEBUG_CHANNEL(winediag); WINE_DECLARE_DEBUG_CHANNEL(ddraw_perf); WINE_DECLARE_DEBUG_CHANNEL(ddraw_buffer);
-static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface); -static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride); +static HRESULT ddraw_buffer_flush_d7(struct d3d_device *device); +static HRESULT ddraw_buffer_add_d7(struct d3d_device *device, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride);
/* The device ID */ const GUID IID_D3DDEVICE_WineD3D = { @@ -1598,7 +1598,7 @@ static HRESULT d3d_device7_EndScene(IDirect3DDevice7 *iface) TRACE("iface %p.\n", iface);
// Flush the vertices buffer - ddraw_buffer_flush_d7(iface); + ddraw_buffer_flush_d7(device);
wined3d_mutex_lock(); hr = wined3d_device_end_scene(device->wined3d_device); @@ -2557,7 +2557,7 @@ static HRESULT d3d_device7_SetRenderState(IDirect3DDevice7 *iface, TRACE("iface %p, state %#x, value %#lx.\n", iface, state, value);
// Flush the vertices buffer - ddraw_buffer_flush_d7(iface); + ddraw_buffer_flush_d7(device);
wined3d_mutex_lock(); /* Some render states need special care */ @@ -3464,7 +3464,7 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface,
if (!TRACE_ON(ddraw_buffer)) goto old;
- hr = ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride); + hr = ddraw_buffer_add_d7(device, primitive_type, fvf, vertices, vertex_count, flags, stride); if (hr == D3D_OK) { /* Buffered successfuly -> returning immediatly :-) */ return D3D_OK; @@ -3475,9 +3475,9 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, FIXME("cannot buffer, flushing first\n");
// Cannot buffer, need to flush the rest, then process this one - hr = ddraw_buffer_flush_d7(iface); + hr = ddraw_buffer_flush_d7(device);
- hr = ddraw_buffer_add_d7(iface, primitive_type, fvf, vertices, vertex_count, flags, stride); + hr = ddraw_buffer_add_d7(device, primitive_type, fvf, vertices, vertex_count, flags, stride); if (hr == D3D_OK) { /* Buffered successfuly -> returning immediatly :-) */ return D3D_OK; @@ -4682,7 +4682,7 @@ static HRESULT d3d_device7_SetTexture(IDirect3DDevice7 *iface, TRACE("iface %p, stage %lu, texture %p.\n", iface, stage, texture);
// Flush the vertices buffer - ddraw_buffer_flush_d7(iface); + ddraw_buffer_flush_d7(device);
if (surf && (surf->surface_desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE)) { @@ -4969,7 +4969,7 @@ static HRESULT d3d_device7_SetTextureStageState(IDirect3DDevice7 *iface, iface, stage, state, value);
// Flush the vertices buffer - ddraw_buffer_flush_d7(iface); + ddraw_buffer_flush_d7(device);
if (state > D3DTSS_TEXTURETRANSFORMFLAGS) { @@ -7024,8 +7024,7 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su * * Note : it does transform D3DPT_TRIANGLEFAN into D3DPT_TRIANGLELIST. */ -static HRESULT ddraw_buffer_add_d7(IDirect3DDevice7 *iface, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { - struct d3d_device *device = impl_from_IDirect3DDevice7(iface); +static HRESULT ddraw_buffer_add_d7(struct d3d_device *device, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { int buffer_indice_count_initial = device->ddraw_device_buffer.buffer_vertex_count;
if (flags) return WINED3DERR_NOTAVAILABLE; @@ -7152,9 +7151,7 @@ done: return hr; }
-static HRESULT ddraw_buffer_flush_d7(IDirect3DDevice7 *iface) { - struct d3d_device *device = impl_from_IDirect3DDevice7(iface); - +static HRESULT ddraw_buffer_flush_d7(struct d3d_device *device) { /* Nothing to do if it is empty */ if (! device->ddraw_device_buffer.buffer_vertex_count) return D3D_OK;
On Tue Feb 14 22:22:03 2023 +0000, Zebediah Figura wrote:
I took some time to look into this to see if there's extra overhead, and while I think there are some things we could do better in draw_primitive(), there's probably not very much. Depending on what's in frame, the CS spends the majority of its time in draw_primitive(). Probably about 20% of that is spent acquiring the GL context, 40% loading the RTVs, 20% in context_apply_draw_state(); the rest is difficult to measure. This is on a relatively powerful radeonsi machine, with the swap interval hacked to zero; the total frame time is probably about 9 ms in the scenes I'm testing. I think we can potentially cut draw_primitive() down to 10% of its current overhead if none of the state changes, but when we're doing 5000 draw calls per frame, even that may be too much. We could potentially buffer in wined3d, perhaps making use of EXT_multi_draw_arrays, but as Henri pointed out on IRC, we'd have to do a fair amount of work to invalidate (less than in ddraw itself), and this sort of thing probably doesn't perform well in newer d3d versions on Windows anyway. So buffering in ddraw is probably the right way to go. I'll look at the patch itself anon.
I finally managed to leverage the wined3d buffer.
The code can still be improved, but it should give a good idea of the final result.