From: Steve Schnepp steve.schnepp@pwkf.org
Convert all consecutive calls to d7_DrawPrimitive() into a single call to wined3d.
The buffer is flushed upon every state change. It is also flushed when d7_DrawPrimitive() is called with arguments that are incompatible with the buffered ones, such as a different fvf or an incompatible primitive_type.
Note that buffering does *increase* the amount of data to transmit, but only to a limited extent, as it leverages indexed wined3d primitives. The extra bandwidth is also much less costly than the overhead of issuing multiple draw calls.
Finally, only a subset of the calls get buffered for now in order to ensure that the disruption is minimal.
It is opt-in and enabled via WINEDEBUG="+ddraw_buffer"
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=33814 --- dlls/ddraw/ddraw_private.h | 15 ++++ dlls/ddraw/device.c | 178 ++++++++++++++++++++++++++++++++++++- 2 files changed, 191 insertions(+), 2 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 09e8133350b..2a322117202 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -317,6 +317,18 @@ DWORD ddraw_allocate_handle(struct ddraw_handle_table *t, void *object, enum ddr void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN; void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
+struct d3d_device_buffer { /* Pending batch of DrawPrimitive() calls, filled by ddraw_buffer_add() and submitted by ddraw_buffer_flush(). */
+    DWORD fvf; /* FVF recorded when the batch is started; calls with a different fvf are not added to it. */
+    UINT stride; /* Vertex stride recorded when the batch is started; used as the stream stride at flush time. */
+    DWORD buffer_indice_count; /* Number of indices written through buffer_indices so far. */
+    DWORD buffer_vertex_count; /* Number of vertices copied through buffer_vertices so far; 0 means the batch is empty. */
+    WORD *buffer_indices; /* Pointer returned by mapping the device index streaming buffer when the batch is started. */
+    char *buffer_vertices; /* Pointer returned by mapping the device vertex streaming buffer when the batch is started. */
+
+    unsigned int idx_buffer_pos; /* Position returned by the index buffer map; divided by the index size at flush time to get the start index. */
+    unsigned int vertex_buffer_pos; /* Position returned by the vertex buffer map; divided by the stride at flush time to get the base vertex. */
+};
+
 struct d3d_device
 {
     /* IUnknown */
@@ -369,6 +381,9 @@ struct d3d_device
struct wined3d_stateblock *recording, *state, *update_state; const struct wined3d_stateblock_state *stateblock_state; + + /* Vertices Buffer for squashing DrawPrimitive() calls before sending it to wined3d */ + struct d3d_device_buffer ddraw_device_buffer; };
HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_surface *target, IUnknown *rt_iface, diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index 1cfef5007d5..cbdd83ce50f 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -31,6 +31,11 @@
WINE_DEFAULT_DEBUG_CHANNEL(ddraw); WINE_DECLARE_DEBUG_CHANNEL(winediag); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_perf); +WINE_DECLARE_DEBUG_CHANNEL(ddraw_buffer); + +static HRESULT ddraw_buffer_flush(struct d3d_device *device); +static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride);
/* The device ID */ const GUID IID_D3DDEVICE_WineD3D = { @@ -1592,6 +1597,8 @@ static HRESULT d3d_device7_EndScene(IDirect3DDevice7 *iface)
TRACE("iface %p.\n", iface);
+ ddraw_buffer_flush(device); + wined3d_mutex_lock(); hr = wined3d_device_end_scene(device->wined3d_device); wined3d_mutex_unlock(); @@ -2548,6 +2555,8 @@ static HRESULT d3d_device7_SetRenderState(IDirect3DDevice7 *iface,
TRACE("iface %p, state %#x, value %#lx.\n", iface, state, value);
+ ddraw_buffer_flush(device); + wined3d_mutex_lock(); /* Some render states need special care */ switch (state) @@ -3451,10 +3460,39 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
+ if (!TRACE_ON(ddraw_buffer)) goto old; + + hr = ddraw_buffer_add(device, primitive_type, fvf, vertices, vertex_count, flags, stride); + if (hr == D3D_OK) { + /* Buffered successfuly -> returning immediatly :-) */ + return D3D_OK; + } + + FIXME("iface %p, primitive_type %#x, fvf %#lx, vertices %p, vertex_count %lu, flags %#lx.\n", + iface, primitive_type, fvf, vertices, vertex_count, flags); + FIXME("cannot buffer, flushing first\n"); + + /* Cannot buffer, need to flush the rest, then process this one */ + hr = ddraw_buffer_flush(device); + + hr = ddraw_buffer_add(device, primitive_type, fvf, vertices, vertex_count, flags, stride); + if (hr == D3D_OK) { + /* Buffered successfuly -> returning immediatly :-) */ + return D3D_OK; + } + + FIXME("REALLY cannot buffer, skipping\n"); + + /* let's skip it */ + return hr; + +old: + wined3d_mutex_lock();
- if (FAILED(hr = wined3d_streaming_buffer_upload(device->wined3d_device, - &device->vertex_buffer, vertices, size, stride, &vb_pos))) + hr = wined3d_streaming_buffer_upload(device->wined3d_device, + &device->vertex_buffer, vertices, size, stride, &vb_pos); + if (FAILED(hr)) goto done;
hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, stride); @@ -4641,6 +4679,8 @@ static HRESULT d3d_device7_SetTexture(IDirect3DDevice7 *iface,
TRACE("iface %p, stage %lu, texture %p.\n", iface, stage, texture);
+ ddraw_buffer_flush(device); + if (surf && (surf->surface_desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE)) { if (surf->draw_texture) @@ -4925,6 +4965,8 @@ static HRESULT d3d_device7_SetTextureStageState(IDirect3DDevice7 *iface, TRACE("iface %p, stage %lu, state %#x, value %#lx.\n", iface, stage, state, value);
+ ddraw_buffer_flush(device); + if (state > D3DTSS_TEXTURETRANSFORMFLAGS) { WARN("Invalid state %#x passed.\n", state); @@ -6877,6 +6919,11 @@ static HRESULT d3d_device_init(struct d3d_device *device, struct ddraw *ddraw, c wined3d_streaming_buffer_init(&device->vertex_buffer, WINED3D_BIND_VERTEX_BUFFER); wined3d_streaming_buffer_init(&device->index_buffer, WINED3D_BIND_INDEX_BUFFER);
+ /* Initialize the ddraw triangle buffer. + * The 3 first vertices indexes will *always* be { 0, 1, 2 }. + */ + device->ddraw_device_buffer.buffer_vertex_count = 0; + /* Render to the back buffer */ rtv = ddraw_surface_get_rendertarget_view(target); if (FAILED(hr = wined3d_device_context_set_rendertarget_views(device->immediate_context, 0, 1, &rtv, TRUE))) @@ -6963,3 +7010,130 @@ HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_su
return D3D_OK; }
+
+/*
+ * Draw call batching helpers.
+ *
+ * ddraw_buffer_add() appends one draw call to the pending batch, and
+ * ddraw_buffer_flush() submits the whole batch to wined3d as a single indexed
+ * D3DPT_TRIANGLELIST draw.
+ *
+ * For now, a call is only batched when:
+ *   primitive_type == D3DPT_TRIANGLEFAN
+ *   flags == 0
+ *   vertex_count >= 3
+ * and when it uses the same fvf as the calls already in the batch.
+ * The helpers themselves do not check the device version.
+ */
+
+/* Size passed to wined3d_streaming_buffer_map() for both the vertex and the
+ * index streaming buffer when a batch is started. The batch capacity is
+ * derived from it, assuming the size is expressed in bytes like the size given
+ * to wined3d_streaming_buffer_upload().
+ * NOTE(review): if the intent was to hold D3DMAXNUMVERTICES vertices, the
+ * reservation has to be scaled by the stride instead. */
+#define DDRAW_BATCH_RESERVED_SIZE D3DMAXNUMVERTICES
+
+/* Append one triangle fan to the pending batch.
+ *
+ * The vertices are copied as is and the fan is described by list indices:
+ * a fan of n vertices produces n - 2 triangles, hence 3 * (n - 2) indices.
+ *
+ * Returns D3D_OK when the call was batched.
+ * Returns WINED3DERR_NOTAVAILABLE when the call cannot be added to the current
+ * batch (unsupported arguments, fvf mismatch, or not enough reserved space
+ * left). The batch is left untouched in that case, so the caller can flush it
+ * and retry.
+ * Returns the wined3d error code when reserving streaming buffer space fails. */
+static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE primitive_type, DWORD fvf,
+        void *vertices, DWORD vertex_count, DWORD flags, UINT stride)
+{
+    struct d3d_device_buffer *batch = &device->ddraw_device_buffer;
+    DWORD max_vertices, max_indices, used_vertices, used_indices, index_count, i;
+    WORD *indices;
+    HRESULT hr;
+
+    if (primitive_type != D3DPT_TRIANGLEFAN)
+        return WINED3DERR_NOTAVAILABLE;
+    if (flags)
+        return WINED3DERR_NOTAVAILABLE;
+
+    if (vertex_count < 3)
+    {
+        WARN("vertex_count %lu lower than 3. not buffering\n", vertex_count);
+        return WINED3DERR_NOTAVAILABLE;
+    }
+
+    /* A zero stride cannot be batched, and would divide by zero below. */
+    if (!stride)
+        return WINED3DERR_NOTAVAILABLE;
+
+    /* Capacity of the reserved space. max_vertices never exceeds
+     * D3DMAXNUMVERTICES, so every vertex index fits in a WORD. */
+    max_vertices = DDRAW_BATCH_RESERVED_SIZE / stride;
+    max_indices = DDRAW_BATCH_RESERVED_SIZE / sizeof(*batch->buffer_indices);
+
+    /* Checked first so that the index count below cannot overflow. */
+    if (vertex_count > max_vertices)
+        return WINED3DERR_NOTAVAILABLE;
+    index_count = 3 * (vertex_count - 2);
+
+    /* The batch is device state; protect it like the other draw paths do. */
+    wined3d_mutex_lock();
+
+    used_vertices = batch->buffer_vertex_count;
+    used_indices = used_vertices ? batch->buffer_indice_count : 0;
+
+    if (used_vertices && fvf != batch->fvf)
+    {
+        /* Not the same fvf as the buffered one. Cannot buffer more of those */
+        TRACE_(ddraw_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, batch->fvf);
+        hr = WINED3DERR_NOTAVAILABLE;
+        goto done;
+    }
+
+    /* Never write past the space reserved when the batch was started. */
+    if (used_vertices + vertex_count > max_vertices || used_indices + index_count > max_indices)
+    {
+        TRACE_(ddraw_perf)("Buffering failed, no room left for %lu vertices / %lu indices\n",
+                vertex_count, index_count);
+        hr = WINED3DERR_NOTAVAILABLE;
+        goto done;
+    }
+
+    if (!used_vertices)
+    {
+        unsigned int vertex_pos, index_pos;
+        void *vertex_data, *index_data;
+
+        /* New batch. We map & unmap directly: that way we only reserve the
+         * space, and other users of the streaming buffers get a new range.
+         * The reserved range is filled by this call and the following ones.
+         * NOTE(review): the returned pointers are written through after the
+         * unmap; confirm that this is valid for the wined3d backend in use. */
+        hr = wined3d_streaming_buffer_map(device->wined3d_device, &device->vertex_buffer,
+                DDRAW_BATCH_RESERVED_SIZE, stride, &vertex_pos, &vertex_data);
+        if (FAILED(hr))
+            goto done;
+        wined3d_streaming_buffer_unmap(&device->vertex_buffer);
+
+        hr = wined3d_streaming_buffer_map(device->wined3d_device, &device->index_buffer,
+                DDRAW_BATCH_RESERVED_SIZE, sizeof(*batch->buffer_indices), &index_pos, &index_data);
+        if (FAILED(hr))
+            goto done;
+        wined3d_streaming_buffer_unmap(&device->index_buffer);
+
+        /* Only commit the batch state once both reservations succeeded. */
+        batch->fvf = fvf;
+        batch->stride = stride;
+        batch->buffer_vertices = vertex_data;
+        batch->vertex_buffer_pos = vertex_pos;
+        batch->buffer_indices = index_data;
+        batch->idx_buffer_pos = index_pos;
+        batch->buffer_indice_count = 0;
+    }
+
+    /* Append all the vertices to the buffer. */
+    memcpy(batch->buffer_vertices + (size_t)used_vertices * stride, vertices, (size_t)vertex_count * stride);
+
+    /* Create the indices. Triangle i of the fan is emitted as
+     * (v[i - 1], v[i], v[0]), including the first one, so that the first
+     * vertex of each list triangle is the one Direct3D uses as the first
+     * vertex of the corresponding fan triangle (this matters for flat
+     * shading). The winding is the same as (v[0], v[i - 1], v[i]). */
+    indices = batch->buffer_indices + used_indices;
+    for (i = 2; i < vertex_count; ++i)
+    {
+        *indices++ = (WORD)(used_vertices + i - 1);
+        *indices++ = (WORD)(used_vertices + i);
+        *indices++ = (WORD)used_vertices;
+    }
+
+    batch->buffer_vertex_count = used_vertices + vertex_count;
+    batch->buffer_indice_count = used_indices + index_count;
+
+    TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu max %d\n",
+            batch->buffer_vertex_count, batch->buffer_indice_count, D3DMAXNUMVERTICES);
+
+    /* Buffered ! */
+    hr = D3D_OK;
+
+done:
+    wined3d_mutex_unlock();
+    return hr;
+}
+
+/* Submit the pending batch with a single indexed D3DPT_TRIANGLELIST draw.
+ *
+ * The batch is reset whether the submission succeeds or not, so a failing
+ * batch is dropped instead of being submitted again.
+ *
+ * Returns the result of wined3d_stateblock_set_stream_source(); when that
+ * call fails, nothing is drawn. */
+static HRESULT ddraw_buffer_flush_internal(struct d3d_device *device)
+{
+    struct d3d_device_buffer *batch = &device->ddraw_device_buffer;
+    HRESULT hr;
+
+    TRACE_(ddraw_perf)("buffer_vertex_count %lu buffer_indice_count %lu\n",
+            batch->buffer_vertex_count, batch->buffer_indice_count);
+
+    /* Calling wined3d directly */
+    wined3d_mutex_lock();
+
+    hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, batch->stride);
+    if (FAILED(hr))
+        goto done;
+
+    wined3d_stateblock_set_index_buffer(device->state, device->index_buffer.buffer, WINED3DFMT_R16_UINT);
+    wined3d_stateblock_set_vertex_declaration(device->state, ddraw_find_decl(device->ddraw, batch->fvf));
+    wined3d_device_context_set_primitive_type(device->immediate_context,
+            wined3d_primitive_type_from_ddraw(D3DPT_TRIANGLELIST), 0);
+    wined3d_device_apply_stateblock(device->wined3d_device, device->state);
+    d3d_device_sync_surfaces(device);
+
+    /* The positions recorded when the space was reserved are converted to a
+     * base vertex and a start index by dividing them by the element size. */
+    wined3d_device_context_draw_indexed(device->immediate_context,
+            batch->vertex_buffer_pos / batch->stride,
+            batch->idx_buffer_pos / sizeof(*batch->buffer_indices),
+            batch->buffer_indice_count, 0, 0);
+
+done:
+    /* Reset the buffer while the lock is still held. */
+    batch->buffer_vertex_count = 0;
+    batch->buffer_indice_count = 0;
+    wined3d_mutex_unlock();
+
+    return hr;
+}
+
+/* Flush the pending batch, if any.
+ *
+ * Returns D3D_OK when the batch is empty, the result of the submission
+ * otherwise. */
+static HRESULT ddraw_buffer_flush(struct d3d_device *device)
+{
+    /* Nothing to do if it is empty */
+    if (!device->ddraw_device_buffer.buffer_vertex_count)
+        return D3D_OK;
+
+    return ddraw_buffer_flush_internal(device);
+}