From: Steve Schnepp steve.schnepp@pwkf.org
Convert all consecutive calls to d3d_device7_DrawPrimitive() into a single call to wined3d.
The buffer is flushed upon every state change. It is also flushed when the arguments passed to d3d_device7_DrawPrimitive() are incompatible with the vertices that are already buffered, such as a different fvf or primitive_type.
Note that batching does *increase* the amount of data to transmit, but only in a limited fashion, as it leverages indexed wined3d primitives. The bandwidth overhead is also much less costly than the overhead of multiple calls.
Finally, only a subset of the calls get buffered for now in order to ensure that the disruption is minimal.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=33814
---
Several remarks :
1. I finally squashed everything in 1 commit as it was too cumbersome to maintain the separation. I can split it at the end once everything else is fine.
2. I didn't change (yet) the ddraw_perf channel to d3d_perf as d3d_perf isn't used at all in ddraw.dll.
3. I used HRESULT as a return value for buffer_add instead of bool. I do not like returning bool when it is not obvious what true/false mean. Yet, I don't mind changing it to an enum if one prefers. And of course, I can change it to bool if one insists ;-) --- dlls/ddraw/ddraw_private.h | 16 +++ dlls/ddraw/device.c | 193 ++++++++++++++++++++++++++++++++++++- 2 files changed, 206 insertions(+), 3 deletions(-)
diff --git a/dlls/ddraw/ddraw_private.h b/dlls/ddraw/ddraw_private.h index 09e8133350b..7a9e08505fa 100644 --- a/dlls/ddraw/ddraw_private.h +++ b/dlls/ddraw/ddraw_private.h @@ -317,6 +317,19 @@ DWORD ddraw_allocate_handle(struct ddraw_handle_table *t, void *object, enum ddr void *ddraw_free_handle(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN; void *ddraw_get_object(struct ddraw_handle_table *t, DWORD handle, enum ddraw_handle_type type) DECLSPEC_HIDDEN;
+struct vertex_batch { /* DrawPrimitive() calls accumulated by device.c until they are flushed to wined3d as one indexed draw */ + D3DPRIMITIVETYPE primitive_type; /* primitive type the batched calls were made with */ + DWORD fvf; /* FVF shared by all batched vertices */ + UINT stride; /* stride passed by the caller for that FVF; used as the stream stride on flush */ + WORD *indices; /* index data pointer filled in by wined3d_streaming_buffer_map() */ + char *vertices; /* vertex data pointer filled in by wined3d_streaming_buffer_map() */ + + unsigned int index_count; /* indices written so far; reset to 0 by a flush */ + unsigned int vertex_count; /* vertices written so far; 0 means the batch is empty */ + unsigned int index_pos; /* position returned by the index buffer map; divided by the index size when drawing */ + unsigned int vertex_pos; /* position returned by the vertex buffer map; divided by stride when drawing */ +}; + struct d3d_device { /* IUnknown */ @@ -369,6 +382,9 @@ struct d3d_device
struct wined3d_stateblock *recording, *state, *update_state; const struct wined3d_stateblock_state *stateblock_state; + + /* Vertex buffer for squashing DrawPrimitive() calls before sending it to wined3d */ + struct vertex_batch vertex_batch; };
HRESULT d3d_device_create(struct ddraw *ddraw, const GUID *guid, struct ddraw_surface *target, IUnknown *rt_iface, diff --git a/dlls/ddraw/device.c b/dlls/ddraw/device.c index 1cfef5007d5..fd5d87e6d94 100644 --- a/dlls/ddraw/device.c +++ b/dlls/ddraw/device.c @@ -31,6 +31,7 @@
WINE_DEFAULT_DEBUG_CHANNEL(ddraw); WINE_DECLARE_DEBUG_CHANNEL(winediag); +WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
/* The device ID */ const GUID IID_D3DDEVICE_WineD3D = { @@ -1571,6 +1572,159 @@ static HRESULT WINAPI d3d_device1_BeginScene(IDirect3DDevice *iface) return IDirect3DDevice7_BeginScene(&device->IDirect3DDevice7_iface); }
+/* Flushing the buffer if it isn't empty. + * + * It will delegate to a single call to wined3d with the correct parameters, + * and a (hopefully) huge list of vertices/indices. */ +static HRESULT ddraw_buffer_flush(struct d3d_device *device) { + HRESULT hr; + + TRACE_(d3d_perf)("primitive_type %#x vertex_count %05d index_count %05d\n", device->vertex_batch.primitive_type, device->vertex_batch.vertex_count, device->vertex_batch.index_count); + + /* Calling wined3d directly */ + wined3d_mutex_lock(); + + wined3d_streaming_buffer_unmap(&device->vertex_buffer); + wined3d_streaming_buffer_unmap(&device->index_buffer); + + hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, device->vertex_batch.stride); + + if (FAILED(hr)) + goto done; + + wined3d_stateblock_set_index_buffer(device->state, device->index_buffer.buffer, WINED3DFMT_R16_UINT); + wined3d_stateblock_set_vertex_declaration(device->state, ddraw_find_decl(device->ddraw, device->vertex_batch.fvf)); + + wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_TRIANGLELIST), 0); + + if (device->vertex_batch.primitive_type == D3DPT_POINTLIST) { + wined3d_device_context_set_primitive_type(device->immediate_context, wined3d_primitive_type_from_ddraw(D3DPT_POINTLIST), 0); + } + + wined3d_device_apply_stateblock(device->wined3d_device, device->state); + d3d_device_sync_surfaces(device); + + wined3d_device_context_draw_indexed(device->immediate_context, + device->vertex_batch.vertex_pos / device->vertex_batch.stride, + device->vertex_batch.index_pos / sizeof(*device->vertex_batch.indices), + device->vertex_batch.index_count, 0, 0); + +done: + wined3d_mutex_unlock(); + + /* Reset the buffer */ + device->vertex_batch.vertex_count = 0; + device->vertex_batch.index_count = 0; + + return hr; +} + +/* Helper to call ddraw_buffer_flush() only if needed. 
+ * + * It enables the compiler to inline the test so it does not have a function + * call penalty. + * + * Otherwise it even shows itself in perf as it is called *quite* often. + * With this helper, even the test disappears from perf. */ +static HRESULT ddraw_buffer_flush_if_needed(struct d3d_device *device) { + /* Nothing to do if it is empty */ + if (! device->vertex_batch.vertex_count) return D3D_OK; + + return ddraw_buffer_flush(device); +} + +static void ddraw_buffer_add_indices_points(struct d3d_device *device, DWORD vertex_count) { + unsigned int idx; + + for (idx = 0; idx < vertex_count; idx ++) { + device->vertex_batch.indices[device->vertex_batch.index_count++] = device->vertex_batch.vertex_count++; + } +} + +static void ddraw_buffer_add_indices_fan(struct d3d_device *device, DWORD vertex_count) { + unsigned int index_count_initial = device->vertex_batch.vertex_count; + unsigned int idx; + + /* The first triangle is the same, therefore the indices are simply copied over */ + device->vertex_batch.indices[device->vertex_batch.index_count++] = device->vertex_batch.vertex_count++; + device->vertex_batch.indices[device->vertex_batch.index_count++] = device->vertex_batch.vertex_count++; + device->vertex_batch.indices[device->vertex_batch.index_count++] = device->vertex_batch.vertex_count++; + + /* Next triangles are recreated with : 2 next vertices then the 1rst one. + * So, it will *increase* the number of total vertices from 4 to 6, 5 to 9, 6 to 12, ... 
*/ + for (idx = 3; idx < vertex_count; idx ++) { + /* Copy the 2 last ones */ + device->vertex_batch.indices[device->vertex_batch.index_count++] = device->vertex_batch.vertex_count - 1; + device->vertex_batch.indices[device->vertex_batch.index_count++] = device->vertex_batch.vertex_count; + device->vertex_batch.indices[device->vertex_batch.index_count++] = index_count_initial; + device->vertex_batch.vertex_count++; + } +} + +/* + * Note : Adding to the buffer transforms the D3DPT_TRIANGLEFAN primitive into a D3DPT_TRIANGLELIST. + * Otherwise, we cannot concatenate them as TRIANGLE FAN has the first vertex in common to the whole list. + */ +static HRESULT ddraw_buffer_add(struct d3d_device *device, D3DPRIMITIVETYPE primitive_type, DWORD fvf, void *vertices, DWORD vertex_count, DWORD flags, UINT stride) { + + if (device->vertex_batch.vertex_count) { + /* if already-buffered vertexes do not match the one that we want to add, flush. */ + if (fvf != device->vertex_batch.fvf) { + TRACE_(d3d_perf)("Buffering failed due to mismatched fvf %ld != buffer.fvf %ld \n", fvf, device->vertex_batch.fvf); + ddraw_buffer_flush(device); + } else if (primitive_type != device->vertex_batch.primitive_type) { + TRACE_(d3d_perf)("Buffering failed due to mismatched primitive_type %d != buffer.primitive_type %d \n", primitive_type, device->vertex_batch.primitive_type); + ddraw_buffer_flush(device); + } + } + + /* Need to test again vertex_count as a flush resets it to 0 */ + if (! device->vertex_batch.vertex_count) { + /* New buffer, setting everything up */ + device->vertex_batch.primitive_type = primitive_type; + device->vertex_batch.fvf = fvf; + device->vertex_batch.stride = stride; + + /* We map & unmap directly. + * That way, we only reserve the space and other calls will have a new one. + * It should not happen, but let's be safe. 
+ * + * We will fill it with following calls */ + wined3d_streaming_buffer_map(device->wined3d_device, &device->vertex_buffer, D3DMAXNUMVERTICES, stride, + &device->vertex_batch.vertex_pos, (void**) &device->vertex_batch.vertices); + wined3d_streaming_buffer_map(device->wined3d_device, &device->index_buffer, D3DMAXNUMVERTICES, sizeof(*device->vertex_batch.indices), + &device->vertex_batch.index_pos, (void**) &device->vertex_batch.indices); + } + + /* append all the vertices to the buffer */ + memcpy(device->vertex_batch.vertices + device->vertex_batch.vertex_count * stride, vertices, vertex_count * stride); + TRACE_(d3d_perf)("vertex count %lu stride %d vertex_count %05d index_count %05d\n", vertex_count, stride, + device->vertex_batch.vertex_count, device->vertex_batch.index_count); + + /* Create the index */ + switch(primitive_type) { + case D3DPT_TRIANGLEFAN: + if (vertex_count < 3) { + WARN("vertex_count %lu lower than 3. not buffering.\n", vertex_count); + return WINED3DERR_NOTAVAILABLE; + } + ddraw_buffer_add_indices_fan(device, vertex_count); + break; + case D3DPT_POINTLIST: + ddraw_buffer_add_indices_points(device, vertex_count); + break; + default: + FIXME("primitive_type %#x not supported\n", primitive_type); + } + + TRACE_(d3d_perf)("vertex_count %05d index_count %05d max %d\n", + device->vertex_batch.vertex_count, device->vertex_batch.index_count, D3DMAXNUMVERTICES); + + /* Buffered ! */ + return D3D_OK; +} + + /***************************************************************************** * IDirect3DDevice7::EndScene * @@ -1592,6 +1746,8 @@ static HRESULT d3d_device7_EndScene(IDirect3DDevice7 *iface)
TRACE("iface %p.\n", iface);
+ ddraw_buffer_flush_if_needed(device); + wined3d_mutex_lock(); hr = wined3d_device_end_scene(device->wined3d_device); wined3d_mutex_unlock(); @@ -2548,6 +2704,8 @@ static HRESULT d3d_device7_SetRenderState(IDirect3DDevice7 *iface,
TRACE("iface %p, state %#x, value %#lx.\n", iface, state, value);
+ ddraw_buffer_flush_if_needed(device); + wined3d_mutex_lock(); /* Some render states need special care */ switch (state) @@ -3132,6 +3290,8 @@ static HRESULT d3d_device7_SetTransform(IDirect3DDevice7 *iface,
TRACE("iface %p, state %#x, matrix %p.\n", iface, state, matrix);
+ ddraw_buffer_flush_if_needed(device); + if (!matrix) return DDERR_INVALIDPARAMS;
@@ -3307,6 +3467,8 @@ static HRESULT d3d_device7_MultiplyTransform(IDirect3DDevice7 *iface,
TRACE("iface %p, state %#x, matrix %p.\n", iface, state, matrix);
+ ddraw_buffer_flush_if_needed(device); + /* Note: D3DMATRIX is compatible with struct wined3d_matrix. */ wined3d_mutex_lock(); wined3d_stateblock_multiply_transform(device->state, @@ -3451,10 +3613,19 @@ static HRESULT d3d_device7_DrawPrimitive(IDirect3DDevice7 *iface, stride = get_flexible_vertex_size(fvf); size = vertex_count * stride;
+ hr = ddraw_buffer_add(device, primitive_type, fvf, vertices, vertex_count, flags, stride); + if (hr == D3D_OK) { + /* Buffered successfuly -> returning immediatly :-) */ + return D3D_OK; + } + + FIXME("REALLY cannot buffer, skipping\n"); + wined3d_mutex_lock();
- if (FAILED(hr = wined3d_streaming_buffer_upload(device->wined3d_device, - &device->vertex_buffer, vertices, size, stride, &vb_pos))) + hr = wined3d_streaming_buffer_upload(device->wined3d_device, + &device->vertex_buffer, vertices, size, stride, &vb_pos); + if (FAILED(hr)) goto done;
hr = wined3d_stateblock_set_stream_source(device->state, 0, device->vertex_buffer.buffer, 0, stride); @@ -3494,10 +3665,12 @@ static HRESULT WINAPI d3d_device7_DrawPrimitive_FPUPreserve(IDirect3DDevice7 *if return hr; }
-static void setup_lighting(const struct d3d_device *device, DWORD fvf, DWORD flags) +static void setup_lighting(struct d3d_device *device, DWORD fvf, DWORD flags) { BOOL enable = TRUE;
+ ddraw_buffer_flush_if_needed(device); + /* Ignore the D3DFVF_XYZRHW case here, wined3d takes care of that */ if (!device->material || !(fvf & D3DFVF_NORMAL) || (flags & D3DDP_DONOTLIGHT)) enable = FALSE; @@ -4641,6 +4814,8 @@ static HRESULT d3d_device7_SetTexture(IDirect3DDevice7 *iface,
TRACE("iface %p, stage %lu, texture %p.\n", iface, stage, texture);
+ ddraw_buffer_flush_if_needed(device); + if (surf && (surf->surface_desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE)) { if (surf->draw_texture) @@ -4925,6 +5100,8 @@ static HRESULT d3d_device7_SetTextureStageState(IDirect3DDevice7 *iface, TRACE("iface %p, stage %lu, state %#x, value %#lx.\n", iface, stage, state, value);
+ ddraw_buffer_flush_if_needed(device); + if (state > D3DTSS_TEXTURETRANSFORMFLAGS) { WARN("Invalid state %#x passed.\n", state); @@ -5195,6 +5372,8 @@ static HRESULT d3d_device7_SetViewport(IDirect3DDevice7 *iface, D3DVIEWPORT7 *vi if (!viewport) return DDERR_INVALIDPARAMS;
+ ddraw_buffer_flush_if_needed(device); + wined3d_mutex_lock(); if (!(rtv = wined3d_device_context_get_rendertarget_view(device->immediate_context, 0))) { @@ -5307,6 +5486,8 @@ static HRESULT d3d_device7_SetMaterial(IDirect3DDevice7 *iface, D3DMATERIAL7 *ma if (!material) return DDERR_INVALIDPARAMS;
+ ddraw_buffer_flush_if_needed(device); + wined3d_mutex_lock(); /* Note: D3DMATERIAL7 is compatible with struct wined3d_material. */ wined3d_stateblock_set_material(device->update_state, (const struct wined3d_material *)material); @@ -5400,6 +5581,8 @@ static HRESULT d3d_device7_SetLight(IDirect3DDevice7 *iface, DWORD light_idx, D3
TRACE("iface %p, light_idx %lu, light %p.\n", iface, light_idx, light);
+ ddraw_buffer_flush_if_needed(device); + wined3d_mutex_lock(); /* Note: D3DLIGHT7 is compatible with struct wined3d_light. */ hr = wined3d_stateblock_set_light(device->update_state, light_idx, (const struct wined3d_light *)light); @@ -6279,6 +6462,8 @@ static HRESULT d3d_device7_LightEnable(IDirect3DDevice7 *iface, DWORD light_idx,
TRACE("iface %p, light_idx %lu, enabled %#x.\n", iface, light_idx, enabled);
+ ddraw_buffer_flush_if_needed(device); + wined3d_mutex_lock(); hr = wined3d_stateblock_set_light_enable(device->update_state, light_idx, enabled); wined3d_mutex_unlock(); @@ -6381,6 +6566,8 @@ static HRESULT d3d_device7_SetClipPlane(IDirect3DDevice7 *iface, DWORD idx, D3DV if (!plane) return DDERR_INVALIDPARAMS;
+ ddraw_buffer_flush_if_needed(device); + wined3d_plane = (struct wined3d_vec4 *)plane;
wined3d_mutex_lock();