Hi,
Why not always allocate SYSTEMMEM buffers in GPU memory? For example, GTT memory (without WC, thus fast CPU reads), with persistent flags, which means you don't need to unlock. Then no upload is needed.
Yours,
Axel
On 12/12/2018 17:09, Matteo Bruni wrote:
From: Henri Verbeet hverbeet@codeweavers.com
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=45486
Signed-off-by: Matteo Bruni mbruni@codeweavers.com
dlls/d3d9/buffer.c | 24 ++++++++++++++-- dlls/d3d9/d3d9_private.h | 4 +++ dlls/d3d9/device.c | 60 ++++++++++++++++++++++++++++++++++++---- dlls/d3d9/stateblock.c | 17 ++++++++++++ 4 files changed, 96 insertions(+), 9 deletions(-)
diff --git a/dlls/d3d9/buffer.c b/dlls/d3d9/buffer.c index 02b3570402f..15b7e2bdbce 100644 --- a/dlls/d3d9/buffer.c +++ b/dlls/d3d9/buffer.c @@ -59,6 +59,8 @@ static ULONG WINAPI d3d9_vertexbuffer_AddRef(IDirect3DVertexBuffer9 *iface) IDirect3DDevice9Ex_AddRef(buffer->parent_device); wined3d_mutex_lock(); wined3d_buffer_incref(buffer->wined3d_buffer);
if (buffer->draw_buffer)
wined3d_buffer_incref(buffer->draw_buffer); wined3d_mutex_unlock(); }
@@ -78,6 +80,8 @@ static ULONG WINAPI d3d9_vertexbuffer_Release(IDirect3DVertexBuffer9 *iface)
wined3d_mutex_lock(); wined3d_buffer_decref(buffer->wined3d_buffer);
if (buffer->draw_buffer)
wined3d_buffer_decref(buffer->draw_buffer); wined3d_mutex_unlock(); /* Release the device last, as it may cause the device to be destroyed. */
@@ -274,6 +278,7 @@ static const struct wined3d_parent_ops d3d9_vertexbuffer_wined3d_parent_ops = HRESULT vertexbuffer_init(struct d3d9_vertexbuffer *buffer, struct d3d9_device *device, UINT size, UINT usage, DWORD fvf, D3DPOOL pool) {
- const struct wined3d_parent_ops *parent_ops = &d3d9_null_wined3d_parent_ops; struct wined3d_buffer_desc desc; HRESULT hr;
@@ -299,15 +304,28 @@ HRESULT vertexbuffer_init(struct d3d9_vertexbuffer *buffer, struct d3d9_device *
desc.byte_width = size; desc.usage = usage & WINED3DUSAGE_MASK;
- desc.bind_flags = WINED3D_BIND_VERTEX_BUFFER;
desc.bind_flags = 0; desc.access = wined3daccess_from_d3dpool(pool, usage) | WINED3D_RESOURCE_ACCESS_MAP_R | WINED3D_RESOURCE_ACCESS_MAP_W; desc.misc_flags = 0; desc.structure_byte_stride = 0;
if (desc.access & WINED3D_RESOURCE_ACCESS_GPU)
{
desc.bind_flags = WINED3D_BIND_VERTEX_BUFFER;
parent_ops = &d3d9_vertexbuffer_wined3d_parent_ops;
}
wined3d_mutex_lock();
- hr = wined3d_buffer_create(device->wined3d_device, &desc, NULL, buffer,
&d3d9_vertexbuffer_wined3d_parent_ops, &buffer->wined3d_buffer);
- hr = wined3d_buffer_create(device->wined3d_device, &desc, NULL, buffer, parent_ops, &buffer->wined3d_buffer);
- if (SUCCEEDED(hr) && !(desc.access & WINED3D_RESOURCE_ACCESS_GPU))
- {
desc.bind_flags = WINED3D_BIND_VERTEX_BUFFER;
desc.access = WINED3D_RESOURCE_ACCESS_GPU;
if (FAILED(hr = wined3d_buffer_create(device->wined3d_device, &desc, NULL, buffer,
&d3d9_vertexbuffer_wined3d_parent_ops, &buffer->draw_buffer)))
wined3d_buffer_decref(buffer->wined3d_buffer);
- } wined3d_mutex_unlock(); if (FAILED(hr)) {
diff --git a/dlls/d3d9/d3d9_private.h b/dlls/d3d9/d3d9_private.h index d6155eba60b..8647aac4af1 100644 --- a/dlls/d3d9/d3d9_private.h +++ b/dlls/d3d9/d3d9_private.h @@ -42,6 +42,7 @@
#define D3D9_MAX_VERTEX_SHADER_CONSTANTF 256 #define D3D9_MAX_TEXTURE_UNITS 20 +#define D3D9_MAX_STREAMS 16
#define D3DPRESENTFLAGS_MASK 0x00000fffu
@@ -105,6 +106,8 @@ struct d3d9_device BOOL in_destruction; BOOL in_scene; BOOL has_vertex_declaration;
BOOL recording;
unsigned int upload_map;
unsigned int max_user_clip_planes;
@@ -181,6 +184,7 @@ struct d3d9_vertexbuffer struct d3d9_resource resource; struct wined3d_buffer *wined3d_buffer; IDirect3DDevice9Ex *parent_device;
- struct wined3d_buffer *draw_buffer; DWORD fvf; };
diff --git a/dlls/d3d9/device.c b/dlls/d3d9/device.c index f19e2ae6f65..7c488bfd601 100644 --- a/dlls/d3d9/device.c +++ b/dlls/d3d9/device.c @@ -974,6 +974,7 @@ static HRESULT d3d9_device_reset(struct d3d9_device *device,
if (!extended) {
device->recording = FALSE; wined3d_device_set_render_state(device->wined3d_device, WINED3D_RS_ZENABLE, !!swapchain_desc.enable_auto_depth_stencil); }
@@ -2317,7 +2318,8 @@ static HRESULT WINAPI d3d9_device_BeginStateBlock(IDirect3DDevice9Ex *iface) TRACE("iface %p.\n", iface);
wined3d_mutex_lock();
- hr = wined3d_device_begin_stateblock(device->wined3d_device);
if (SUCCEEDED(hr = wined3d_device_begin_stateblock(device->wined3d_device)))
device->recording = TRUE; wined3d_mutex_unlock(); return hr;
@@ -2340,6 +2342,7 @@ static HRESULT WINAPI d3d9_device_EndStateBlock(IDirect3DDevice9Ex *iface, IDire WARN("Failed to end the state block, hr %#x.\n", hr); return hr; }
device->recording = FALSE;
if (!(object = heap_alloc_zero(sizeof(*object)))) {
@@ -2688,10 +2691,38 @@ static void d3d9_generate_auto_mipmaps(struct d3d9_device *device) } }
+static void d3d9_device_upload_sysmem_buffers(struct d3d9_device *device,
+        unsigned int start_vertex, unsigned int vertex_count)
+{
+    struct wined3d_box box = {0, 0, 0, 1, 0, 1};
+    struct d3d9_vertexbuffer *d3d9_buffer;
+    unsigned int i, offset, stride, map;
+    struct wined3d_buffer *dst_buffer;
+    HRESULT hr;
+    map = device->upload_map;
+    while (map)
+    {
+        i = ffs(map) - 1;
+        map ^= 1u << i;
+        if (FAILED(hr = wined3d_device_get_stream_source(device->wined3d_device, i, &dst_buffer, &offset, &stride)))
+            ERR("Failed to get stream source.\n");
+        d3d9_buffer = wined3d_buffer_get_parent(dst_buffer);
+        box.left = offset + start_vertex * stride;
+        box.right = box.left + vertex_count * stride;
+        if (FAILED(hr = wined3d_device_copy_sub_resource_region(device->wined3d_device,
+                wined3d_buffer_get_resource(dst_buffer), 0, box.left, 0, 0,
+                wined3d_buffer_get_resource(d3d9_buffer->wined3d_buffer), 0, &box, 0)))
+            ERR("Failed to update buffer.\n");
+    }
+}
static HRESULT WINAPI d3d9_device_DrawPrimitive(IDirect3DDevice9Ex *iface, D3DPRIMITIVETYPE primitive_type, UINT start_vertex, UINT primitive_count) { struct d3d9_device *device = impl_from_IDirect3DDevice9Ex(iface);
unsigned int vertex_count; HRESULT hr;
TRACE("iface %p, primitive_type %#x, start_vertex %u, primitive_count %u.\n",
@@ -2704,10 +2735,11 @@ static HRESULT WINAPI d3d9_device_DrawPrimitive(IDirect3DDevice9Ex *iface, WARN("Called without a valid vertex declaration set.\n"); return D3DERR_INVALIDCALL; }
- vertex_count = vertex_count_from_primitive_count(primitive_type, primitive_count);
- d3d9_device_upload_sysmem_buffers(device, start_vertex, vertex_count); d3d9_generate_auto_mipmaps(device); wined3d_device_set_primitive_type(device->wined3d_device, primitive_type, 0);
- hr = wined3d_device_draw_primitive(device->wined3d_device, start_vertex,
vertex_count_from_primitive_count(primitive_type, primitive_count));
- hr = wined3d_device_draw_primitive(device->wined3d_device, start_vertex, vertex_count); if (SUCCEEDED(hr)) d3d9_rts_flag_auto_gen_mipmap(device); wined3d_mutex_unlock();
@@ -2734,6 +2766,7 @@ static HRESULT WINAPI d3d9_device_DrawIndexedPrimitive(IDirect3DDevice9Ex *iface WARN("Called without a valid vertex declaration set.\n"); return D3DERR_INVALIDCALL; }
- d3d9_device_upload_sysmem_buffers(device, min_vertex_idx, vertex_count); d3d9_generate_auto_mipmaps(device); wined3d_device_set_base_vertex_index(device->wined3d_device, base_vertex_idx); wined3d_device_set_primitive_type(device->wined3d_device, primitive_type, 0);
@@ -3378,6 +3411,7 @@ static HRESULT WINAPI d3d9_device_SetStreamSource(IDirect3DDevice9Ex *iface, { struct d3d9_device *device = impl_from_IDirect3DDevice9Ex(iface); struct d3d9_vertexbuffer *buffer_impl = unsafe_impl_from_IDirect3DVertexBuffer9(buffer);
struct wined3d_buffer *wined3d_buffer; HRESULT hr;
TRACE("iface %p, stream_idx %u, buffer %p, offset %u, stride %u.\n",
@@ -3386,14 +3420,28 @@ static HRESULT WINAPI d3d9_device_SetStreamSource(IDirect3DDevice9Ex *iface, wined3d_mutex_lock(); if (!stride) {
struct wined3d_buffer *wined3d_buffer; unsigned int cur_offset; hr = wined3d_device_get_stream_source(device->wined3d_device, stream_idx, &wined3d_buffer, &cur_offset, &stride); }
hr = wined3d_device_set_stream_source(device->wined3d_device, stream_idx,
buffer_impl ? buffer_impl->wined3d_buffer : NULL, offset, stride);
if (!buffer_impl)
wined3d_buffer = NULL;
else if (buffer_impl->draw_buffer)
wined3d_buffer = buffer_impl->draw_buffer;
else
wined3d_buffer = buffer_impl->wined3d_buffer;
hr = wined3d_device_set_stream_source(device->wined3d_device, stream_idx, wined3d_buffer, offset, stride);
if (SUCCEEDED(hr) && !device->recording)
{
if (buffer_impl && buffer_impl->draw_buffer)
device->upload_map |= (1u << stream_idx);
else
device->upload_map &= ~(1u << stream_idx);
}
wined3d_mutex_unlock(); return hr;
diff --git a/dlls/d3d9/stateblock.c b/dlls/d3d9/stateblock.c index 62b3bacb28d..c5306fd80b3 100644 --- a/dlls/d3d9/stateblock.c +++ b/dlls/d3d9/stateblock.c @@ -108,11 +108,28 @@ static HRESULT WINAPI d3d9_stateblock_Capture(IDirect3DStateBlock9 *iface) static HRESULT WINAPI d3d9_stateblock_Apply(IDirect3DStateBlock9 *iface) { struct d3d9_stateblock *stateblock = impl_from_IDirect3DStateBlock9(iface);
struct wined3d_buffer *wined3d_buffer;
struct d3d9_vertexbuffer *buffer;
unsigned int i, offset, stride;
struct d3d9_device *device;
HRESULT hr;
TRACE("iface %p.\n", iface);
wined3d_mutex_lock(); wined3d_stateblock_apply(stateblock->wined3d_stateblock);
device = impl_from_IDirect3DDevice9Ex(stateblock->parent_device);
device->upload_map = 0;
for (i = 0; i < D3D9_MAX_STREAMS; ++i)
{
if (FAILED(hr = wined3d_device_get_stream_source(device->wined3d_device,
i, &wined3d_buffer, &offset, &stride)))
continue;
if (!wined3d_buffer || !(buffer = wined3d_buffer_get_parent(wined3d_buffer)))
continue;
if (buffer->draw_buffer)
device->upload_map |= 1u << i;
} wined3d_mutex_unlock();
return D3D_OK;