Shadow of the Tomb Raider overwrites descriptors while they are being copied in another thread. This patch makes reads and writes atomic for CBV, SRV, UAV, and sampler descriptors, but not RTV and DSV, for which copying is not implemented.
Benchmark total frames vs mutex count (the single mutex was locked only once for copying):
1 mutex: 6480 6489 6503 8 mutexes: 6691 6693 6661 16 mutexes: 6665 6682 6703
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- v2: Supersedes patch 170102, makes all writes atomic instead of only destruction. --- libs/vkd3d/device.c | 35 ++++++++++++++----- libs/vkd3d/resource.c | 70 ++++++++++++++++++++++++-------------- libs/vkd3d/vkd3d_private.h | 16 +++++++++ 3 files changed, 88 insertions(+), 33 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index dc0fa0a..3da4273 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1990,6 +1990,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) { struct d3d12_device *device = impl_from_ID3D12Device(iface); ULONG refcount = InterlockedDecrement(&device->refcount); + size_t i;
TRACE("%p decreasing refcount to %u.\n", device, refcount);
@@ -2006,6 +2007,8 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_fence_worker_stop(&device->fence_worker, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); + for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) + pthread_mutex_destroy(&device->desc_mutex[i]); VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -2585,33 +2588,42 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device * static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device *iface, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_desc tmp = {0}; + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr);
- d3d12_desc_create_cbv(d3d12_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device(iface), desc); + d3d12_desc_create_cbv(&tmp, device, desc); + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_desc tmp = {0}; + TRACE("iface %p, resource %p, desc %p, descriptor %#lx.\n", iface, resource, desc, descriptor.ptr);
- d3d12_desc_create_srv(d3d12_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + d3d12_desc_create_srv(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), desc); + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, ID3D12Resource *resource, ID3D12Resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_desc tmp = {0}; + TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %#lx.\n", iface, resource, counter_resource, desc, descriptor.ptr);
- d3d12_desc_create_uav(d3d12_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device(iface), unsafe_impl_from_ID3D12Resource(resource), + d3d12_desc_create_uav(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), unsafe_impl_from_ID3D12Resource(counter_resource), desc); + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, @@ -2639,10 +2651,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device * static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { + struct d3d12_device *device = impl_from_ID3D12Device(iface); + struct d3d12_desc tmp = {0}; + TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr);
- d3d12_desc_create_sampler(d3d12_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device(iface), desc); + d3d12_desc_create_sampler(&tmp, device, desc); + d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, @@ -3202,6 +3217,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, { const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; + size_t i;
device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; @@ -3237,6 +3253,9 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, vkd3d_render_pass_cache_init(&device->render_pass_cache); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
+ for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) + pthread_mutex_init(&device->desc_mutex[i], NULL); + if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent);
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 463f373..aebe1fa 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -1,5 +1,6 @@ /* * Copyright 2016 Józef Kucia for CodeWeavers + * Copyright 2019 Conor McCarthy for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -1891,14 +1892,10 @@ void vkd3d_view_incref(struct vkd3d_view *view) InterlockedIncrement(&view->refcount); }
-static void vkd3d_view_decref_descriptor(struct vkd3d_view *view, +static void vkd3d_view_destroy_descriptor(struct vkd3d_view *view, const struct d3d12_desc *descriptor, struct d3d12_device *device) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - ULONG refcount = InterlockedDecrement(&view->refcount); - - if (refcount) - return;
TRACE("Destroying view %p.\n", view);
@@ -1927,31 +1924,56 @@ static void vkd3d_view_decref_descriptor(struct vkd3d_view *view,
void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) { - vkd3d_view_decref_descriptor(view, NULL, device); + if (!InterlockedDecrement(&view->refcount)) + vkd3d_view_destroy_descriptor(view, NULL, device); }
-static void d3d12_desc_destroy(struct d3d12_desc *descriptor, +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { + struct d3d12_desc destroy_desc; + pthread_mutex_t *mutex; + + destroy_desc.u.view = NULL; + + mutex = d3d12_device_get_descriptor_mutex(device, dst); + pthread_mutex_lock(mutex); + /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ - if (descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_SRV - || descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_UAV - || descriptor->magic == VKD3D_DESCRIPTOR_MAGIC_SAMPLER) - { - vkd3d_view_decref_descriptor(descriptor->u.view, descriptor, device); - } + if ((dst->magic == VKD3D_DESCRIPTOR_MAGIC_SRV + || dst->magic == VKD3D_DESCRIPTOR_MAGIC_UAV + || dst->magic == VKD3D_DESCRIPTOR_MAGIC_SAMPLER) + && !InterlockedDecrement(&dst->u.view->refcount)) + destroy_desc = *dst; + + *dst = *src; + + pthread_mutex_unlock(mutex); + + /* Destroy the view after unlocking to reduce wait time. */ + if (destroy_desc.u.view) + vkd3d_view_destroy_descriptor(destroy_desc.u.view, &destroy_desc, device); +}
- memset(descriptor, 0, sizeof(*descriptor)); +static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) +{ + static const struct d3d12_desc null_desc = {0}; + + d3d12_desc_write_atomic(descriptor, &null_desc, device); }
void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { - assert(dst != src); + struct d3d12_desc tmp; + pthread_mutex_t *mutex;
- d3d12_desc_destroy(dst, device); + assert(dst != src);
- *dst = *src; + /* Shadow of the Tomb Raider and possibly other titles sometimes destroy + * and rewrite a descriptor in another thread while it is being copied. */ + mutex = d3d12_device_get_descriptor_mutex(device, src); + pthread_mutex_lock(mutex);
if (src->magic == VKD3D_DESCRIPTOR_MAGIC_SRV || src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV @@ -1959,6 +1981,12 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, { vkd3d_view_incref(src->u.view); } + + tmp = *src; + + pthread_mutex_unlock(mutex); + + d3d12_desc_write_atomic(dst, &tmp, device); }
static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device, @@ -2323,8 +2351,6 @@ void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct VkDescriptorBufferInfo *buffer_info; struct d3d12_resource *resource;
- d3d12_desc_destroy(descriptor, device); - if (!desc) { WARN("Constant buffer desc is NULL.\n"); @@ -2467,8 +2493,6 @@ void d3d12_desc_create_srv(struct d3d12_desc *descriptor, struct vkd3d_texture_view_desc vkd3d_desc; struct vkd3d_view *view;
- d3d12_desc_destroy(descriptor, device); - if (!resource) { vkd3d_create_null_srv(descriptor, device, desc); @@ -2761,8 +2785,6 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d struct d3d12_resource *resource, struct d3d12_resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) { - d3d12_desc_destroy(descriptor, device); - if (!resource) { if (counter_resource) @@ -2889,8 +2911,6 @@ void d3d12_desc_create_sampler(struct d3d12_desc *sampler, { struct vkd3d_view *view;
- d3d12_desc_destroy(sampler, device); - if (!desc) { WARN("NULL sampler desc.\n"); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 294e677..59f0eac 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -512,6 +512,8 @@ void d3d12_desc_create_uav(struct d3d12_desc *descriptor, struct d3d12_device *d const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc) DECLSPEC_HIDDEN; void d3d12_desc_create_sampler(struct d3d12_desc *sampler, struct d3d12_device *device, const D3D12_SAMPLER_DESC *desc) DECLSPEC_HIDDEN; +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) DECLSPEC_HIDDEN;
bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, D3D12_GPU_VIRTUAL_ADDRESS gpu_address, VkBufferView *vk_buffer_view) DECLSPEC_HIDDEN; @@ -1052,6 +1054,7 @@ struct d3d12_device struct vkd3d_fence_worker fence_worker;
pthread_mutex_t mutex; + pthread_mutex_t desc_mutex[8]; struct vkd3d_render_pass_cache render_pass_cache; VkPipelineCache vk_pipeline_cache;
@@ -1111,6 +1114,19 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); }
+static inline pthread_mutex_t *d3d12_device_get_descriptor_mutex(struct d3d12_device *device, + const struct d3d12_desc *descriptor) +{ + STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1))); + uintptr_t idx = (uintptr_t)descriptor; + + idx ^= idx >> 12; + idx ^= idx >> 6; + idx ^= idx >> 3; + + return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)]; +} + /* utils */ enum vkd3d_format_type {