Saves a few CPU cycles on a potentially very hot code path. Based on a vkd3d-proton patch by Philip Rebohle.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/resource.c | 10 ++-------- libs/vkd3d/vkd3d_private.h | 16 ++++++++++------ 2 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 28877793..19912231 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2130,9 +2130,7 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr pthread_mutex_lock(mutex);
/* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ - if ((dst->magic == VKD3D_DESCRIPTOR_MAGIC_SRV - || dst->magic == VKD3D_DESCRIPTOR_MAGIC_UAV - || dst->magic == VKD3D_DESCRIPTOR_MAGIC_SAMPLER) + if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) && !InterlockedDecrement(&dst->u.view->refcount)) destroy_desc = *dst;
@@ -2165,12 +2163,8 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, mutex = d3d12_device_get_descriptor_mutex(device, src); pthread_mutex_lock(mutex);
- if (src->magic == VKD3D_DESCRIPTOR_MAGIC_SRV - || src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV - || src->magic == VKD3D_DESCRIPTOR_MAGIC_SAMPLER) - { + if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) vkd3d_view_incref(src->u.view); - }
tmp = *src;
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index e9631313..f83fd1aa 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -41,13 +41,17 @@
#define VK_CALL(f) (vk_procs->f)
+#define MAKE_MAGIC(a,b,c,f) (((uint32_t)a) | (((uint32_t)b) << 8) | (((uint32_t)c) << 16) | f) + +#define VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW 0x01000000u + #define VKD3D_DESCRIPTOR_MAGIC_FREE 0x00000000u -#define VKD3D_DESCRIPTOR_MAGIC_CBV 0x00564243u -#define VKD3D_DESCRIPTOR_MAGIC_SRV 0x00565253u -#define VKD3D_DESCRIPTOR_MAGIC_UAV 0x00564155u -#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER 0x504d4153u -#define VKD3D_DESCRIPTOR_MAGIC_DSV 0x00565344u -#define VKD3D_DESCRIPTOR_MAGIC_RTV 0x00565452u +#define VKD3D_DESCRIPTOR_MAGIC_CBV MAKE_MAGIC('C', 'B', 'V', 0) +#define VKD3D_DESCRIPTOR_MAGIC_SRV MAKE_MAGIC('S', 'R', 'V', VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +#define VKD3D_DESCRIPTOR_MAGIC_UAV MAKE_MAGIC('U', 'A', 'V', VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER MAKE_MAGIC('S', 'M', 'P', VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +#define VKD3D_DESCRIPTOR_MAGIC_DSV MAKE_MAGIC('D', 'S', 'V', 0) +#define VKD3D_DESCRIPTOR_MAGIC_RTV MAKE_MAGIC('R', 'T', 'V', 0)
#define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u #define VKD3D_MAX_QUEUE_FAMILY_COUNT 3u
Greatly improves performance in Control.
Based on a vkd3d-proton patch by Philip Rebohle.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/resource.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 19912231..d7888f2f 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2159,7 +2159,27 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, assert(dst != src);
/* Shadow of the Tomb Raider and possibly other titles sometimes destroy - * and rewrite a descriptor in another thread while it is being copied. */ + * and rewrite a descriptor in another thread while it is being copied. + * We don't need to lock either descriptor for the update check. The purpose + * of locking descriptors is not to make descriptor writes thread safe, + * but only to prevent use-after-free of the vkd3d_view caused by a race + * condition in the calling app. It's not important if the update check + * falls out of date as it's their race condition, not ours. */ + if (dst->magic == src->magic) + { + if (dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + { + if (dst->u.view == src->u.view) + return; + } + else if (dst->u.vk_cbv_info.buffer == src->u.vk_cbv_info.buffer + && dst->u.vk_cbv_info.offset == src->u.vk_cbv_info.offset + && dst->u.vk_cbv_info.range == src->u.vk_cbv_info.range) + { + return; + } + } + mutex = d3d12_device_get_descriptor_mutex(device, src); pthread_mutex_lock(mutex);
Based on a vkd3d-proton patch by Philip Rebohle.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/resource.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index d7888f2f..2a7c7331 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2121,26 +2121,24 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { - struct d3d12_desc destroy_desc; + struct vkd3d_view *defunct_view = NULL; pthread_mutex_t *mutex;
- destroy_desc.u.view = NULL; - mutex = d3d12_device_get_descriptor_mutex(device, dst); pthread_mutex_lock(mutex);
/* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) && !InterlockedDecrement(&dst->u.view->refcount)) - destroy_desc = *dst; + defunct_view = dst->u.view;
*dst = *src;
pthread_mutex_unlock(mutex);
/* Destroy the view after unlocking to reduce wait time. */ - if (destroy_desc.u.view) - vkd3d_view_destroy(destroy_desc.u.view, device); + if (defunct_view) + vkd3d_view_destroy(defunct_view, device); }
static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device)
Atomic descriptor writes and copies were implemented for Shadow of the Tomb Raider, which now has a Linux version. Since the bug resulted from a race condition in the game, it's probably not common and atomic ops can be made optional. Atomic ops are a significant drag on performance of games which copy large numbers of descriptors per frame, e.g. Control.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- README | 3 +++ libs/vkd3d/device.c | 30 ++++++++++++++++++++++-------- libs/vkd3d/resource.c | 38 +++++++++++++++++++++++++++++++++++++- libs/vkd3d/vkd3d_private.h | 10 ++++++++++ 4 files changed, 72 insertions(+), 9 deletions(-)
diff --git a/README b/README index ed53f202..fff5712f 100644 --- a/README +++ b/README @@ -51,6 +51,9 @@ commas or semicolons. even when the output supports colour.
* VKD3D_CONFIG - a list of options that change the behavior of libvkd3d. + * atomic_descriptor - Make all descriptor copy and write operations thread- + safe. This is a workaround for race condition bugs which Windows + tolerates but which will crash vkd3d. * vk_debug - enables Vulkan debug extensions.
* VKD3D_DEBUG - controls the debug level for log messages produced by diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 96ff7ae7..e4e1fc0a 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -429,6 +429,7 @@ static void vkd3d_init_debug_report(struct vkd3d_instance *instance)
static const struct vkd3d_debug_option vkd3d_config_options[] = { + {"atomic_descriptor", VKD3D_CONFIG_FLAG_ATOMIC_DESC_OPS}, /* atomic descriptor read/write */ {"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, /* enable Vulkan debug extensions */ };
@@ -2350,8 +2351,11 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_fence_worker_stop(&device->fence_worker, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - pthread_mutex_destroy(&device->desc_mutex[i]); + if (device->atomic_desc_ops) + { + for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) + pthread_mutex_destroy(&device->desc_mutex[i]); + } VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -3178,7 +3182,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr);
d3d12_desc_create_cbv(&tmp, device, desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + device->descriptor_write(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, @@ -3192,7 +3196,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device iface, resource, desc, descriptor.ptr);
d3d12_desc_create_srv(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + device->descriptor_write(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, @@ -3207,7 +3211,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic
d3d12_desc_create_uav(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), unsafe_impl_from_ID3D12Resource(counter_resource), desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + device->descriptor_write(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, @@ -3241,7 +3245,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr);
d3d12_desc_create_sampler(&tmp, device, desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + device->descriptor_write(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, @@ -3851,8 +3855,18 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, vkd3d_gpu_descriptor_allocator_init(&device->gpu_descriptor_allocator); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - pthread_mutex_init(&device->desc_mutex[i], NULL); + if ((device->atomic_desc_ops = !!(device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_ATOMIC_DESC_OPS))) + { + for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) + pthread_mutex_init(&device->desc_mutex[i], NULL); + device->descriptor_copy = d3d12_desc_copy_atomic; + device->descriptor_write = d3d12_desc_write_atomic; + } + else + { + device->descriptor_copy = d3d12_desc_copy; + device->descriptor_write = d3d12_desc_write; + }
if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent); diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 2a7c7331..80f3977e 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2118,6 +2118,17 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) vkd3d_view_destroy(view, device); }
+void d3d12_desc_write(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ + if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + && !InterlockedDecrement(&dst->u.view->refcount)) + vkd3d_view_destroy(dst->u.view, device); + + *dst = *src; +} + void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { @@ -2145,11 +2156,36 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic { static const struct d3d12_desc null_desc = {0};
- d3d12_desc_write_atomic(descriptor, &null_desc, device); + device->descriptor_write(descriptor, &null_desc, device); }
void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) +{ + assert(dst != src); + + if (dst->magic == src->magic) + { + if (dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + { + if (dst->u.view == src->u.view) + return; + } + else if (dst->u.vk_cbv_info.buffer == src->u.vk_cbv_info.buffer + && dst->u.vk_cbv_info.offset == src->u.vk_cbv_info.offset + && dst->u.vk_cbv_info.range == src->u.vk_cbv_info.range) + { + return; + } + } + + if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + vkd3d_view_incref(src->u.view); + + d3d12_desc_write(dst, src, device); +} + +void d3d12_desc_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { struct d3d12_desc tmp; pthread_mutex_t *mutex; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index f83fd1aa..8af374a1 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -137,6 +137,7 @@ struct vkd3d_vulkan_info enum vkd3d_config_flags { VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001, + VKD3D_CONFIG_FLAG_ATOMIC_DESC_OPS = 0x00000002, };
struct vkd3d_instance @@ -557,7 +558,10 @@ static inline struct d3d12_desc *d3d12_desc_from_gpu_handle(D3D12_GPU_DESCRIPTOR return (struct d3d12_desc *)(intptr_t)gpu_handle.ptr; }
+void d3d12_desc_write(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +void d3d12_desc_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); void d3d12_desc_create_srv(struct d3d12_desc *descriptor, @@ -1207,6 +1211,12 @@ struct d3d12_device const struct vkd3d_format_compatibility_list *format_compatibility_lists; struct vkd3d_null_resources null_resources; struct vkd3d_uav_clear_state uav_clear_state; + + void (*descriptor_copy)(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); + void (*descriptor_write)(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); + void (*descriptor_vk_heap_write)(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device); + bool atomic_desc_ops; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance,