Atomic descriptor writes and copies were implemented for Shadow of the Tomb Raider, which now has a Linux version. Since the bug resulted from a race condition in the game, it's probably not common and atomic ops can be made optional. Atomic ops are a significant drag on performance of games which copy large numbers of descriptors per frame, e.g. Control.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- v2: Replace a missed instance of d3d12_desc_copy(). --- README | 3 +++ libs/vkd3d/device.c | 32 +++++++++++++++++++++++--------- libs/vkd3d/resource.c | 38 +++++++++++++++++++++++++++++++++++++- libs/vkd3d/vkd3d_private.h | 10 ++++++++++ 4 files changed, 73 insertions(+), 10 deletions(-)
diff --git a/README b/README index ed53f202..fff5712f 100644 --- a/README +++ b/README @@ -51,6 +51,9 @@ commas or semicolons. even when the output supports colour.
* VKD3D_CONFIG - a list of options that change the behavior of libvkd3d. + * atomic_descriptor - Make all descriptor copy and write operations thread- + safe. This is a workaround for race condition bugs which Windows + tolerates but which will crash vkd3d. * vk_debug - enables Vulkan debug extensions.
* VKD3D_DEBUG - controls the debug level for log messages produced by diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 96ff7ae7..09bdb7a5 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -429,6 +429,7 @@ static void vkd3d_init_debug_report(struct vkd3d_instance *instance)
static const struct vkd3d_debug_option vkd3d_config_options[] = { + {"atomic_descriptor", VKD3D_CONFIG_FLAG_ATOMIC_DESC_OPS}, /* atomic descriptor read/write */ {"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, /* enable Vulkan debug extensions */ };
@@ -2350,8 +2351,11 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_fence_worker_stop(&device->fence_worker, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - pthread_mutex_destroy(&device->desc_mutex[i]); + if (device->atomic_desc_ops) + { + for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) + pthread_mutex_destroy(&device->desc_mutex[i]); + } VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -3178,7 +3182,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr);
d3d12_desc_create_cbv(&tmp, device, desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + device->descriptor_write(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device *iface, @@ -3192,7 +3196,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device iface, resource, desc, descriptor.ptr);
d3d12_desc_create_srv(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + device->descriptor_write(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device *iface, @@ -3207,7 +3211,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic
d3d12_desc_create_uav(&tmp, device, unsafe_impl_from_ID3D12Resource(resource), unsafe_impl_from_ID3D12Resource(counter_resource), desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + device->descriptor_write(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device *iface, @@ -3241,7 +3245,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device *iface, TRACE("iface %p, desc %p, descriptor %#lx.\n", iface, desc, descriptor.ptr);
d3d12_desc_create_sampler(&tmp, device, desc); - d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); + device->descriptor_write(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); }
static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, @@ -3283,7 +3287,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, src = d3d12_desc_from_cpu_handle(src_descriptor_range_offsets[src_range_idx]);
while (dst_idx < dst_range_size && src_idx < src_range_size) - d3d12_desc_copy(&dst[dst_idx++], &src[src_idx++], device); + device->descriptor_copy(&dst[dst_idx++], &src[src_idx++], device);
if (dst_idx >= dst_range_size) { @@ -3851,8 +3855,18 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, vkd3d_gpu_descriptor_allocator_init(&device->gpu_descriptor_allocator); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - pthread_mutex_init(&device->desc_mutex[i], NULL); + if ((device->atomic_desc_ops = !!(device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_ATOMIC_DESC_OPS))) + { + for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) + pthread_mutex_init(&device->desc_mutex[i], NULL); + device->descriptor_copy = d3d12_desc_copy_atomic; + device->descriptor_write = d3d12_desc_write_atomic; + } + else + { + device->descriptor_copy = d3d12_desc_copy; + device->descriptor_write = d3d12_desc_write; + }
if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent); diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 2a7c7331..80f3977e 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2118,6 +2118,17 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) vkd3d_view_destroy(view, device); }
+void d3d12_desc_write(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + /* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ + if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + && !InterlockedDecrement(&dst->u.view->refcount)) + vkd3d_view_destroy(dst->u.view, device); + + *dst = *src; +} + void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { @@ -2145,11 +2156,36 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic { static const struct d3d12_desc null_desc = {0};
- d3d12_desc_write_atomic(descriptor, &null_desc, device); + device->descriptor_write(descriptor, &null_desc, device); }
void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) +{ + assert(dst != src); + + if (dst->magic == src->magic) + { + if (dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + { + if (dst->u.view == src->u.view) + return; + } + else if (dst->u.vk_cbv_info.buffer == src->u.vk_cbv_info.buffer + && dst->u.vk_cbv_info.offset == src->u.vk_cbv_info.offset + && dst->u.vk_cbv_info.range == src->u.vk_cbv_info.range) + { + return; + } + } + + if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + vkd3d_view_incref(src->u.view); + + d3d12_desc_write(dst, src, device); +} + +void d3d12_desc_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { struct d3d12_desc tmp; pthread_mutex_t *mutex; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index f83fd1aa..8af374a1 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -137,6 +137,7 @@ struct vkd3d_vulkan_info enum vkd3d_config_flags { VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001, + VKD3D_CONFIG_FLAG_ATOMIC_DESC_OPS = 0x00000002, };
struct vkd3d_instance @@ -557,7 +558,10 @@ static inline struct d3d12_desc *d3d12_desc_from_gpu_handle(D3D12_GPU_DESCRIPTOR return (struct d3d12_desc *)(intptr_t)gpu_handle.ptr; }
+void d3d12_desc_write(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +void d3d12_desc_copy_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); void d3d12_desc_create_srv(struct d3d12_desc *descriptor, @@ -1207,6 +1211,12 @@ struct d3d12_device const struct vkd3d_format_compatibility_list *format_compatibility_lists; struct vkd3d_null_resources null_resources; struct vkd3d_uav_clear_state uav_clear_state; + + void (*descriptor_copy)(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); + void (*descriptor_write)(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); + void (*descriptor_vk_heap_write)(struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device); + bool atomic_desc_ops; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance,