Saves a few CPU cycles on a potentially very hot code path. Based on a vkd3d-proton patch by Philip Rebohle.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/resource.c | 10 ++-------- libs/vkd3d/vkd3d_private.h | 16 ++++++++++------ 2 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index e6b326e6..cae5b580 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2130,9 +2130,7 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr pthread_mutex_lock(mutex);
/* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ - if ((dst->magic == VKD3D_DESCRIPTOR_MAGIC_SRV - || dst->magic == VKD3D_DESCRIPTOR_MAGIC_UAV - || dst->magic == VKD3D_DESCRIPTOR_MAGIC_SAMPLER) + if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) && !InterlockedDecrement(&dst->u.view->refcount)) destroy_desc = *dst;
@@ -2165,12 +2163,8 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, mutex = d3d12_device_get_descriptor_mutex(device, src); pthread_mutex_lock(mutex);
- if (src->magic == VKD3D_DESCRIPTOR_MAGIC_SRV - || src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV - || src->magic == VKD3D_DESCRIPTOR_MAGIC_SAMPLER) - { + if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) vkd3d_view_incref(src->u.view); - }
tmp = *src;
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 0b326b11..9829e0aa 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -41,13 +41,17 @@
#define VK_CALL(f) (vk_procs->f)
+#define MAKE_MAGIC(a,b,c,f) (((uint32_t)a) | (((uint32_t)b) << 8) | (((uint32_t)c) << 16) | f) + +#define VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW 0x01000000u + #define VKD3D_DESCRIPTOR_MAGIC_FREE 0x00000000u -#define VKD3D_DESCRIPTOR_MAGIC_CBV 0x00564243u -#define VKD3D_DESCRIPTOR_MAGIC_SRV 0x00565253u -#define VKD3D_DESCRIPTOR_MAGIC_UAV 0x00564155u -#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER 0x504d4153u -#define VKD3D_DESCRIPTOR_MAGIC_DSV 0x00565344u -#define VKD3D_DESCRIPTOR_MAGIC_RTV 0x00565452u +#define VKD3D_DESCRIPTOR_MAGIC_CBV MAKE_MAGIC('C', 'B', 'V', 0) +#define VKD3D_DESCRIPTOR_MAGIC_SRV MAKE_MAGIC('S', 'R', 'V', VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +#define VKD3D_DESCRIPTOR_MAGIC_UAV MAKE_MAGIC('U', 'A', 'V', VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +#define VKD3D_DESCRIPTOR_MAGIC_SAMPLER MAKE_MAGIC('S', 'M', 'P', VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) +#define VKD3D_DESCRIPTOR_MAGIC_DSV MAKE_MAGIC('D', 'S', 'V', 0) +#define VKD3D_DESCRIPTOR_MAGIC_RTV MAKE_MAGIC('R', 'T', 'V', 0)
#define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u #define VKD3D_MAX_QUEUE_FAMILY_COUNT 3u
Greatly improves performance in Control in Proton (but upstream vkd3d/Wine currently only renders the intro screens in DX12 mode).
Based on a vkd3d-proton patch by Philip Rebohle.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/resource.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index cae5b580..2c6c07c7 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2153,24 +2153,45 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { + bool needs_update = true; struct d3d12_desc tmp; pthread_mutex_t *mutex;
assert(dst != src);
/* Shadow of the Tomb Raider and possibly other titles sometimes destroy - * and rewrite a descriptor in another thread while it is being copied. */ - mutex = d3d12_device_get_descriptor_mutex(device, src); - pthread_mutex_lock(mutex); + * and rewrite a descriptor in another thread while it is being copied. + * We don't need to lock either descriptor for the update check. We only care + * about preventing use-after-free of the vkd3d_view caused by a race condiion + * in the calling app. It's not important if needs_update falls out of date as + * it's their race condition, not ours. */ + if (dst->magic == src->magic) + { + if (dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + { + needs_update = dst->u.view != src->u.view; + } + else if (dst->magic != VKD3D_DESCRIPTOR_MAGIC_FREE) + { + needs_update = dst->u.vk_cbv_info.buffer != src->u.vk_cbv_info.buffer || + dst->u.vk_cbv_info.offset != src->u.vk_cbv_info.offset || + dst->u.vk_cbv_info.range != src->u.vk_cbv_info.range; + } + }
- if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) - vkd3d_view_incref(src->u.view); + if (needs_update) + { + mutex = d3d12_device_get_descriptor_mutex(device, src); + pthread_mutex_lock(mutex);
- tmp = *src; + if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) + vkd3d_view_incref(src->u.view); + tmp = *src;
- pthread_mutex_unlock(mutex); + pthread_mutex_unlock(mutex);
- d3d12_desc_write_atomic(dst, &tmp, device); + d3d12_desc_write_atomic(dst, &tmp, device); + } }
static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12_device *device,
Greatly improves performance in various games that update or copy a large number of descriptors per frame due to the high overhead of pthread_mutex_{un}lock.
Based on vkd3d-proton patches by Hans-Kristian Arntzen, Philip Rebohle and Georg Lehmann.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- configure.ac | 1 + include/private/vkd3d_common.h | 33 +++++++++++++++++++++++++++++++++ libs/vkd3d/device.c | 10 +--------- libs/vkd3d/resource.c | 18 ++++++++---------- libs/vkd3d/vkd3d_private.h | 17 ++--------------- 5 files changed, 45 insertions(+), 34 deletions(-)
diff --git a/configure.ac b/configure.ac index d296dfd4..546b6c57 100644 --- a/configure.ac +++ b/configure.ac @@ -131,6 +131,7 @@ VKD3D_CHECK_FUNC([HAVE_BUILTIN_POPCOUNT], [__builtin_popcount], [__builtin_popco VKD3D_CHECK_FUNC([HAVE_BUILTIN_ADD_OVERFLOW], [__builtin_add_overflow], [__builtin_add_overflow(0, 0, (int *)0)]) VKD3D_CHECK_FUNC([HAVE_SYNC_ADD_AND_FETCH], [__sync_add_and_fetch], [__sync_add_and_fetch((int *)0, 0)]) VKD3D_CHECK_FUNC([HAVE_SYNC_SUB_AND_FETCH], [__sync_sub_and_fetch], [__sync_sub_and_fetch((int *)0, 0)]) +VKD3D_CHECK_FUNC([HAVE_SYNC_LOCK_TEST_AND_SET], [__sync_lock_test_and_set], [__sync_lock_test_and_set((int *)0, 0)])
VKD3D_CHECK_PTHREAD_SETNAME_NP
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 8d1ca397..1ae43d8f 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -28,6 +28,10 @@ #include <stdbool.h> #include <stdint.h>
+#ifdef __SSE2__ +#include <emmintrin.h> +#endif + #ifdef _MSC_VER #include <intrin.h> #endif @@ -211,6 +215,18 @@ static inline LONG InterlockedDecrement(LONG volatile *x) # else # error "InterlockedDecrement() not implemented for this platform" # endif + +# if HAVE_SYNC_LOCK_TEST_AND_SET +static inline LONG InterlockedExchange(LONG volatile *ptr, LONG val) +{ + return __sync_lock_test_and_set(ptr, val); +} +# define vkd3d_spinlock_unlock(lock) __sync_lock_release(lock) +# else +# error "spinlocks not implemented for this platform" +# endif /* HAVE_SYNC_LOCK_TEST_AND_SET */ +#else +# define vkd3d_spinlock_unlock(lock) InterlockedExchange(lock, 0) #endif /* _WIN32 */
#if HAVE_SYNC_ADD_AND_FETCH @@ -222,6 +238,23 @@ static inline LONG InterlockedDecrement(LONG volatile *x) # error "atomic_add_fetch() not implemented for this platform" #endif /* HAVE_SYNC_ADD_AND_FETCH */
+typedef LONG vkd3d_spinlock_t; + +static inline void vkd3d_spinlock_acquire(vkd3d_spinlock_t *lock) +{ + while (InterlockedExchange(lock, 1)) + { +#ifdef __SSE2__ + _mm_pause(); +#endif + } +} + +static inline void vkd3d_spinlock_release(vkd3d_spinlock_t *lock) +{ + vkd3d_spinlock_unlock(lock); +} + static inline void vkd3d_parse_version(const char *version, int *major, int *minor) { *major = atoi(version); diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 0fadb521..28a01331 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2213,7 +2213,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) { struct d3d12_device *device = impl_from_ID3D12Device(iface); ULONG refcount = InterlockedDecrement(&device->refcount); - size_t i;
TRACE("%p decreasing refcount to %u.\n", device, refcount);
@@ -2231,8 +2230,6 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_fence_worker_stop(&device->fence_worker, device); d3d12_device_destroy_pipeline_cache(device); d3d12_device_destroy_vkd3d_queues(device); - for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - pthread_mutex_destroy(&device->desc_mutex[i]); VK_CALL(vkDestroyDevice(device->vk_device, NULL)); if (device->parent) IUnknown_Release(device->parent); @@ -3135,8 +3132,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device *iface, struct d3d12_device *device = impl_from_ID3D12Device(iface); unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; unsigned int dst_range_size, src_range_size; - const struct d3d12_desc *src; - struct d3d12_desc *dst; + struct d3d12_desc *src, *dst;
TRACE("iface %p, dst_descriptor_range_count %u, dst_descriptor_range_offsets %p, " "dst_descriptor_range_sizes %p, src_descriptor_range_count %u, " @@ -3692,7 +3688,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, { const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; - size_t i;
device->ID3D12Device_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; @@ -3731,9 +3726,6 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, vkd3d_render_pass_cache_init(&device->render_pass_cache); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
- for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) - pthread_mutex_init(&device->desc_mutex[i], NULL); - if ((device->parent = create_info->parent)) IUnknown_AddRef(device->parent);
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 2c6c07c7..f3cbb684 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2122,21 +2122,21 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr struct d3d12_device *device) { struct d3d12_desc destroy_desc; - pthread_mutex_t *mutex;
destroy_desc.u.view = NULL;
- mutex = d3d12_device_get_descriptor_mutex(device, dst); - pthread_mutex_lock(mutex); + vkd3d_spinlock_acquire(&dst->spinlock);
/* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) && !InterlockedDecrement(&dst->u.view->refcount)) destroy_desc = *dst;
- *dst = *src; + dst->magic = src->magic; + dst->vk_descriptor_type = src->vk_descriptor_type; + dst->u = src->u;
- pthread_mutex_unlock(mutex); + vkd3d_spinlock_release(&dst->spinlock);
/* Destroy the view after unlocking to reduce wait time. */ if (destroy_desc.u.view) @@ -2150,12 +2150,11 @@ static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_devic d3d12_desc_write_atomic(descriptor, &null_desc, device); }
-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, +void d3d12_desc_copy(struct d3d12_desc *dst, struct d3d12_desc *src, struct d3d12_device *device) { bool needs_update = true; struct d3d12_desc tmp; - pthread_mutex_t *mutex;
assert(dst != src);
@@ -2181,14 +2180,13 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src,
if (needs_update) { - mutex = d3d12_device_get_descriptor_mutex(device, src); - pthread_mutex_lock(mutex); + vkd3d_spinlock_acquire(&src->spinlock);
if (src->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) vkd3d_view_incref(src->u.view); tmp = *src;
- pthread_mutex_unlock(mutex); + vkd3d_spinlock_release(&src->spinlock);
d3d12_desc_write_atomic(dst, &tmp, device); } diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 9829e0aa..78e503ae 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -517,6 +517,7 @@ struct d3d12_desc { uint32_t magic; VkDescriptorType vk_descriptor_type; + vkd3d_spinlock_t spinlock; union { VkDescriptorBufferInfo vk_cbv_info; @@ -534,7 +535,7 @@ static inline struct d3d12_desc *d3d12_desc_from_gpu_handle(D3D12_GPU_DESCRIPTOR return (struct d3d12_desc *)(intptr_t)gpu_handle.ptr; }
-void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device); +void d3d12_desc_copy(struct d3d12_desc *dst, struct d3d12_desc *src, struct d3d12_device *device); void d3d12_desc_create_cbv(struct d3d12_desc *descriptor, struct d3d12_device *device, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc); void d3d12_desc_create_srv(struct d3d12_desc *descriptor, @@ -1134,7 +1135,6 @@ struct d3d12_device struct vkd3d_fence_worker fence_worker;
pthread_mutex_t mutex; - pthread_mutex_t desc_mutex[8]; struct vkd3d_render_pass_cache render_pass_cache; VkPipelineCache vk_pipeline_cache;
@@ -1200,19 +1200,6 @@ static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(str return ID3D12Device_GetDescriptorHandleIncrementSize(&device->ID3D12Device_iface, descriptor_type); }
-static inline pthread_mutex_t *d3d12_device_get_descriptor_mutex(struct d3d12_device *device, - const struct d3d12_desc *descriptor) -{ - STATIC_ASSERT(!(ARRAY_SIZE(device->desc_mutex) & (ARRAY_SIZE(device->desc_mutex) - 1))); - uintptr_t idx = (uintptr_t)descriptor; - - idx ^= idx >> 12; - idx ^= idx >> 6; - idx ^= idx >> 3; - - return &device->desc_mutex[idx & (ARRAY_SIZE(device->desc_mutex) - 1)]; -} - /* utils */ enum vkd3d_format_type {
Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=98400
Your paranoid android.
=== debiant2 (build log) ===
error: patch failed: configure.ac:131 Task: Patch failed to apply
=== debiant2 (build log) ===
error: patch failed: configure.ac:131 Task: Patch failed to apply
From: Philip Rebohle philip.rebohle@tu-dortmund.de Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/resource.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index f3cbb684..8316138f 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2121,16 +2121,14 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { - struct d3d12_desc destroy_desc; - - destroy_desc.u.view = NULL; + struct vkd3d_view *destroy_view = NULL;
vkd3d_spinlock_acquire(&dst->spinlock);
/* Nothing to do for VKD3D_DESCRIPTOR_MAGIC_CBV. */ if ((dst->magic & VKD3D_DESCRIPTOR_MAGIC_HAS_VIEW) && !InterlockedDecrement(&dst->u.view->refcount)) - destroy_desc = *dst; + destroy_view = dst->u.view;
dst->magic = src->magic; dst->vk_descriptor_type = src->vk_descriptor_type; @@ -2139,8 +2137,8 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr vkd3d_spinlock_release(&dst->spinlock);
/* Destroy the view after unlocking to reduce wait time. */ - if (destroy_desc.u.view) - vkd3d_view_destroy(destroy_desc.u.view, device); + if (destroy_view) + vkd3d_view_destroy(destroy_view, device); }
static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device)