From: Conor McCarthy <cmccarthy@codeweavers.com>

---
 libs/vkd3d/device.c        | 87 +++++++++++++++++++++++++-------
 libs/vkd3d/resource.c      | 100 +++++++++++++++++++------------------
 libs/vkd3d/vkd3d_private.h | 75 +++++++++++++++++++++++++---
 tests/d3d12.c              | 2 +
 4 files changed, 191 insertions(+), 73 deletions(-)

diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index d8c94fbf..c49847f0 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -2434,26 +2434,65 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device)
     device->vk_pool_count = 6;
 };

-static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size)
+struct global_tls_key_cache
 {
-    memset(cache, 0, sizeof(*cache));
-    cache->size = size;
+    struct vkd3d_mutex mutex;
+    struct vkd3d_tls_key *keys;
+    size_t key_capacity;
+    size_t key_count;
+};
+
+static struct global_tls_key_cache key_cache = {{VKD3D_MUTEX_INITIALIZER}, NULL, 0, 0};
+
+static int global_tls_key_cache_get(struct vkd3d_tls_key *key)
+{
+    int rc = 0;
+
+    vkd3d_mutex_lock(&key_cache.mutex);
+
+    if (key_cache.key_count)
+        *key = key_cache.keys[--key_cache.key_count];
+    else
+        rc = vkd3d_tls_key_create(key);
+
+    vkd3d_mutex_unlock(&key_cache.mutex);
+
+    return rc;
 }

-static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cache)
+static void global_tls_key_cache_put(struct vkd3d_tls_key *key)
 {
-    union d3d12_desc_object u;
-    unsigned int i;
-    void *next;
+    vkd3d_mutex_lock(&key_cache.mutex);

-    for (i = 0; i < ARRAY_SIZE(cache->heads); ++i)
+    if (!vkd3d_array_reserve((void **)&key_cache.keys, &key_cache.key_capacity, key_cache.key_count + 1,
+            sizeof(*key_cache.keys)))
     {
-        for (u.object = cache->heads[i].head; u.object; u.object = next)
-        {
-            next = u.header->next;
-            vkd3d_free(u.object);
-        }
+        ERR("Failed to cache TLS key for reuse.\n");
+    }
+    else
+    {
+        vkd3d_tls_key_set_value(key, NULL);
+        key_cache.keys[key_cache.key_count++] = *key;
     }
+
+    vkd3d_mutex_unlock(&key_cache.mutex);
+}
+
+struct desc_object_caches *device_get_desc_object_caches(struct d3d12_device *device)
+{
+    struct desc_object_caches *caches = vkd3d_tls_key_get_value(&device->tls_key);
+
+    if (caches)
+        return caches;
+
+    caches = vkd3d_calloc(1, sizeof(*caches));
+    vkd3d_tls_key_set_value(&device->tls_key, caches);
+    caches->view_desc_cache.size = sizeof(struct vkd3d_view);
+    caches->view_desc_cache.rebalance = &device->view_desc_rebalance;
+    caches->cbuffer_desc_cache.size = sizeof(struct vkd3d_cbuffer_desc);
+    caches->cbuffer_desc_cache.rebalance = &device->cbuffer_desc_rebalance;
+
+    return caches;
 }

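device_get_desc_object_caches() above is the lazy-initialisation half of the scheme: the first call on a given thread allocates that thread's cache block and stores it under the device's TLS key; every later call on the same thread returns that block without taking a lock. Below is a minimal standalone sketch of the same idiom, written against raw pthreads rather than the vkd3d_tls_key_* wrappers - the names are illustrative only, and unlike the patch it registers a key destructor so each thread's block is freed at thread exit.

/* Sketch only: lazy per-thread state keyed off a TLS key (illustrative names). */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct thread_cache
{
    size_t count;   /* stands in for the real per-thread cache contents */
};

static pthread_key_t cache_key;

static struct thread_cache *get_thread_cache(void)
{
    struct thread_cache *cache = pthread_getspecific(cache_key);

    if (cache)
        return cache;   /* fast path: no locking after the first call */

    cache = calloc(1, sizeof(*cache));
    pthread_setspecific(cache_key, cache);
    return cache;
}

static void *worker(void *arg)
{
    (void)arg;
    /* Each thread lazily creates, then keeps reusing, its own block. */
    printf("thread cache %p\n", (void *)get_thread_cache());
    return NULL;
}

int main(void)
{
    pthread_t threads[2];

    /* Unlike the patch, pass a destructor so blocks are freed at thread exit. */
    pthread_key_create(&cache_key, free);
    pthread_create(&threads[0], NULL, worker, NULL);
    pthread_create(&threads[1], NULL, worker, NULL);
    pthread_join(threads[0], NULL);
    pthread_join(threads[1], NULL);
    pthread_key_delete(cache_key);
    return 0;
}

In the patch itself the key is not deleted when the device is released; it is pushed back into the process-wide key_cache above for reuse by the next device.
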
 /* ID3D12Device */
@@ -2520,12 +2559,11 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device5 *iface)
         vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device);
         d3d12_device_destroy_pipeline_cache(device);
         d3d12_device_destroy_vkd3d_queues(device);
-        vkd3d_desc_object_cache_cleanup(&device->view_desc_cache);
-        vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache);
         VK_CALL(vkDestroyDevice(device->vk_device, NULL));
         if (device->parent)
             IUnknown_Release(device->parent);
         vkd3d_instance_decref(device->vkd3d_instance);
+        global_tls_key_cache_put(&device->tls_key);

         vkd3d_free(device);
     }
@@ -4208,11 +4246,18 @@ struct d3d12_device *unsafe_impl_from_ID3D12Device5(ID3D12Device5 *iface)
     return impl_from_ID3D12Device5(iface);
 }

+static void desc_rebalance_init(struct desc_rebalance *rebalance)
+{
+    memset(rebalance, 0, sizeof(*rebalance));
+    vkd3d_mutex_init(&rebalance->mutex);
+}
+
 static HRESULT d3d12_device_init(struct d3d12_device *device,
         struct vkd3d_instance *instance, const struct vkd3d_device_create_info *create_info)
 {
     const struct vkd3d_vk_device_procs *vk_procs;
     HRESULT hr;
+    int rc;

     device->ID3D12Device5_iface.lpVtbl = &d3d12_device_vtbl;
     device->refcount = 1;
@@ -4255,8 +4300,14 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
     device->blocked_queue_count = 0;
     vkd3d_mutex_init(&device->blocked_queues_mutex);

-    vkd3d_desc_object_cache_init(&device->view_desc_cache, sizeof(struct vkd3d_view));
-    vkd3d_desc_object_cache_init(&device->cbuffer_desc_cache, sizeof(struct vkd3d_cbuffer_desc));
+    if ((rc = global_tls_key_cache_get(&device->tls_key)))
+    {
+        ERR("Failed to allocate TLS key, rc %d.\n", rc);
+        hr = E_FAIL;
+        goto out_cleanup_descriptor_heap_layouts;
+    }
+    desc_rebalance_init(&device->view_desc_rebalance);
+    desc_rebalance_init(&device->cbuffer_desc_rebalance);

     device_init_descriptor_pool_sizes(device);

@@ -4265,6 +4316,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,

     return S_OK;

+out_cleanup_descriptor_heap_layouts:
+    vkd3d_vk_descriptor_heap_layouts_cleanup(device);
 out_cleanup_uav_clear_state:
     vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device);
 out_destroy_null_resources:
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c
index 14fb24a9..a1430dc0 100644
--- a/libs/vkd3d/resource.c
+++ b/libs/vkd3d/resource.c
@@ -2282,72 +2282,71 @@ ULONG vkd3d_resource_decref(ID3D12Resource *resource)
     return d3d12_resource_decref(impl_from_ID3D12Resource(resource));
 }

-#define HEAD_INDEX_MASK (ARRAY_SIZE(cache->heads) - 1)
+static const unsigned int REBALANCE_SIZE = 64;

-/* Objects are cached so that vkd3d_view_incref() can safely check the refcount of an
- * object freed by another thread. This could be implemented as a single atomic linked
- * list, but it requires handling the ABA problem, which brings issues with cross-platform
- * support, compiler support, and non-universal x86-64 support for 128-bit CAS. */
 static void *vkd3d_desc_object_cache_get(struct vkd3d_desc_object_cache *cache)
 {
-    union d3d12_desc_object u;
-    unsigned int i;
+    struct desc_rebalance *rebalance;
+    void *object;

-    STATIC_ASSERT(!(ARRAY_SIZE(cache->heads) & HEAD_INDEX_MASK));
+    if (cache->count)
+        return cache->data[--cache->count];

-    i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK;
-    for (;;)
-    {
-        if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1))
-        {
-            if ((u.object = cache->heads[i].head))
-            {
-                vkd3d_atomic_decrement(&cache->free_count);
-                cache->heads[i].head = u.header->next;
-                vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0);
-                return u.object;
-            }
-            vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0);
-        }
-        /* Keeping a free count avoids uncertainty over when this loop should terminate,
-         * which could result in excess allocations gradually increasing without limit. */
-        if (cache->free_count < ARRAY_SIZE(cache->heads))
-            return vkd3d_malloc(cache->size);
+    cache->max_count += cache->prev_rebalance_count;
+    cache->prev_rebalance_count = 0;

-        i = (i + 1) & HEAD_INDEX_MASK;
+    rebalance = cache->rebalance;
+    vkd3d_mutex_lock(&rebalance->mutex);
+
+    if (rebalance->count)
+    {
+        rebalance->count -= REBALANCE_SIZE;
+        vkd3d_array_reserve((void **)&cache->data, &cache->capacity, REBALANCE_SIZE, sizeof(*cache->data));
+        memcpy(cache->data, &rebalance->data[rebalance->count], REBALANCE_SIZE * sizeof(*cache->data));
+        cache->count = REBALANCE_SIZE;
+        object = cache->data[--cache->count];
+    }
+    else
+    {
+        object = vkd3d_malloc(cache->size);
     }
+
+    vkd3d_mutex_unlock(&rebalance->mutex);
+    return object;
 }

 static void vkd3d_desc_object_cache_push(struct vkd3d_desc_object_cache *cache, void *object)
 {
-    union d3d12_desc_object u = {object};
-    unsigned int i;
-    void *head;
+    struct desc_rebalance *rebalance;

-    /* Using the same index as above may result in a somewhat uneven distribution,
-     * but the main objective is to avoid costly spinlock contention. */
-    i = (vkd3d_atomic_increment(&cache->next_index)) & HEAD_INDEX_MASK;
-    for (;;)
-    {
-        if (vkd3d_atomic_compare_exchange(&cache->heads[i].spinlock, 0, 1))
-            break;
-        i = (i + 1) & HEAD_INDEX_MASK;
-    }
+    vkd3d_array_reserve((void **)&cache->data, &cache->capacity, max(cache->count + 1, 64u), sizeof(*cache->data));
+    cache->data[cache->count++] = object;
+
+    if (cache->count < cache->max_count + REBALANCE_SIZE)
+        return;
+
+    cache->count -= REBALANCE_SIZE;
+    cache->prev_rebalance_count = REBALANCE_SIZE;
+
+    rebalance = cache->rebalance;
+    vkd3d_mutex_lock(&rebalance->mutex);

-    head = cache->heads[i].head;
-    u.header->next = head;
-    cache->heads[i].head = u.object;
-    vkd3d_atomic_exchange(&cache->heads[i].spinlock, 0);
-    vkd3d_atomic_increment(&cache->free_count);
+    vkd3d_array_reserve((void **)&rebalance->data, &rebalance->capacity, max(rebalance->count + REBALANCE_SIZE, 1024u),
+            sizeof(*rebalance->data));
+    memcpy(&rebalance->data[rebalance->count], &cache->data[cache->count], REBALANCE_SIZE * sizeof(*rebalance->data));
+    rebalance->count += REBALANCE_SIZE;
+
+    vkd3d_mutex_unlock(&rebalance->mutex);
 }

 #undef HEAD_INDEX_MASK

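The counters in vkd3d_desc_object_cache_get()/_push() above are easier to follow in isolation, so here is a small single-threaded model of the same batching scheme (fixed-size arrays instead of vkd3d_array_reserve(), no mutex, illustrative names - not the vkd3d code): a cache that only pushes spills batches of REBALANCE_SIZE objects to the shared pool, a cache that only pops refills from it a batch at a time, and max_count grows only when a cache has to refill after having spilled, which keeps a thread's steady-state working set from being bounced through the pool repeatedly.

/* Standalone model of the batched rebalancing above; simplified, not the vkd3d code. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define REBALANCE_SIZE 64

struct pool          /* models struct desc_rebalance (mutex omitted: single thread) */
{
    void *data[1024];
    size_t count;
};

struct local_cache   /* models struct vkd3d_desc_object_cache */
{
    void *data[1024]; /* fixed-size arrays for brevity */
    size_t count, max_count, prev_rebalance_count;
    struct pool *pool;
};

static void *cache_get(struct local_cache *cache)
{
    if (cache->count)
        return cache->data[--cache->count];

    /* A refill after a spill means the spill was premature: raise the local limit. */
    cache->max_count += cache->prev_rebalance_count;
    cache->prev_rebalance_count = 0;

    if (cache->pool->count)      /* take a whole batch from the shared pool */
    {
        cache->pool->count -= REBALANCE_SIZE;
        memcpy(cache->data, &cache->pool->data[cache->pool->count],
                REBALANCE_SIZE * sizeof(*cache->data));
        cache->count = REBALANCE_SIZE;
        return cache->data[--cache->count];
    }
    return malloc(16);           /* pool empty: fall back to the allocator */
}

static void cache_push(struct local_cache *cache, void *object)
{
    cache->data[cache->count++] = object;
    if (cache->count < cache->max_count + REBALANCE_SIZE)
        return;

    /* Local surplus: move the top batch to the shared pool. */
    cache->count -= REBALANCE_SIZE;
    cache->prev_rebalance_count = REBALANCE_SIZE;
    memcpy(&cache->pool->data[cache->pool->count], &cache->data[cache->count],
            REBALANCE_SIZE * sizeof(*cache->data));
    cache->pool->count += REBALANCE_SIZE;
}

int main(void)
{
    struct pool pool = {0};
    struct local_cache freeing = {0}, allocating = {0};
    unsigned int i;

    freeing.pool = &pool;
    allocating.pool = &pool;

    for (i = 0; i < 200; ++i)               /* one thread only frees objects... */
        cache_push(&freeing, malloc(16));
    printf("shared pool now holds %zu objects\n", pool.count);

    for (i = 0; i < 200; ++i)               /* ...another only allocates them */
        free(cache_get(&allocating));
    printf("shared pool now holds %zu objects\n", pool.count);
    return 0;
}

With batches of 64, the run above spills three batches to the pool (printing 192) and then drains them again (printing 0).
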
 static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device *device)
 {
+    struct desc_object_caches *caches = device_get_desc_object_caches(device);
     struct vkd3d_cbuffer_desc *desc;

-    if (!(desc = vkd3d_desc_object_cache_get(&device->cbuffer_desc_cache)))
+    if (!(desc = vkd3d_desc_object_cache_get(&caches->cbuffer_desc_cache)))
         return NULL;

     desc->h.magic = VKD3D_DESCRIPTOR_MAGIC_CBV;
@@ -2360,11 +2359,12 @@ static struct vkd3d_cbuffer_desc *vkd3d_cbuffer_desc_create(struct d3d12_device
 static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_descriptor_type,
         enum vkd3d_view_type type, struct d3d12_device *device)
 {
+    struct desc_object_caches *caches = device_get_desc_object_caches(device);
     struct vkd3d_view *view;

     assert(magic);

-    if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache)))
+    if (!(view = vkd3d_desc_object_cache_get(&caches->view_desc_cache)))
     {
         ERR("Failed to allocate descriptor object.\n");
         return NULL;
@@ -2382,6 +2382,7 @@ static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_
 static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *device)
 {
     const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    struct desc_object_caches *caches = device_get_desc_object_caches(device);

     TRACE("Destroying view %p.\n", view);

@@ -2403,7 +2404,7 @@ static void vkd3d_view_destroy(struct vkd3d_view *view, struct d3d12_device *dev
     if (view->v.vk_counter_view)
         VK_CALL(vkDestroyBufferView(device->vk_device, view->v.vk_counter_view, NULL));

-    vkd3d_desc_object_cache_push(&device->view_desc_cache, view);
+    vkd3d_desc_object_cache_push(&caches->view_desc_cache, view);
 }

 void vkd3d_view_decref(void *view, struct d3d12_device *device)
@@ -2416,7 +2417,10 @@ void vkd3d_view_decref(void *view, struct d3d12_device *device)
     if (u.header->magic != VKD3D_DESCRIPTOR_MAGIC_CBV)
         vkd3d_view_destroy(u.view, device);
     else
-        vkd3d_desc_object_cache_push(&device->cbuffer_desc_cache, u.object);
+    {
+        struct desc_object_caches *caches = device_get_desc_object_caches(device);
+        vkd3d_desc_object_cache_push(&caches->cbuffer_desc_cache, u.object);
+    }
 }

 static inline void d3d12_desc_replace(struct d3d12_desc *dst, void *view, struct d3d12_device *device)
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 89f8b15e..f261a8af 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -204,11 +204,18 @@ struct vkd3d_mutex
     CRITICAL_SECTION lock;
 };

+#define VKD3D_MUTEX_INITIALIZER {NULL, -1, 0, 0, 0, 0}
+
 struct vkd3d_cond
 {
     CONDITION_VARIABLE cond;
 };

+struct vkd3d_tls_key
+{
+    unsigned int key;
+};
+
 static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock)
 {
     InitializeCriticalSection(&lock->lock);
@@ -254,6 +261,24 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond)
 {
 }

+static inline int vkd3d_tls_key_create(struct vkd3d_tls_key *key)
+{
+    if ((key->key = TlsAlloc()) == TLS_OUT_OF_INDEXES)
+        return -1;
+    return 0;
+}
+
+static inline int vkd3d_tls_key_set_value(const struct vkd3d_tls_key *key, void *value)
+{
+    TlsSetValue(key->key, value);
+    return 0;
+}
+
+static inline void *vkd3d_tls_key_get_value(const struct vkd3d_tls_key *key)
+{
+    return TlsGetValue(key->key);
+}
+
 static inline unsigned int vkd3d_atomic_increment(unsigned int volatile *x)
 {
     return InterlockedIncrement((LONG volatile *)x);
@@ -299,11 +324,17 @@ struct vkd3d_mutex
     pthread_mutex_t lock;
 };

+#define VKD3D_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+
 struct vkd3d_cond
 {
     pthread_cond_t cond;
 };

+struct vkd3d_tls_key
+{
+    pthread_key_t key;
+};

 static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock)
 {
@@ -386,6 +417,21 @@ static inline void vkd3d_cond_destroy(struct vkd3d_cond *cond)
     ERR("Could not destroy the condition variable, error %d.\n", ret);
 }

+static inline int vkd3d_tls_key_create(struct vkd3d_tls_key *key)
+{
+    return pthread_key_create(&key->key, NULL);
+}
+
+static inline int vkd3d_tls_key_set_value(const struct vkd3d_tls_key *key, void *value)
+{
+    return pthread_setspecific(key->key, value);
+}
+
+static inline void *vkd3d_tls_key_get_value(const struct vkd3d_tls_key *key)
+{
+    return pthread_getspecific(key->key);
+}
+
 # if HAVE_SYNC_SUB_AND_FETCH
 static inline unsigned int vkd3d_atomic_decrement(unsigned int volatile *x)
 {
@@ -1692,20 +1738,31 @@ struct vkd3d_uav_clear_state
 HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);
 void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device);

-struct desc_object_cache_head
+struct desc_rebalance
 {
-    void *head;
-    unsigned int spinlock;
+    void **data;
+    size_t capacity;
+    size_t count;
+    struct vkd3d_mutex mutex;
 };

 struct vkd3d_desc_object_cache
 {
-    struct desc_object_cache_head heads[16];
-    unsigned int next_index;
-    unsigned int free_count;
+    void **data;
+    size_t capacity;
+    size_t count;
+    size_t max_count;
+    size_t prev_rebalance_count;
+    struct desc_rebalance *rebalance;
     size_t size;
 };

+struct desc_object_caches
+{
+    struct vkd3d_desc_object_cache view_desc_cache;
+    struct vkd3d_desc_object_cache cbuffer_desc_cache;
+};
+
 #define VKD3D_DESCRIPTOR_POOL_COUNT 6

 /* ID3D12Device */
@@ -1723,8 +1780,9 @@ struct d3d12_device
     struct vkd3d_gpu_va_allocator gpu_va_allocator;

     struct vkd3d_mutex mutex;
-    struct vkd3d_desc_object_cache view_desc_cache;
-    struct vkd3d_desc_object_cache cbuffer_desc_cache;
+    struct vkd3d_tls_key tls_key;
+    struct desc_rebalance view_desc_rebalance;
+    struct desc_rebalance cbuffer_desc_rebalance;
     struct vkd3d_render_pass_cache render_pass_cache;
     VkPipelineCache vk_pipeline_cache;

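One way to picture how the new pieces relate (a reader's summary, not part of the diff):

/*
 *   struct d3d12_device                   one per device, shared by all threads
 *     .tls_key                            identifies this device's per-thread caches
 *     .view_desc_rebalance                mutex-protected overflow pool for views
 *     .cbuffer_desc_rebalance             mutex-protected overflow pool for CBV descriptors
 *
 *   struct desc_object_caches             one per thread per device, reached via tls_key
 *     .view_desc_cache.rebalance     -->  &device->view_desc_rebalance
 *     .cbuffer_desc_cache.rebalance  -->  &device->cbuffer_desc_rebalance
 *
 * Only the desc_rebalance pools are touched under a mutex; the per-thread
 * vkd3d_desc_object_cache arrays need no synchronisation.
 */
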
@@ -1777,6 +1835,7 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3
 bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent);
 void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason,
         const char *message, ...) VKD3D_PRINTF_FUNC(3, 4);
+struct desc_object_caches *device_get_desc_object_caches(struct d3d12_device *device);
 struct d3d12_device *unsafe_impl_from_ID3D12Device5(ID3D12Device5 *iface);

 static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object)
diff --git a/tests/d3d12.c b/tests/d3d12.c
index 86e8a8b1..d61309e9 100644
--- a/tests/d3d12.c
+++ b/tests/d3d12.c
@@ -36343,6 +36343,8 @@ static void test_readback_map_stability(void)
     ID3D12Resource_Unmap(buffer, 0, NULL);

     ID3D12Resource_Release(buffer);
+
+    destroy_test_context(&context);
 }

 static void test_vs_ps_relative_addressing(void)