Tested on MSVC 2017. This patch makes it viable to build standalone and debuggable .dlls of vkd3d.
This commit fixes various compatibility issues with MSVC 2017.
The only non-trivial change is the addition of a minimal pthread wrapper. This requires Windows Vista or later, because condition variables are not available on XP.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- include/private/vkd3d_common.h | 18 ++- include/private/vkd3d_debug.h | 4 +- include/private/vkd3d_threads.h | 166 +++++++++++++++++++++++ libs/vkd3d-shader/spirv.c | 2 +- libs/vkd3d-shader/vkd3d_shader_private.h | 1 - libs/vkd3d/device.c | 2 +- libs/vkd3d/resource.c | 2 +- libs/vkd3d/state.c | 2 +- libs/vkd3d/utils.c | 4 +- libs/vkd3d/vkd3d_private.h | 17 +-- libs/vkd3d/vulkan_procs.h | 4 +- 11 files changed, 197 insertions(+), 25 deletions(-) create mode 100644 include/private/vkd3d_threads.h
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 1ac8a63..fab0cd4 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -26,6 +26,10 @@ #include <limits.h> #include <stdbool.h>
+#ifdef _MSC_VER +#include <intrin.h> +#endif + #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) #endif @@ -51,7 +55,9 @@ static inline size_t align(size_t addr, size_t alignment)
static inline unsigned int vkd3d_popcount(unsigned int v) { -#ifdef HAVE_BUILTIN_POPCOUNT +#ifdef _MSC_VER + return __popcnt(v); +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; @@ -78,7 +84,11 @@ static inline bool vkd3d_bitmask_is_contiguous(unsigned int mask) /* Undefined for x == 0. */ static inline unsigned int vkd3d_log2i(unsigned int x) { -#ifdef HAVE_BUILTIN_CLZ +#ifdef _MSC_VER + unsigned long result; + _BitScanReverse(&result, x); + return (unsigned int)result; /* _BitScanReverse() stores the index of the highest set bit in result. */ +#elif defined(HAVE_BUILTIN_CLZ) return __builtin_clz(x) ^ 0x1f; #else static const unsigned int l[] = @@ -152,8 +162,8 @@ static inline LONG InterlockedDecrement(LONG volatile *x)
#if HAVE_SYNC_ADD_AND_FETCH # define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val) -#else -# error "atomic_add_fetch() not implemented for this platform" +#elif defined(_MSC_VER) +# define atomic_add_fetch(ptr, val) InterlockedAdd(ptr, val) #endif /* HAVE_SYNC_ADD_AND_FETCH */
static inline void vkd3d_parse_version(const char *version, int *major, int *minor) diff --git a/include/private/vkd3d_debug.h b/include/private/vkd3d_debug.h index 1a44cb9..c37c841 100644 --- a/include/private/vkd3d_debug.h +++ b/include/private/vkd3d_debug.h @@ -67,8 +67,8 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size) DECLSPEC_HIDDEN; vkd3d_dbg_next_time = true; \ VKD3D_DBG_PRINTF
-#define VKD3D_DBG_PRINTF(args...) \ - vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, args); } while (0) +#define VKD3D_DBG_PRINTF(...) \ + vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0)
#ifndef TRACE #define TRACE VKD3D_DBG_LOG(TRACE) diff --git a/include/private/vkd3d_threads.h b/include/private/vkd3d_threads.h new file mode 100644 index 0000000..b613b50 --- /dev/null +++ b/include/private/vkd3d_threads.h @@ -0,0 +1,166 @@ +/* + * Copyright 2019 Hans-Kristian Arntzen for Valve + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_THREADS_H +#define __VKD3D_THREADS_H + +#include "config.h" + +#if defined(HAVE_PTHREAD_H) +#include <pthread.h> + +#elif defined(_WIN32) /* HAVE_PTHREAD_H */ + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +typedef struct pthread +{ + HANDLE thread; + DWORD id; +} pthread_t; + +typedef struct pthread_mutex +{ + CRITICAL_SECTION *lock; +} pthread_mutex_t; + +typedef struct pthread_cond +{ + CONDITION_VARIABLE *cond; +} pthread_cond_t; + +struct vkd3d_pthread_wrapper_struct +{ + void * (*routine)(void *); + void *arg; +}; + +static DWORD WINAPI win32_thread_wrapper_routine(struct vkd3d_pthread_wrapper_struct *wrapper) +{ + struct vkd3d_pthread_wrapper_struct tmp = *wrapper; + vkd3d_free(wrapper); + tmp.routine(tmp.arg); + return 0; +} + +static inline int pthread_create(pthread_t *thread, void *attr, void * (*thread_fun)(void *), void *arg) +{ + (void)attr; + struct vkd3d_pthread_wrapper_struct *wrapper = 
vkd3d_malloc(sizeof(*wrapper)); + if (!wrapper) + return -1; + wrapper->routine = thread_fun; + wrapper->arg = arg; + thread->thread = CreateThread(NULL, 0, win32_thread_wrapper_routine, wrapper, 0, &thread->id); + if (!thread->thread) + { + vkd3d_free(wrapper); + return -1; + } + return 0; +} + +static inline int pthread_join(pthread_t thread, void **ret) +{ + (void)ret; + int success = WaitForSingleObject(thread.thread, INFINITE) == WAIT_OBJECT_0; + CloseHandle(thread.thread); + return success ? 0 : -1; +} + +static inline int pthread_mutex_init(pthread_mutex_t *lock, void *attr) +{ + (void)attr; + lock->lock = vkd3d_malloc(sizeof(CRITICAL_SECTION)); + if (!lock->lock) + return -1; + InitializeCriticalSection(lock->lock); + return 0; +} + +static inline int pthread_mutex_lock(pthread_mutex_t *lock) +{ + EnterCriticalSection(lock->lock); + return 0; +} + +static inline int pthread_mutex_unlock(pthread_mutex_t *lock) +{ + LeaveCriticalSection(lock->lock); + return 0; +} + +static inline int pthread_mutex_destroy(pthread_mutex_t *lock) +{ + DeleteCriticalSection(lock->lock); + vkd3d_free(lock->lock); + return 0; +} + +static inline int pthread_cond_init(pthread_cond_t *cond, void *attr) +{ + (void)attr; + cond->cond = vkd3d_malloc(sizeof(CONDITION_VARIABLE)); + if (!cond->cond) + return -1; + InitializeConditionVariable(cond->cond); + return 0; +} + +static inline void pthread_cond_destroy(pthread_cond_t *cond) +{ + vkd3d_free(cond->cond); +} + +static inline int pthread_cond_signal(pthread_cond_t *cond) +{ + WakeConditionVariable(cond->cond); + return 0; +} + +static inline int pthread_cond_broadcast(pthread_cond_t *cond) +{ + WakeAllConditionVariable(cond->cond); + return 0; +} + +static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *lock) +{ + bool ret = SleepConditionVariableCS(cond->cond, lock->lock, INFINITE); + return ret ? 0 : -1; +} + +#else /* HAVE_PTHREAD_H */ +#error "Threads are not supported. Cannot build." 
+#endif /* HAVE_PTHREAD_H */ + +static inline void vkd3d_set_thread_name(const char *name) +{ +#if defined(_MSC_VER) + (void)name; +#elif defined(HAVE_PTHREAD_SETNAME_NP_2) + pthread_setname_np(pthread_self(), name); +#elif defined(HAVE_PTHREAD_SETNAME_NP_1) + pthread_setname_np(name); +#else + (void)name; +#endif +} + +#endif /* __VKD3D_THREADS_H */ diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index fe8935c..580230f 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4540,7 +4540,7 @@ static void vkd3d_dxbc_compiler_emit_store_shader_output(struct vkd3d_dxbc_compi
static void vkd3d_dxbc_compiler_emit_shader_epilogue_function(struct vkd3d_dxbc_compiler *compiler) { - uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {}; + uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0}; uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_signature *signature; diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 47764e5..940cb76 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -56,7 +56,6 @@ #include <limits.h> #include <stdbool.h> #include <string.h> -#include <strings.h>
#define VKD3D_VEC4_SIZE 4
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 3da4273..460bdf9 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -453,7 +453,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, bool *user_extension_supported = NULL; VkApplicationInfo application_info; VkInstanceCreateInfo instance_info; - char application_name[PATH_MAX]; + char application_name[VKD3D_PATH_MAX]; uint32_t extension_count; const char **extensions; VkInstance vk_instance; diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index aebe1fa..88f184d 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -1388,7 +1388,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour size = (box.right - box.left) / format->block_width * format->byte_count * format->block_byte_count; for (z = box.front; z < box.back; ++z) { - dst = dst_data + (z - box.front) * dst_slice_pitch; + dst = (uint8_t *)dst_data + (z - box.front) * dst_slice_pitch; src = src_data + z * vk_layout.depthPitch + box.top / format->block_height * vk_layout.rowPitch; for (y = box.top; y < box.bottom; y += format->block_height) { diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 12a711b..9dc5cff 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1342,7 +1342,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, struct vkd3d_shader_code dxbc = {code->pShaderBytecode, code->BytecodeLength}; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct VkShaderModuleCreateInfo shader_desc; - struct vkd3d_shader_code spirv = {}; + struct vkd3d_shader_code spirv = {0}; VkResult vr; int ret;
diff --git a/libs/vkd3d/utils.c b/libs/vkd3d/utils.c index 6a910a7..624b14f 100644 --- a/libs/vkd3d/utils.c +++ b/libs/vkd3d/utils.c @@ -804,7 +804,7 @@ HRESULT vkd3d_load_vk_device_procs(struct vkd3d_vk_device_procs *procs, return S_OK; }
-#ifdef _GNU_SOURCE +#if defined(_GNU_SOURCE) && !defined(_WIN32)
bool vkd3d_get_program_name(char program_name[PATH_MAX]) { @@ -840,7 +840,7 @@ bool vkd3d_get_program_name(char program_name[PATH_MAX])
#else
-bool vkd3d_get_program_name(char program_name[PATH_MAX]) +bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]) { *program_name = '\0'; return false; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 59f0eac..a51ca4d 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -31,11 +31,11 @@
#include "vkd3d.h" #include "vkd3d_shader.h" +#include "vkd3d_threads.h"
#include <assert.h> #include <inttypes.h> #include <limits.h> -#include <pthread.h> #include <stdbool.h>
#define VK_CALL(f) (vk_procs->f) @@ -1231,16 +1231,13 @@ HRESULT vkd3d_load_vk_device_procs(struct vkd3d_vk_device_procs *procs,
extern const char vkd3d_build[];
-bool vkd3d_get_program_name(char program_name[PATH_MAX]) DECLSPEC_HIDDEN; - -static inline void vkd3d_set_thread_name(const char *name) -{ -#if defined(HAVE_PTHREAD_SETNAME_NP_2) - pthread_setname_np(pthread_self(), name); -#elif defined(HAVE_PTHREAD_SETNAME_NP_1) - pthread_setname_np(name); +#ifdef PATH_MAX +#define VKD3D_PATH_MAX PATH_MAX +#else +#define VKD3D_PATH_MAX 256 #endif -} + +bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]) DECLSPEC_HIDDEN;
VkResult vkd3d_set_vk_object_name_utf8(struct d3d12_device *device, uint64_t vk_object, VkDebugReportObjectTypeEXT vk_object_type, const char *name) DECLSPEC_HIDDEN; diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index 702cfd2..ec29eb4 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -178,8 +178,8 @@ VK_DEVICE_PFN(vkUpdateDescriptorSets) VK_DEVICE_PFN(vkWaitForFences)
/* VK_KHR_draw_indirect_count */ -VK_DEVICE_EXT_PFN(vkCmdDrawIndirectCountKHR); -VK_DEVICE_EXT_PFN(vkCmdDrawIndexedIndirectCountKHR); +VK_DEVICE_EXT_PFN(vkCmdDrawIndirectCountKHR) +VK_DEVICE_EXT_PFN(vkCmdDrawIndexedIndirectCountKHR)
/* VK_KHR_get_memory_requirements2 */ VK_DEVICE_EXT_PFN(vkGetBufferMemoryRequirements2KHR)
Persistently mapping host-visible heaps greatly reduces the number of map calls for applications which do not keep their resources mapped themselves. Also add a config option to use the older map-on-demand behavior, which might be useful for debugging tools.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/device.c | 1 + libs/vkd3d/resource.c | 33 +++++++++++++++++++++++++++++++++ libs/vkd3d/vkd3d_private.h | 2 ++ 3 files changed, 36 insertions(+)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 460bdf9..d81d53c 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -428,6 +428,7 @@ static void vkd3d_init_debug_report(struct vkd3d_instance *instance) static const struct vkd3d_debug_option vkd3d_config_options[] = { {"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, /* enable Vulkan debug extensions */ + {"nopersistent", VKD3D_CONFIG_FLAG_NO_PERSISTENT_MAPPING}, /* No persistent host pointer mapping. */ };
static uint64_t vkd3d_init_config_flags(void) diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 88f184d..87277f7 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -299,6 +299,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap)
TRACE("Destroying heap %p.\n", heap);
+ if (heap->is_persistent && heap->map_ptr) + VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory)); + vkd3d_private_store_destroy(&heap->private_store);
VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL)); @@ -420,6 +423,18 @@ static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, VkResult vr; int rc;
+ /* If we have a persistent heap, there is no need to lock and map/unmap. + * Just hand a pointer to caller. There is technically a need to do cache maintenance here + * but we always use COHERENT memory types for host memory, so no need to deal with incoherent + * cached host memory. */ + if (heap->is_persistent) + { + TRACE("Mapping persistently mapped heap %p.\n", heap); + assert(heap->map_ptr); + *data = (BYTE *)heap->map_ptr + offset; + return S_OK; + } + if ((rc = pthread_mutex_lock(&heap->mutex))) { ERR("Failed to lock mutex, error %d.\n", rc); @@ -475,6 +490,10 @@ static void d3d12_heap_unmap(struct d3d12_heap *heap, struct d3d12_resource *res struct d3d12_device *device = heap->device; int rc;
+ /* A persistently mapped heap stays mapped until it is destroyed, so there is nothing to unmap here. */ + if (heap->is_persistent) + return; + if ((rc = pthread_mutex_lock(&heap->mutex))) { ERR("Failed to lock mutex, error %d.\n", rc); @@ -543,11 +562,13 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, VkDeviceSize vk_memory_size; HRESULT hr; int rc; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl; heap->refcount = 1;
heap->is_private = !!resource; + heap->is_persistent = false;
heap->desc = *desc;
@@ -614,6 +635,18 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, return hr; }
+ /* If the heap is in HOST_VISIBLE space, just persistently map it. + * This way we avoid mapping and unmapping the whole memory block and taking locks every time + * a small sub-region is mapped. */ + if ((device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_NO_PERSISTENT_MAPPING) == 0) + { + if (device->memory_properties.memoryTypes[heap->vk_memory_type].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + { + if (VK_CALL(vkMapMemory(device->vk_device, heap->vk_memory, 0, VK_WHOLE_SIZE, 0, &heap->map_ptr)) == VK_SUCCESS) + heap->is_persistent = true; + } + } + heap->device = device; if (!heap->is_private) d3d12_device_add_ref(heap->device); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index a51ca4d..d842f58 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -130,6 +130,7 @@ struct vkd3d_vulkan_info enum vkd3d_config_flags { VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001, + VKD3D_CONFIG_FLAG_NO_PERSISTENT_MAPPING = 0x00000002 };
struct vkd3d_instance @@ -360,6 +361,7 @@ struct d3d12_heap LONG refcount;
bool is_private; + bool is_persistent; D3D12_HEAP_DESC desc;
pthread_mutex_t mutex;
On Mon, 30 Sep 2019 at 17:48, Hans-Kristian Arntzen post@arntzen-software.no wrote:
Greatly reduces number of maps for applications which do not do this themselves. Also add a config option to use older map-on-demand behavior, which might be nice for debugging tools.
I don't want to just say no, but this does seem to go against the spirit of an "explicit" API like d3d12 somewhat. My other concern with this is that while on 64-bit we typically have plenty of address space, things can be a bit more tight on 32-bit. I assume the main justification for this patch is performance; do you have some numbers?
On 9/30/19 9:40 PM, Henri Verbeet wrote:
On Mon, 30 Sep 2019 at 17:48, Hans-Kristian Arntzen post@arntzen-software.no wrote:
Greatly reduces number of maps for applications which do not do this themselves. Also add a config option to use older map-on-demand behavior, which might be nice for debugging tools.
I don't want to just say no, but this does seem to go against the spirit of an "explicit" API like d3d12 somewhat. My other concern with this is that while on 64-bit we typically have plenty of address space, things can be a bit more tight on 32-bit. I assume the main justification for this patch is performance; do you have some numbers?
FWIW, I tried this on native D3D12, and Map/Unmap of single resources seems to behave in a very similar way to the existing implementation. vkMapMemory is actually faster than ID3D12Resource::Map, so it is probably fine to just drop this patch.
Cheers, Hans-Kristian
It is possible to map a resource without disclosing the CPU VA to the caller, by passing a NULL data pointer. This is used for WriteToSubresource.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/resource.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 87277f7..4ba7586 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -431,7 +431,9 @@ static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, { TRACE("Mapping persistently mapped heap %p.\n", heap); assert(heap->map_ptr); - *data = (BYTE *)heap->map_ptr + offset; + /* Data may be null, in which case we still need to map, but don't have to disclose CPU VA. */ + if (data) + *data = (BYTE *)heap->map_ptr + offset; return S_OK; }
@@ -471,13 +473,15 @@ static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, if (hr == S_OK) { assert(heap->map_ptr); - *data = (BYTE *)heap->map_ptr + offset; + if (data) + *data = (BYTE *)heap->map_ptr + offset; ++resource->map_count; } else { assert(!heap->map_ptr); - *data = NULL; + if (data) + *data = NULL; }
pthread_mutex_unlock(&heap->mutex); @@ -1268,7 +1272,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, data))) WARN("Failed to map resource %p, hr %#x.\n", resource, hr);
- TRACE("Returning pointer %p.\n", *data); + if (data) + TRACE("Returning pointer %p.\n", *data);
return hr; }
On Mon, 30 Sep 2019 at 17:48, Hans-Kristian Arntzen post@arntzen-software.no wrote:
It is possible to map a resource, but not disclose the VA to caller. This is used for WriteToSubresource.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no
libs/vkd3d/resource.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-)
This seems fine. Do you have a regression test to go along with this as well?
It is possible to map a resource without disclosing the CPU VA to the caller, by passing a NULL data pointer. This is used for WriteToSubresource.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/resource.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index aebe1fa..4186155 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -423,7 +423,8 @@ static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, if ((rc = pthread_mutex_lock(&heap->mutex))) { ERR("Failed to lock mutex, error %d.\n", rc); - *data = NULL; + if (data) + *data = NULL; return hresult_from_errno(rc); }
@@ -456,13 +457,15 @@ static HRESULT d3d12_heap_map(struct d3d12_heap *heap, uint64_t offset, if (hr == S_OK) { assert(heap->map_ptr); - *data = (BYTE *)heap->map_ptr + offset; + if (data) + *data = (BYTE *)heap->map_ptr + offset; ++resource->map_count; } else { assert(!heap->map_ptr); - *data = NULL; + if (data) + *data = NULL; }
pthread_mutex_unlock(&heap->mutex); @@ -1235,7 +1238,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_Map(ID3D12Resource *iface, UINT if (FAILED(hr = d3d12_heap_map(resource->heap, resource->heap_offset, resource, data))) WARN("Failed to map resource %p, hr %#x.\n", resource, hr);
- TRACE("Returning pointer %p.\n", *data); + if (data) + TRACE("Returning pointer %p.\n", *data);
return hr; }
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- tests/d3d12.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 9c608c1..12165e0 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -6708,6 +6708,11 @@ static void test_map_resource(void) ID3D12Resource_Unmap(resource, 1, NULL); ID3D12Resource_Unmap(resource, 0, NULL);
+ /* Passing NULL to Map should map, but not disclose the CPU VA to caller. */ + hr = ID3D12Resource_Map(resource, 0, NULL, NULL); + ok(hr == S_OK, "Got unexpected hr %#x.\n", hr); + ID3D12Resource_Unmap(resource, 0, NULL); + ID3D12Resource_Release(resource);
refcount = ID3D12Device_Release(device);
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Allow the Vulkan command pools to behave like D3D12 command pools, where memory is owned by the pool rather than individual command buffers.
Also, do not release resources on vkResetCommandPool, as the intention from D3D12 docs is to reuse memory rather than freeing it.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/command.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index ae88910..d420863 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1648,8 +1648,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo allocator->command_buffer_count = 0; }
- if ((vr = VK_CALL(vkResetCommandPool(device->vk_device, allocator->vk_command_pool, - VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT)))) + /* The intent here is to recycle memory, so do not use RELEASE_RESOURCES_BIT here. */ + if ((vr = VK_CALL(vkResetCommandPool(device->vk_device, allocator->vk_command_pool, 0)))) { WARN("Resetting command pool failed, vr %d.\n", vr); return hresult_from_vk_result(vr); @@ -1723,7 +1723,10 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo
command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; command_pool_info.pNext = NULL; - command_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + /* Do not use RESET_COMMAND_BUFFER_BIT. This allows the CommandPool to be a D3D12-style command pool. + * Memory is owned by the pool and CommandBuffers become lightweight handles, + * assuming a half-decent driver implementation. */ + command_pool_info.flags = 0; command_pool_info.queueFamilyIndex = queue->vk_family_index;
if ((vr = VK_CALL(vkCreateCommandPool(device->vk_device, &command_pool_info, NULL,
On Mon, 30 Sep 2019 at 17:48, Hans-Kristian Arntzen post@arntzen-software.no wrote:
Allow the Vulkan command pools to behave like D3D12 command pools, where memory is owned by the pool rather than individual command buffers.
Also, do not release resources on vkResetCommandPool, as the intention from D3D12 docs is to reuse memory rather than freeing it.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no
libs/vkd3d/command.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
This one seems fine as well, but it also sounds like this could be split in two individual commits.
D3D12 command allocators are intended to recycle memory across resets, so we should do the same thing in vkd3d.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/command.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index ae88910..2d6d02d 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1648,8 +1648,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo allocator->command_buffer_count = 0; }
- if ((vr = VK_CALL(vkResetCommandPool(device->vk_device, allocator->vk_command_pool, - VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT)))) + /* The intent here is to recycle memory, so do not use RELEASE_RESOURCES_BIT here. */ + if ((vr = VK_CALL(vkResetCommandPool(device->vk_device, allocator->vk_command_pool, 0)))) { WARN("Resetting command pool failed, vr %d.\n", vr); return hresult_from_vk_result(vr);
When VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT is set, command pools cannot efficiently pool allocations. The pool creation flags should be set to 0 so that only the VkCommandPool may be reset. This matches the D3D12 API.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/command.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 2d6d02d..d420863 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1723,7 +1723,10 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo
command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; command_pool_info.pNext = NULL; - command_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + /* Do not use RESET_COMMAND_BUFFER_BIT. This allows the CommandPool to be a D3D12-style command pool. + * Memory is owned by the pool and CommandBuffers become lightweight handles, + * assuming a half-decent driver implementation. */ + command_pool_info.flags = 0; command_pool_info.queueFamilyIndex = queue->vk_family_index;
if ((vr = VK_CALL(vkCreateCommandPool(device->vk_device, &command_pool_info, NULL,
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
The GPU VA allocator was allocating memory in a way where dereferencing GPU VA required a lock + bsearch to find the right VA range.
Rather than going this route, we turn the common case into an O(1), lockless lookup by creating a slab allocator, which allows us to look up a pointer directly from a GPU VA with (VA - Base) / PageSize.
The number of allocations in the fast path must be limited since we cannot trivially grow the allocator while remaining lock-free for dereferences.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/device.c | 240 +++++++++++++++++++++++++++++++------ libs/vkd3d/resource.c | 2 +- libs/vkd3d/vkd3d_private.h | 31 +++-- 3 files changed, 227 insertions(+), 46 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index d81d53c..beac7f2 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1823,42 +1823,106 @@ static void d3d12_device_destroy_pipeline_cache(struct d3d12_device *device) pthread_mutex_destroy(&device->mutex); }
-D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator, - size_t size, void *ptr) +#define VKD3D_MAX_VA_SLAB_ALLOCATIONS (64 * 1024) +#define VKD3D_BASE_VA_SLAB (0x1000000000ull) +#define VKD3D_BASE_VA_FALLBACK (0x8000000000000000ull) +#define VKD3D_SLAB_ALLOCATION_SIZE (0x100000000ull) +#define VKD3D_SLAB_ALLOCATION_SIZE_LOG2 32 + +static D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate_fallback(struct vkd3d_gpu_va_allocator *allocator, + size_t size, size_t alignment, void *ptr) { D3D12_GPU_VIRTUAL_ADDRESS ceiling = ~(D3D12_GPU_VIRTUAL_ADDRESS)0; struct vkd3d_gpu_va_allocation *allocation; - int rc;
- if ((rc = pthread_mutex_lock(&allocator->mutex))) + if (!vkd3d_array_reserve((void **)&allocator->fallback_mem_allocations, &allocator->fallback_mem_allocations_size, + allocator->fallback_mem_allocation_count + 1, sizeof(*allocator->fallback_mem_allocations))) { - ERR("Failed to lock mutex, error %d.\n", rc); return 0; }
- if (!vkd3d_array_reserve((void **)&allocator->allocations, &allocator->allocations_size, - allocator->allocation_count + 1, sizeof(*allocator->allocations))) + allocator->fallback_mem_floor = (allocator->fallback_mem_floor + alignment - 1) & ~((D3D12_GPU_VIRTUAL_ADDRESS)alignment - 1); + + if (size > ceiling || ceiling - size < allocator->fallback_mem_floor) { - pthread_mutex_unlock(&allocator->mutex); return 0; }
- if (size > ceiling || ceiling - size < allocator->floor) + allocation = &allocator->fallback_mem_allocations[allocator->fallback_mem_allocation_count++]; + allocation->base = allocator->fallback_mem_floor; + allocation->size = size; + allocation->ptr = ptr; + + /* This pointer is bumped and never lowered on a free. + * However, this will only fail once we have exhausted 63 bits of address space. */ + allocator->fallback_mem_floor += size; + + return allocation->base; +} + +static D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate_slab(struct vkd3d_gpu_va_allocator *allocator, + size_t size, size_t alignment, void *ptr) +{ + int rc; + unsigned vacant_index; + D3D12_GPU_VIRTUAL_ADDRESS virtual_address = 0; + + if ((rc = pthread_mutex_lock(&allocator->mutex))) { - pthread_mutex_unlock(&allocator->mutex); + ERR("Failed to lock mutex, error %d.\n", rc); return 0; }
- allocation = &allocator->allocations[allocator->allocation_count++]; - allocation->base = allocator->floor; - allocation->size = size; - allocation->ptr = ptr; + TRACE("Allocating %zu bytes (%zu align) of VA from slab allocator.\n", size, alignment); + if (allocator->mem_vacant_count > 0) + { + vacant_index = allocator->mem_vacant[--allocator->mem_vacant_count]; + + /* It is critical that the multiplication happens in 64-bit to not overflow. */ + virtual_address = VKD3D_BASE_VA_SLAB + vacant_index * VKD3D_SLAB_ALLOCATION_SIZE; + TRACE("Allocating VA: 0x%llx: vacant index %u from slab.\n", + (unsigned long long)virtual_address, vacant_index); + assert(!allocator->slab_mem_allocations[vacant_index].ptr); + allocator->slab_mem_allocations[vacant_index].ptr = ptr; + allocator->slab_mem_allocations[vacant_index].size = size; + }
- allocator->floor += size; + if (virtual_address == 0) + { + TRACE("Slab allocator is empty, allocating %zu bytes (%zu align) of VA from fallback allocator.\n", + size, alignment); + /* Fall back to slow allocator. */ + virtual_address = vkd3d_gpu_va_allocator_allocate_fallback(allocator, size, alignment, ptr); + }
pthread_mutex_unlock(&allocator->mutex); + return virtual_address; +}
- return allocation->base; +D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator, + size_t size, size_t alignment, void *ptr) +{ + D3D12_GPU_VIRTUAL_ADDRESS virtual_address; + int rc; + size_t aligned_size; + + aligned_size = size > alignment ? size : alignment; + + if (aligned_size > VKD3D_SLAB_ALLOCATION_SIZE) + { + /* For massive VA allocations, go straight to high-mem with a slower allocator. */ + if ((rc = pthread_mutex_lock(&allocator->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return 0; + } + virtual_address = vkd3d_gpu_va_allocator_allocate_fallback(allocator, size, alignment, ptr); + pthread_mutex_unlock(&allocator->mutex); + } + else + virtual_address = vkd3d_gpu_va_allocator_allocate_slab(allocator, size, alignment, ptr); + + return virtual_address; }
static int vkd3d_gpu_va_allocation_compare(const void *k, const void *e) @@ -1873,24 +1937,93 @@ static int vkd3d_gpu_va_allocation_compare(const void *k, const void *e) return 0; }
+static void *vkd3d_gpu_va_allocator_dereference_slab(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address) +{ + D3D12_GPU_VIRTUAL_ADDRESS base_offset; + uint64_t base_index; + const struct vkd3d_gpu_va_slab_entry *slab; + + base_offset = address - VKD3D_BASE_VA_SLAB; + base_index = base_offset >> VKD3D_SLAB_ALLOCATION_SIZE_LOG2; + if (base_index >= VKD3D_MAX_VA_SLAB_ALLOCATIONS) + { + ERR("Accessed slab size class out of range.\n"); + return NULL; + } + + slab = &allocator->slab_mem_allocations[base_index]; + base_offset -= base_index * VKD3D_SLAB_ALLOCATION_SIZE; + if (base_offset >= slab->size) + { + ERR("Accessed slab out of range.\n"); + return NULL; + } + return slab->ptr; +} + +static void vkd3d_gpu_va_allocator_free_slab(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address) +{ + D3D12_GPU_VIRTUAL_ADDRESS base_offset; + unsigned base_index; + struct vkd3d_gpu_va_slab_entry *slab; + + base_offset = address - VKD3D_BASE_VA_SLAB; + base_index = base_offset >> VKD3D_SLAB_ALLOCATION_SIZE_LOG2; + + if (base_index >= VKD3D_MAX_VA_SLAB_ALLOCATIONS) + { + ERR("Accessed slab size class out of range.\n"); + return; + } + + slab = &allocator->slab_mem_allocations[base_index]; + if (slab->ptr == NULL) + { + ERR("Attempting to free NULL VA.\n"); + return; + } + + if (allocator->mem_vacant_count >= VKD3D_MAX_VA_SLAB_ALLOCATIONS) + { + ERR("Invalid free, slab size class is fully freed.\n"); + return; + } + + TRACE("Freeing VA: 0x%llx: index %u from slab.\n", + (unsigned long long)address, base_index); + + slab->ptr = NULL; + allocator->mem_vacant[allocator->mem_vacant_count++] = base_index; +} + void *vkd3d_gpu_va_allocator_dereference(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address) { struct vkd3d_gpu_va_allocation *allocation; int rc;
- if ((rc = pthread_mutex_lock(&allocator->mutex))) + /* If we land in the non-fallback region, dereferencing VA is lockless. The base pointer is immutable, + * and only way we can have a data race is if some other thread is poking into the slab_mem_allocation[class][base_index] block. + * This can only happen if someone is trying to free the entry while we're dereferencing, which would be a serious app bug. */ + if (address < VKD3D_BASE_VA_FALLBACK) { - ERR("Failed to lock mutex, error %d.\n", rc); - return NULL; + return vkd3d_gpu_va_allocator_dereference_slab(allocator, address); } + else + { + /* Slow fallback. */ + if ((rc = pthread_mutex_lock(&allocator->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + return NULL; + }
- allocation = bsearch(&address, allocator->allocations, allocator->allocation_count, - sizeof(*allocation), vkd3d_gpu_va_allocation_compare); - - pthread_mutex_unlock(&allocator->mutex); + allocation = bsearch(&address, allocator->fallback_mem_allocations, allocator->fallback_mem_allocation_count, + sizeof(*allocation), vkd3d_gpu_va_allocation_compare);
- return allocation ? allocation->ptr : NULL; + pthread_mutex_unlock(&allocator->mutex); + return allocation ? allocation->ptr : NULL; + } }
void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address) @@ -1905,16 +2038,23 @@ void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator, D3D12 return; }
- allocation = bsearch(&address, allocator->allocations, allocator->allocation_count, - sizeof(*allocation), vkd3d_gpu_va_allocation_compare); - if (allocation && allocation->base == address) + if (address < VKD3D_BASE_VA_FALLBACK) { - index = allocation - allocator->allocations; - --allocator->allocation_count; - if (index != allocator->allocation_count) + vkd3d_gpu_va_allocator_free_slab(allocator, address); + } + else + { + allocation = bsearch(&address, allocator->fallback_mem_allocations, allocator->fallback_mem_allocation_count, + sizeof(*allocation), vkd3d_gpu_va_allocation_compare); + if (allocation && allocation->base == address) { - memmove(&allocator->allocations[index], &allocator->allocations[index + 1], - (allocator->allocation_count - index) * sizeof(*allocation)); + index = allocation - allocator->fallback_mem_allocations; + --allocator->fallback_mem_allocation_count; + if (index != allocator->fallback_mem_allocation_count) + { + memmove(&allocator->fallback_mem_allocations[index], &allocator->fallback_mem_allocations[index + 1], + (allocator->fallback_mem_allocation_count - index) * sizeof(*allocation)); + } } }
@@ -1924,29 +2064,59 @@ void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator, D3D12 static bool vkd3d_gpu_va_allocator_init(struct vkd3d_gpu_va_allocator *allocator) { int rc; + int i;
memset(allocator, 0, sizeof(*allocator)); - allocator->floor = 0x1000; + allocator->fallback_mem_floor = VKD3D_BASE_VA_FALLBACK; + + /* To remain lock-less, we cannot grow these lists after the fact. If we commit to a maximum number of allocations + * here, we can dereference without taking a lock as the base pointer never changes. + * We would be able to grow more seamlessly using an array of pointers, + * but would make dereferencing slightly less efficient. */ + allocator->slab_mem_allocations = vkd3d_calloc(VKD3D_MAX_VA_SLAB_ALLOCATIONS, sizeof(*allocator->slab_mem_allocations)); + if (!allocator->slab_mem_allocations) + goto error; + + /* Otherwise we need 32-bit indices. */ + assert(VKD3D_MAX_VA_SLAB_ALLOCATIONS <= 64 * 1024); + + allocator->mem_vacant = vkd3d_malloc(VKD3D_MAX_VA_SLAB_ALLOCATIONS * sizeof(uint16_t)); + if (!allocator->mem_vacant) + goto error; + + /* Build a stack of which slab indices are available for allocation. + * Place lowest indices last (first to be popped off stack). */ + for (i = 0; i < VKD3D_MAX_VA_SLAB_ALLOCATIONS; i++) + allocator->mem_vacant[i] = (VKD3D_MAX_VA_SLAB_ALLOCATIONS - 1) - i; + allocator->mem_vacant_count = VKD3D_MAX_VA_SLAB_ALLOCATIONS;
if ((rc = pthread_mutex_init(&allocator->mutex, NULL))) { ERR("Failed to initialize mutex, error %d.\n", rc); - return false; + goto error; }
return true; + +error: + vkd3d_free(allocator->slab_mem_allocations); + vkd3d_free(allocator->mem_vacant); + return false; }
static void vkd3d_gpu_va_allocator_cleanup(struct vkd3d_gpu_va_allocator *allocator) { int rc;
+ vkd3d_free(allocator->slab_mem_allocations); + vkd3d_free(allocator->mem_vacant); + if ((rc = pthread_mutex_lock(&allocator->mutex))) { ERR("Failed to lock mutex, error %d.\n", rc); return; } - vkd3d_free(allocator->allocations); + vkd3d_free(allocator->fallback_mem_allocations); pthread_mutex_unlock(&allocator->mutex); pthread_mutex_destroy(&allocator->mutex); } diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 4ba7586..31813a1 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -1645,7 +1645,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 &resource->desc, &resource->u.vk_buffer))) return hr; if (!(resource->gpu_address = vkd3d_gpu_va_allocator_allocate(&device->gpu_va_allocator, - desc->Width, resource))) + desc->Width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, resource))) { ERR("Failed to allocate GPU VA.\n"); d3d12_resource_destroy(resource, device); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index d842f58..320d604 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -203,24 +203,35 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, struct d3d12_device *device) DECLSPEC_HIDDEN;
+struct vkd3d_gpu_va_allocation +{ + D3D12_GPU_VIRTUAL_ADDRESS base; + SIZE_T size; + void *ptr; +}; + +struct vkd3d_gpu_va_slab_entry +{ + void *ptr; + SIZE_T size; +}; + struct vkd3d_gpu_va_allocator { pthread_mutex_t mutex;
- D3D12_GPU_VIRTUAL_ADDRESS floor; + struct vkd3d_gpu_va_slab_entry *slab_mem_allocations; + uint16_t *mem_vacant; + size_t mem_vacant_count;
- struct vkd3d_gpu_va_allocation - { - D3D12_GPU_VIRTUAL_ADDRESS base; - SIZE_T size; - void *ptr; - } *allocations; - size_t allocations_size; - size_t allocation_count; + struct vkd3d_gpu_va_allocation *fallback_mem_allocations; + size_t fallback_mem_allocations_size; + size_t fallback_mem_allocation_count; + D3D12_GPU_VIRTUAL_ADDRESS fallback_mem_floor; };
D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator, - size_t size, void *ptr) DECLSPEC_HIDDEN; + size_t size, size_t alignment, void *ptr) DECLSPEC_HIDDEN; void *vkd3d_gpu_va_allocator_dereference(struct vkd3d_gpu_va_allocator *allocator, D3D12_GPU_VIRTUAL_ADDRESS address) DECLSPEC_HIDDEN; void vkd3d_gpu_va_allocator_free(struct vkd3d_gpu_va_allocator *allocator,
On Mon, 30 Sep 2019 at 17:48, Hans-Kristian Arntzen post@arntzen-software.no wrote:
The GPU VA allocator was allocating memory in a way where dereferencing GPU VA required a lock + bsearch to find the right VA range.
Rather than going this route, we turn the common case into O(1) and lockless by creating a slab allocator which allows us to look up a ptr directly from GPU VA with (VA - Base) / PageSize.
The number of allocations in the fast path must be limited since we cannot trivially grow the allocator while remaining lock-free for dereferences.
I need to review this in more detail, but at first sight it makes sense.
On 9/30/19 9:40 PM, Henri Verbeet wrote:
On Mon, 30 Sep 2019 at 17:48, Hans-Kristian Arntzen post@arntzen-software.no wrote:
The GPU VA allocator was allocating memory in a way where dereferencing GPU VA required a lock + bsearch to find the right VA range.
Rather than going this route, we turn the common case into O(1) and lockless by creating a slab allocator which allows us to look up a ptr directly from GPU VA with (VA - Base) / PageSize.
The number of allocations in the fast path must be limited since we cannot trivially grow the allocator while remaining lock-free for dereferences.
I need to review this in more detail, but at first sight it makes sense.
Any updates on this and the follow-up 6/6 patch? They were marked as superseded on the list for some reason, but that's wrong.
Cheers, Hans-Kristian
This greatly reduces the number of VA allocations we have to make, makes the returned VAs more sensible, and better matches the VAs we see returned on native drivers.
D3D12 usage flags for buffers seem generic enough that there is no obvious benefit to place smaller VkBuffers on top of VkDeviceMemory.
Ideally, physical_buffer_address is used here, but this works as a good fallback if that path is added later.
With this patch and previous VA optimization, I'm observing a 2.0-2.5% FPS uplift on SOTTR when CPU bound.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/command.c | 9 +-- libs/vkd3d/device.c | 2 + libs/vkd3d/resource.c | 134 ++++++++++++++++++++++++++++++++----- libs/vkd3d/vkd3d_private.h | 2 + 4 files changed, 127 insertions(+), 20 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index d420863..dabdbb5 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -3031,8 +3031,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12Graphics
d3d12_command_list_end_current_render_pass(list);
- buffer_copy.srcOffset = src_offset; - buffer_copy.dstOffset = dst_offset; + buffer_copy.srcOffset = src_offset + src_resource->heap_offset; + buffer_copy.dstOffset = dst_offset + dst_resource->heap_offset; buffer_copy.size = byte_count;
VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer, @@ -3450,8 +3450,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm assert(d3d12_resource_is_buffer(src_resource)); assert(src_resource->desc.Width == dst_resource->desc.Width);
- vk_buffer_copy.srcOffset = 0; - vk_buffer_copy.dstOffset = 0; + vk_buffer_copy.srcOffset = src_resource->heap_offset; + vk_buffer_copy.dstOffset = dst_resource->heap_offset; vk_buffer_copy.size = dst_resource->desc.Width; VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer, src_resource->u.vk_buffer, dst_resource->u.vk_buffer, 1, &vk_buffer_copy)); @@ -4076,6 +4076,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list,
resource = vkd3d_gpu_va_allocator_dereference(&list->device->gpu_va_allocator, gpu_address); buffer_info.buffer = resource->u.vk_buffer; + buffer_info.offset = gpu_address - resource->gpu_address; buffer_info.range = resource->desc.Width - buffer_info.offset; buffer_info.range = min(buffer_info.range, vk_info->device_limits.maxUniformBufferRange); diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index beac7f2..8d65bf3 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -1335,6 +1335,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, device->feature_options.CrossAdapterRowMajorTextureSupported = FALSE; /* SPV_EXT_shader_viewport_index_layer */ device->feature_options.VPAndRTArrayIndexFromAnyShaderFeedingRasterizerSupportedWithoutGSEmulation = FALSE; + + /* FIXME: Does this actually work on NV which has 64k bufferImage alignment quirks with VkDeviceMemory? */ device->feature_options.ResourceHeapTier = D3D12_RESOURCE_HEAP_TIER_2;
if ((vr = VK_CALL(vkEnumerateDeviceExtensionProperties(physical_device, NULL, &count, NULL))) < 0) diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 31813a1..8b1c511 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -292,6 +292,8 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(ID3D12Heap *iface) return refcount; }
+static ULONG d3d12_resource_decref(struct d3d12_resource *resource); + static void d3d12_heap_destroy(struct d3d12_heap *heap) { struct d3d12_device *device = heap->device; @@ -299,6 +301,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap)
TRACE("Destroying heap %p.\n", heap);
+ if (heap->buffer_resource) + d3d12_resource_decref(heap->buffer_resource); + if (heap->is_persistent && heap->map_ptr) VK_CALL(vkUnmapMemory(device->vk_device, heap->vk_memory));
@@ -559,6 +564,12 @@ static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d1 return S_OK; }
+static HRESULT d3d12_resource_create(struct d3d12_device *device, + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, + const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed, + struct d3d12_resource **resource); + static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource) { @@ -566,6 +577,9 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, VkDeviceSize vk_memory_size; HRESULT hr; int rc; + bool buffers_allowed; + D3D12_RESOURCE_DESC resource_desc; + D3D12_RESOURCE_STATES initial_resource_state; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl; @@ -578,6 +592,7 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
heap->map_ptr = NULL; heap->map_count = 0; + heap->buffer_resource = NULL;
if (!heap->desc.Properties.CreationNodeMask) heap->desc.Properties.CreationNodeMask = 1; @@ -605,6 +620,53 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, return hr; }
+ buffers_allowed = !(heap->desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS); + if (buffers_allowed && !resource) + { + /* Create a single omnipotent buffer which fills the entire heap. + * Whenever we place buffer resources on this heap, we'll just offset this VkBuffer. + * This allows us to keep VA space somewhat sane, and keeps number of (limited) VA allocations down. + * One possible downside is that the buffer might be slightly slower to access, + * but D3D12 has very lenient usage flags for buffers. */ + + memset(&resource_desc, 0, sizeof(resource_desc)); + resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resource_desc.Width = desc->SizeInBytes; + resource_desc.Height = 1; + resource_desc.DepthOrArraySize = 1; + resource_desc.MipLevels = 1; + resource_desc.SampleDesc.Count = 1; + resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + switch (desc->Properties.Type) + { + case D3D12_HEAP_TYPE_UPLOAD: + initial_resource_state = D3D12_RESOURCE_STATE_GENERIC_READ; + break; + + case D3D12_HEAP_TYPE_READBACK: + initial_resource_state = D3D12_RESOURCE_STATE_COPY_DEST; + break; + + default: + /* Upload and readback heaps do not allow UAV access, only enable this flag for other heaps. */ + resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + initial_resource_state = D3D12_RESOURCE_STATE_COMMON; + break; + } + + if (FAILED(hr = d3d12_resource_create(device, &desc->Properties, desc->Flags, + &resource_desc, initial_resource_state, + NULL, false, &heap->buffer_resource))) + { + heap->buffer_resource = NULL; + return hr; + } + /* This internal resource should not own a reference on the device. + * d3d12_resource_create takes a reference on the device. */ + d3d12_device_release(device); + } + if (resource) { if (d3d12_resource_is_buffer(resource)) @@ -622,12 +684,19 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
heap->desc.SizeInBytes = vk_memory_size; } + else if (heap->buffer_resource) + { + hr = vkd3d_allocate_buffer_memory(device, heap->buffer_resource->u.vk_buffer, + &heap->desc.Properties, heap->desc.Flags, + &heap->vk_memory, &heap->vk_memory_type, &vk_memory_size); + } else { + /* Allocate generic memory which should hopefully match up with whatever resources + * we want to place here. */ memory_requirements.size = heap->desc.SizeInBytes; memory_requirements.alignment = heap->desc.Alignment; memory_requirements.memoryTypeBits = ~(uint32_t)0; - hr = vkd3d_allocate_device_memory(device, &heap->desc.Properties, heap->desc.Flags, &memory_requirements, NULL, &heap->vk_memory, &heap->vk_memory_type); @@ -636,6 +705,11 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, { vkd3d_private_store_destroy(&heap->private_store); pthread_mutex_destroy(&heap->mutex); + if (heap->buffer_resource) + { + d3d12_resource_decref(heap->buffer_resource); + heap->buffer_resource = NULL; + } return hr; }
@@ -1037,13 +1111,16 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12 if (resource->flags & VKD3D_RESOURCE_EXTERNAL) return;
- if (resource->gpu_address) - vkd3d_gpu_va_allocator_free(&device->gpu_va_allocator, resource->gpu_address); + if (!(resource->flags & VKD3D_RESOURCE_PLACED_BUFFER)) + { + if (resource->gpu_address) + vkd3d_gpu_va_allocator_free(&device->gpu_va_allocator, resource->gpu_address);
- if (d3d12_resource_is_buffer(resource)) - VK_CALL(vkDestroyBuffer(device->vk_device, resource->u.vk_buffer, NULL)); - else - VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL)); + if (d3d12_resource_is_buffer(resource)) + VK_CALL(vkDestroyBuffer(device->vk_device, resource->u.vk_buffer, NULL)); + else + VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL)); + }
if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP) d3d12_heap_destroy(resource->heap); @@ -1604,7 +1681,7 @@ static bool d3d12_resource_validate_heap_properties(const struct d3d12_resource static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value) + const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed) { HRESULT hr;
@@ -1634,6 +1711,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
resource->gpu_address = 0; resource->flags = 0; + if (placed && desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + resource->flags |= VKD3D_RESOURCE_PLACED_BUFFER;
if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc))) return hr; @@ -1641,6 +1720,13 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 switch (desc->Dimension) { case D3D12_RESOURCE_DIMENSION_BUFFER: + /* We'll inherit a VkBuffer reference from the heap with an implied offset. */ + if (placed) + { + resource->u.vk_buffer = VK_NULL_HANDLE; + break; + } + if (FAILED(hr = vkd3d_create_buffer(device, heap_properties, heap_flags, &resource->desc, &resource->u.vk_buffer))) return hr; @@ -1690,7 +1776,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 static HRESULT d3d12_resource_create(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, - const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource) + const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed, struct d3d12_resource **resource) { struct d3d12_resource *object; HRESULT hr; @@ -1699,7 +1785,7 @@ static HRESULT d3d12_resource_create(struct d3d12_device *device, return E_OUTOFMEMORY;
if (FAILED(hr = d3d12_resource_init(object, device, heap_properties, heap_flags, - desc, initial_state, optimized_clear_value))) + desc, initial_state, optimized_clear_value, placed))) { vkd3d_free(object); return hr; @@ -1741,7 +1827,7 @@ HRESULT d3d12_committed_resource_create(struct d3d12_device *device, }
if (FAILED(hr = d3d12_resource_create(device, heap_properties, heap_flags, - desc, initial_state, optimized_clear_value, &object))) + desc, initial_state, optimized_clear_value, false, &object))) return hr;
if (FAILED(hr = vkd3d_allocate_resource_memory(device, object, heap_properties, heap_flags))) @@ -1765,6 +1851,16 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, VkMemoryRequirements requirements; VkResult vr;
+ if (resource->flags & VKD3D_RESOURCE_PLACED_BUFFER) + { + /* Just inherit the buffer from the heap. */ + resource->u.vk_buffer = heap->buffer_resource->u.vk_buffer; + resource->heap = heap; + resource->heap_offset = heap_offset; + resource->gpu_address = heap->buffer_resource->gpu_address + heap_offset; + return S_OK; + } + if (d3d12_resource_is_buffer(resource)) VK_CALL(vkGetBufferMemoryRequirements(vk_device, resource->u.vk_buffer, &requirements)); else @@ -1814,7 +1910,7 @@ HRESULT d3d12_placed_resource_create(struct d3d12_device *device, struct d3d12_h HRESULT hr;
if (FAILED(hr = d3d12_resource_create(device, &heap->desc.Properties, heap->desc.Flags, - desc, initial_state, optimized_clear_value, &object))) + desc, initial_state, optimized_clear_value, true, &object))) return hr;
if (FAILED(hr = vkd3d_bind_heap_memory(device, object, heap, heap_offset))) @@ -1838,7 +1934,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, HRESULT hr;
if (FAILED(hr = d3d12_resource_create(device, NULL, 0, - desc, initial_state, optimized_clear_value, &object))) + desc, initial_state, optimized_clear_value, false, &object))) return hr;
TRACE("Created reserved resource %p.\n", object); @@ -2140,7 +2236,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, assert(d3d12_resource_is_buffer(resource));
return vkd3d_create_buffer_view(device, resource->u.vk_buffer, - format, offset * element_size, size * element_size, view); + format, resource->heap_offset + offset * element_size, size * element_size, view); }
static void vkd3d_set_view_swizzle_for_format(VkComponentMapping *components, @@ -2742,7 +2838,7 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, - desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view)) + desc->u.Buffer.CounterOffsetInBytes + resource->heap_offset, sizeof(uint32_t), &view->vk_counter_view)) { WARN("Failed to create counter buffer view.\n"); view->vk_counter_view = VK_NULL_HANDLE; @@ -2848,12 +2944,18 @@ bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, { const struct vkd3d_format *format; struct d3d12_resource *resource; + uint64_t range; + uint64_t offset;
format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); assert(d3d12_resource_is_buffer(resource)); + + offset = gpu_address - resource->gpu_address; + range = min(resource->desc.Width - offset, device->vk_info.device_limits.maxStorageBufferRange); + return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, - gpu_address - resource->gpu_address, VK_WHOLE_SIZE, vk_buffer_view); + offset, range, vk_buffer_view); }
/* samplers */ diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 320d604..d2fc599 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -382,6 +382,7 @@ struct d3d12_heap unsigned int map_count; uint32_t vk_memory_type;
+ struct d3d12_resource *buffer_resource; struct d3d12_device *device;
struct vkd3d_private_store private_store; @@ -396,6 +397,7 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) DECLSPEC_HIDDE #define VKD3D_RESOURCE_EXTERNAL 0x00000004 #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 +#define VKD3D_RESOURCE_PLACED_BUFFER 0x00000020
/* ID3D12Resource */ struct d3d12_resource
September 30, 2019 9:17 AM, "Hans-Kristian Arntzen" post@arntzen-software.no wrote:
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 1ac8a63..fab0cd4 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -78,7 +84,11 @@ static inline bool vkd3d_bitmask_is_contiguous(unsigned int mask) /* Undefined for x == 0. */ static inline unsigned int vkd3d_log2i(unsigned int x) { -#ifdef HAVE_BUILTIN_CLZ +#ifdef _MSC_VER
- unsigned long result;
- _BitScanReverse(&result, x);
- return (unsigned int)x;
Are you sure this is correct? It returns x rather than result, the index written by _BitScanReverse().
diff --git a/include/private/vkd3d_threads.h b/include/private/vkd3d_threads.h new file mode 100644 index 0000000..b613b50 --- /dev/null +++ b/include/private/vkd3d_threads.h @@ -0,0 +1,166 @@
[...]
+typedef struct pthread_mutex +{
- CRITICAL_SECTION *lock;
+} pthread_mutex_t;
+typedef struct pthread_cond +{
- CONDITION_VARIABLE *cond;
+} pthread_cond_t;
Why not simply store the objects directly in the struct, instead of allocating them on the heap?
[...]
+static DWORD WINAPI win32_thread_wrapper_routine(struct vkd3d_pthread_wrapper_struct *wrapper) +{
- struct vkd3d_pthread_wrapper_struct tmp = *wrapper;
- vkd3d_free(wrapper);
I get why this is on the heap--if it were on the stack, then if pthread_create() returns before the new thread can access the struct, the new thread may end up using garbage. But I can't help thinking there has to be another way.
Chip
Hi Hans-Kristian,
From a cursory look, most of this seems fine, but see what Chip said about e.g. vkd3d_log2i(). Could you please split this into individual patches though? That would both make it more reviewable and give us more useful bisects. With regard to the pthread wrappers, it seems tempting to introduce internal functions along the lines of vkd3d_create_thread(), vkd3d_join_thread(), etc.
Henri
On Tue, Oct 1, 2019 at 12:17 AM Hans-Kristian Arntzen < post@arntzen-software.no> wrote:
+typedef struct pthread +{
- HANDLE thread;
- DWORD id;
+} pthread_t;
+typedef struct pthread_mutex +{
- CRITICAL_SECTION *lock;
+} pthread_mutex_t;
+typedef struct pthread_cond +{
- CONDITION_VARIABLE *cond;
+} pthread_cond_t;
+struct vkd3d_pthread_wrapper_struct +{
- void * (*routine)(void *);
- void *arg;
+};
+static DWORD WINAPI win32_thread_wrapper_routine(struct vkd3d_pthread_wrapper_struct *wrapper) +{
- struct vkd3d_pthread_wrapper_struct tmp = *wrapper;
- vkd3d_free(wrapper);
- tmp.routine(tmp.arg);
- return 0;
+}
FWIW, ZStandard avoids complications with thread wrapping by declaring the function pointer and arg within its pthread_t struct: https://github.com/facebook/zstd/blob/dev/lib/common/threading.h
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- include/private/vkd3d_common.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 1ac8a63..ba4b68a 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -26,6 +26,10 @@ #include <limits.h> #include <stdbool.h>
+#ifdef _MSC_VER +#include <intrin.h> +#endif + #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) #endif @@ -51,7 +55,9 @@ static inline size_t align(size_t addr, size_t alignment)
static inline unsigned int vkd3d_popcount(unsigned int v) { -#ifdef HAVE_BUILTIN_POPCOUNT +#ifdef _MSC_VER + return __popcnt(v); +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555;
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- include/private/vkd3d_common.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index ba4b68a..d8fb361 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -84,7 +84,13 @@ static inline bool vkd3d_bitmask_is_contiguous(unsigned int mask) /* Undefined for x == 0. */ static inline unsigned int vkd3d_log2i(unsigned int x) { -#ifdef HAVE_BUILTIN_CLZ +#ifdef _MSC_VER + /* _BitScanReverse returns the index of the highest set bit, + * unlike clz which is 31 - index. */ + unsigned long result; + _BitScanReverse(&result, x); + return (unsigned int)result; +#elif defined(HAVE_BUILTIN_CLZ) return __builtin_clz(x) ^ 0x1f; #else static const unsigned int l[] =
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- include/private/vkd3d_common.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index d8fb361..c8dd047 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -164,8 +164,9 @@ static inline LONG InterlockedDecrement(LONG volatile *x)
#if HAVE_SYNC_ADD_AND_FETCH # define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val) -#else -# error "atomic_add_fetch() not implemented for this platform" +#elif defined(_MSC_VER) +/* InterlockedAdd returns value after increment, like add_and_fetch. */ +# define atomic_add_fetch(ptr, val) InterlockedAdd(ptr, val) #endif /* HAVE_SYNC_ADD_AND_FETCH */
static inline void vkd3d_parse_version(const char *version, int *major, int *minor)
Hans-Kristian Arntzen post@arntzen-software.no wrote:
#if HAVE_SYNC_ADD_AND_FETCH # define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val) -#else -# error "atomic_add_fetch() not implemented for this platform" +#elif defined(_MSC_VER) +/* InterlockedAdd returns value after increment, like add_and_fetch. */ +# define atomic_add_fetch(ptr, val) InterlockedAdd(ptr, val) #endif /* HAVE_SYNC_ADD_AND_FETCH */
Probably it would be better to still have the #error case.
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- include/private/vkd3d_common.h | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 1ac8a63..6134dbd 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -152,6 +152,9 @@ static inline LONG InterlockedDecrement(LONG volatile *x)
#if HAVE_SYNC_ADD_AND_FETCH # define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val) +#elif defined(_MSC_VER) +/* InterlockedAdd returns value after increment, like add_and_fetch. */ +# define atomic_add_fetch(ptr, val) InterlockedAdd(ptr, val) #else # error "atomic_add_fetch() not implemented for this platform" #endif /* HAVE_SYNC_ADD_AND_FETCH */
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d-shader/spirv.c | 2 +- libs/vkd3d-shader/vkd3d_shader_private.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index fe8935c..580230f 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -4540,7 +4540,7 @@ static void vkd3d_dxbc_compiler_emit_store_shader_output(struct vkd3d_dxbc_compi
static void vkd3d_dxbc_compiler_emit_shader_epilogue_function(struct vkd3d_dxbc_compiler *compiler) { - uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {}; + uint32_t param_type_id[MAX_REG_OUTPUT + 1], param_id[MAX_REG_OUTPUT + 1] = {0}; uint32_t void_id, type_id, ptr_type_id, function_type_id, function_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_signature *signature; diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index 47764e5..940cb76 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -56,7 +56,6 @@ #include <limits.h> #include <stdbool.h> #include <string.h> -#include <strings.h>
#define VKD3D_VEC4_SIZE 4
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- include/private/vkd3d_threads.h | 165 ++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 include/private/vkd3d_threads.h
diff --git a/include/private/vkd3d_threads.h b/include/private/vkd3d_threads.h new file mode 100644 index 0000000..94d4307 --- /dev/null +++ b/include/private/vkd3d_threads.h @@ -0,0 +1,165 @@ +/* + * Copyright 2019 Hans-Kristian Arntzen for Valve + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_THREADS_H +#define __VKD3D_THREADS_H + +#include "config.h" + +#if defined(HAVE_PTHREAD_H) +#include <pthread.h> + +#elif defined(_WIN32) /* HAVE_PTHREAD_H */ + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +/* pthread_t is passed by value in some functions, + * which implies we need pthread_t to be a pointer type here. */ +struct pthread +{ + HANDLE thread; + DWORD id; + void * (*routine)(void *); + void *arg; +}; +typedef struct pthread *pthread_t; + +/* pthread_mutex_t is not copyable, so embed CS inline. */ +typedef struct pthread_mutex +{ + CRITICAL_SECTION lock; +} pthread_mutex_t; + +/* pthread_cond_t is not copyable, so embed CV inline. 
*/ +typedef struct pthread_cond +{ + CONDITION_VARIABLE cond; +} pthread_cond_t; + +static DWORD WINAPI win32_thread_wrapper_routine(void *arg) +{ + pthread_t thread = arg; + thread->routine(thread->arg); + return 0; +} + +static inline int pthread_create(pthread_t *out_thread, void *attr, void * (*thread_fun)(void *), void *arg) +{ + pthread_t thread = vkd3d_calloc(1, sizeof(*thread)); + if (!thread) + return -1; + + (void)attr; + thread->routine = thread_fun; + thread->arg = arg; + thread->thread = CreateThread(NULL, 0, win32_thread_wrapper_routine, thread, 0, &thread->id); + if (!thread->thread) + { + vkd3d_free(thread); + return -1; + } + *out_thread = thread; + return 0; +} + +static inline int pthread_join(pthread_t thread, void **ret) +{ + (void)ret; + int success = WaitForSingleObject(thread->thread, INFINITE) == WAIT_OBJECT_0; + if (success) + { + CloseHandle(thread->thread); + vkd3d_free(thread); + } + return success ? 0 : -1; +} + +static inline int pthread_mutex_init(pthread_mutex_t *lock, void *attr) +{ + (void)attr; + InitializeCriticalSection(&lock->lock); + return 0; +} + +static inline int pthread_mutex_lock(pthread_mutex_t *lock) +{ + EnterCriticalSection(&lock->lock); + return 0; +} + +static inline int pthread_mutex_unlock(pthread_mutex_t *lock) +{ + LeaveCriticalSection(&lock->lock); + return 0; +} + +static inline int pthread_mutex_destroy(pthread_mutex_t *lock) +{ + DeleteCriticalSection(&lock->lock); + return 0; +} + +static inline int pthread_cond_init(pthread_cond_t *cond, void *attr) +{ + (void)attr; + InitializeConditionVariable(&cond->cond); + return 0; +} + +static inline void pthread_cond_destroy(pthread_cond_t *cond) +{ + (void)cond; +} + +static inline int pthread_cond_signal(pthread_cond_t *cond) +{ + WakeConditionVariable(&cond->cond); + return 0; +} + +static inline int pthread_cond_broadcast(pthread_cond_t *cond) +{ + WakeAllConditionVariable(&cond->cond); + return 0; +} + +static inline int pthread_cond_wait(pthread_cond_t 
*cond, pthread_mutex_t *lock) +{ + bool ret = SleepConditionVariableCS(&cond->cond, &lock->lock, INFINITE); + return ret ? 0 : -1; +} + +#else /* HAVE_PTHREAD_H */ +#error "Threads are not supported. Cannot build." +#endif /* HAVE_PTHREAD_H */ + +static inline void vkd3d_set_thread_name(const char *name) +{ +#if defined(_MSC_VER) + (void)name; +#elif defined(HAVE_PTHREAD_SETNAME_NP_2) + pthread_setname_np(pthread_self(), name); +#elif defined(HAVE_PTHREAD_SETNAME_NP_1) + pthread_setname_np(name); +#else + (void)name; +#endif +} + +#endif /* __VKD3D_THREADS_H */
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/vulkan_procs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h index 702cfd2..ec29eb4 100644 --- a/libs/vkd3d/vulkan_procs.h +++ b/libs/vkd3d/vulkan_procs.h @@ -178,8 +178,8 @@ VK_DEVICE_PFN(vkUpdateDescriptorSets) VK_DEVICE_PFN(vkWaitForFences)
/* VK_KHR_draw_indirect_count */ -VK_DEVICE_EXT_PFN(vkCmdDrawIndirectCountKHR); -VK_DEVICE_EXT_PFN(vkCmdDrawIndexedIndirectCountKHR); +VK_DEVICE_EXT_PFN(vkCmdDrawIndirectCountKHR) +VK_DEVICE_EXT_PFN(vkCmdDrawIndexedIndirectCountKHR)
/* VK_KHR_get_memory_requirements2 */ VK_DEVICE_EXT_PFN(vkGetBufferMemoryRequirements2KHR)
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- include/private/vkd3d_debug.h | 4 ++-- include/private/vkd3d_test.h | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/include/private/vkd3d_debug.h b/include/private/vkd3d_debug.h index 1a44cb9..c37c841 100644 --- a/include/private/vkd3d_debug.h +++ b/include/private/vkd3d_debug.h @@ -67,8 +67,8 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size) DECLSPEC_HIDDEN; vkd3d_dbg_next_time = true; \ VKD3D_DBG_PRINTF
-#define VKD3D_DBG_PRINTF(args...) \ - vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, args); } while (0) +#define VKD3D_DBG_PRINTF(...) \ + vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0)
#ifndef TRACE #define TRACE VKD3D_DBG_LOG(TRACE) diff --git a/include/private/vkd3d_test.h b/include/private/vkd3d_test.h index 319bf5d..6b8763d 100644 --- a/include/private/vkd3d_test.h +++ b/include/private/vkd3d_test.h @@ -57,40 +57,40 @@ static void vkd3d_test_end_todo(void); unsigned int vkd3d_line = line; \ VKD3D_TEST_ASSERT_THAT
-#define VKD3D_TEST_ASSERT_THAT(args...) \ - vkd3d_test_assert_that(vkd3d_line, args); } while (0) +#define VKD3D_TEST_ASSERT_THAT(...) \ + vkd3d_test_assert_that(vkd3d_line, __VA_ARGS__); } while (0)
#define ok_(line) \ do { \ unsigned int vkd3d_line = line; \ VKD3D_TEST_OK
-#define VKD3D_TEST_OK(args...) \ - vkd3d_test_ok(vkd3d_line, args); } while (0) +#define VKD3D_TEST_OK(...) \ + vkd3d_test_ok(vkd3d_line, __VA_ARGS__); } while (0)
#define todo_(line) \ do { \ unsigned int vkd3d_line = line; \ VKD3D_TEST_TODO
-#define VKD3D_TEST_TODO(args...) \ - vkd3d_test_todo(vkd3d_line, args); } while (0) +#define VKD3D_TEST_TODO(...) \ + vkd3d_test_todo(vkd3d_line, __VA_ARGS__); } while (0)
#define skip_(line) \ do { \ unsigned int vkd3d_line = line; \ VKD3D_TEST_SKIP
-#define VKD3D_TEST_SKIP(args...) \ - vkd3d_test_skip(vkd3d_line, args); } while (0) +#define VKD3D_TEST_SKIP(...) \ + vkd3d_test_skip(vkd3d_line, __VA_ARGS__); } while (0)
#define trace_(line) \ do { \ unsigned int vkd3d_line = line; \ VKD3D_TEST_TRACE
-#define VKD3D_TEST_TRACE(args...) \ - vkd3d_test_trace(vkd3d_line, args); } while (0) +#define VKD3D_TEST_TRACE(...) \ + vkd3d_test_trace(vkd3d_line, __VA_ARGS__); } while (0)
#define todo_if(is_todo) \ for (vkd3d_test_start_todo(is_todo); vkd3d_test_loop_todo(); vkd3d_test_end_todo())
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- tests/d3d12.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 9c608c1..186cc07 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -18,6 +18,10 @@
#include "d3d12_crosstest.h"
+#ifndef M_PI +#define M_PI 3.14159265358979323846264338327950288 +#endif + static PFN_D3D12_CREATE_VERSIONED_ROOT_SIGNATURE_DESERIALIZER pfn_D3D12CreateVersionedRootSignatureDeserializer; static PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE pfn_D3D12SerializeVersionedRootSignature;
@@ -585,7 +589,7 @@ static void check_sub_resource_vec4_(unsigned int line, ID3D12Resource *texture, struct resource_readback rb; unsigned int x = 0, y; bool all_match = true; - struct vec4 got = {}; + struct vec4 got = {0};
get_texture_readback_with_command_list(texture, sub_resource_idx, &rb, queue, command_list); for (y = 0; y < rb.height; ++y) @@ -614,7 +618,7 @@ static void check_sub_resource_uvec4_(unsigned int line, ID3D12Resource *texture const struct uvec4 *expected_value) { struct resource_readback rb; - struct uvec4 value = {}; + struct uvec4 value = {0}; unsigned int x = 0, y; bool all_match = true;
@@ -6365,7 +6369,7 @@ static void test_draw_uav_only(void) 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00004001, 0x00000001, 0x0100003e, }; static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)}; - static const float zero[4] = {}; + static const float zero[4] = {0};
memset(&desc, 0, sizeof(desc)); desc.no_render_target = true; @@ -8723,11 +8727,11 @@ static void test_shader_instructions(void) {&ps_loop_ret, {{2.0f, 1.0f}}, {{1.0f, 1.0f, 1.0f, 1.0f}}}, {&ps_loop_ret, {{8.0f, 7.0f}}, {{1.0f, 1.0f, 1.0f, 1.0f}}},
- {&ps_breakc_nz, {}, {{0.0f, 1.0f, 0.0f, 1.0f}}}, - {&ps_breakc_z, {}, {{0.0f, 1.0f, 0.0f, 1.0f}}}, + {&ps_breakc_nz, {{0}}, {{0.0f, 1.0f, 0.0f, 1.0f}}}, + {&ps_breakc_z, {{0}}, {{0.0f, 1.0f, 0.0f, 1.0f}}},
- {&ps_continue, {}, {{254.0f}}, true}, - {&ps_continuec_nz, {}, {{509.0f}}}, + {&ps_continue, {{0}}, {{254.0f}}, true}, + {&ps_continuec_nz, {{0}}, {{509.0f}}},
{&ps_retc_nz, {{ 0.0f}}, {{1.0f}}}, {&ps_retc_nz, {{ 10.0f}}, {{1.0f}}}, @@ -9603,9 +9607,9 @@ static void test_compute_shader_instructions(void) {&cs_atomic_iadd_tgsm_raw, {0xffffffff}, {-1}, {1, 1}, {0, 0}}, {&cs_atomic_iadd_tgsm_raw, {0xffffffff}, {-1}, {4, 4}, {3, 3}},
- {&cs_atomic_iadd_const, {}, {}, {0x00000000, 0x00000000}, {0xffffffff, 0xffffffff}}, - {&cs_atomic_iadd_const, {}, {}, {0x00000001, 0x00000001}, {0x00000000, 0x00000000}}, - {&cs_atomic_iadd_const, {}, {}, {0xffffffff, 0xffffffff}, {0xfffffffe, 0xfffffffe}}, + {&cs_atomic_iadd_const, {0}, {0}, {0x00000000, 0x00000000}, {0xffffffff, 0xffffffff}}, + {&cs_atomic_iadd_const, {0}, {0}, {0x00000001, 0x00000001}, {0x00000000, 0x00000000}}, + {&cs_atomic_iadd_const, {0}, {0}, {0xffffffff, 0xffffffff}, {0xfffffffe, 0xfffffffe}}, };
if (!init_compute_test_context(&context)) @@ -11687,7 +11691,7 @@ static void test_immediate_constant_buffer(void) ID3D12GraphicsCommandList *command_list; struct test_context_desc desc; struct test_context context; - unsigned int index[4] = {}; + unsigned int index[4] = {0}; ID3D12CommandQueue *queue; ID3D12Resource *cb; unsigned int i; @@ -20841,7 +20845,7 @@ static void test_cs_uav_store(void) 0x00000001, 0x00000001, 0x00000000, 0x00000000, 0x01000016, 0x0100003e, }; static const D3D12_SHADER_BYTECODE cs_group_index = {cs_group_index_code, sizeof(cs_group_index_code)}; - static const float zero[4] = {}; + static const float zero[4] = {0}; static const struct { const D3D12_SHADER_BYTECODE *shader;
On 10/1/19 12:33 PM, Hans-Kristian Arntzen wrote:
+#ifndef M_PI +#define M_PI 3.14159265358979323846264338327950288 +#endif
MSVC will provide M_PI if you define _USE_MATH_DEFINES before including <math.h>.
Thanks,
Jacek
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- tests/d3d12.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 9c608c1..42b9a84 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -16,6 +16,11 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */
+#ifdef _MSC_VER +/* Used for M_PI */ +#define _USE_MATH_DEFINES +#endif + #include "d3d12_crosstest.h"
static PFN_D3D12_CREATE_VERSIONED_ROOT_SIGNATURE_DESERIALIZER pfn_D3D12CreateVersionedRootSignatureDeserializer; @@ -585,7 +590,7 @@ static void check_sub_resource_vec4_(unsigned int line, ID3D12Resource *texture, struct resource_readback rb; unsigned int x = 0, y; bool all_match = true; - struct vec4 got = {}; + struct vec4 got = {0};
get_texture_readback_with_command_list(texture, sub_resource_idx, &rb, queue, command_list); for (y = 0; y < rb.height; ++y) @@ -614,7 +619,7 @@ static void check_sub_resource_uvec4_(unsigned int line, ID3D12Resource *texture const struct uvec4 *expected_value) { struct resource_readback rb; - struct uvec4 value = {}; + struct uvec4 value = {0}; unsigned int x = 0, y; bool all_match = true;
@@ -6365,7 +6370,7 @@ static void test_draw_uav_only(void) 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00004001, 0x00000001, 0x0100003e, }; static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)}; - static const float zero[4] = {}; + static const float zero[4] = {0};
memset(&desc, 0, sizeof(desc)); desc.no_render_target = true; @@ -8723,11 +8728,11 @@ static void test_shader_instructions(void) {&ps_loop_ret, {{2.0f, 1.0f}}, {{1.0f, 1.0f, 1.0f, 1.0f}}}, {&ps_loop_ret, {{8.0f, 7.0f}}, {{1.0f, 1.0f, 1.0f, 1.0f}}},
- {&ps_breakc_nz, {}, {{0.0f, 1.0f, 0.0f, 1.0f}}}, - {&ps_breakc_z, {}, {{0.0f, 1.0f, 0.0f, 1.0f}}}, + {&ps_breakc_nz, {{0}}, {{0.0f, 1.0f, 0.0f, 1.0f}}}, + {&ps_breakc_z, {{0}}, {{0.0f, 1.0f, 0.0f, 1.0f}}},
- {&ps_continue, {}, {{254.0f}}, true}, - {&ps_continuec_nz, {}, {{509.0f}}}, + {&ps_continue, {{0}}, {{254.0f}}, true}, + {&ps_continuec_nz, {{0}}, {{509.0f}}},
{&ps_retc_nz, {{ 0.0f}}, {{1.0f}}}, {&ps_retc_nz, {{ 10.0f}}, {{1.0f}}}, @@ -9603,9 +9608,9 @@ static void test_compute_shader_instructions(void) {&cs_atomic_iadd_tgsm_raw, {0xffffffff}, {-1}, {1, 1}, {0, 0}}, {&cs_atomic_iadd_tgsm_raw, {0xffffffff}, {-1}, {4, 4}, {3, 3}},
- {&cs_atomic_iadd_const, {}, {}, {0x00000000, 0x00000000}, {0xffffffff, 0xffffffff}}, - {&cs_atomic_iadd_const, {}, {}, {0x00000001, 0x00000001}, {0x00000000, 0x00000000}}, - {&cs_atomic_iadd_const, {}, {}, {0xffffffff, 0xffffffff}, {0xfffffffe, 0xfffffffe}}, + {&cs_atomic_iadd_const, {0}, {0}, {0x00000000, 0x00000000}, {0xffffffff, 0xffffffff}}, + {&cs_atomic_iadd_const, {0}, {0}, {0x00000001, 0x00000001}, {0x00000000, 0x00000000}}, + {&cs_atomic_iadd_const, {0}, {0}, {0xffffffff, 0xffffffff}, {0xfffffffe, 0xfffffffe}}, };
if (!init_compute_test_context(&context)) @@ -11687,7 +11692,7 @@ static void test_immediate_constant_buffer(void) ID3D12GraphicsCommandList *command_list; struct test_context_desc desc; struct test_context context; - unsigned int index[4] = {}; + unsigned int index[4] = {0}; ID3D12CommandQueue *queue; ID3D12Resource *cb; unsigned int i; @@ -20841,7 +20846,7 @@ static void test_cs_uav_store(void) 0x00000001, 0x00000001, 0x00000000, 0x00000000, 0x01000016, 0x0100003e, }; static const D3D12_SHADER_BYTECODE cs_group_index = {cs_group_index_code, sizeof(cs_group_index_code)}; - static const float zero[4] = {}; + static const float zero[4] = {0}; static const struct { const D3D12_SHADER_BYTECODE *shader;
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index aebe1fa..88f184d 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -1388,7 +1388,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_ReadFromSubresource(ID3D12Resour size = (box.right - box.left) / format->block_width * format->byte_count * format->block_byte_count; for (z = box.front; z < box.back; ++z) { - dst = dst_data + (z - box.front) * dst_slice_pitch; + dst = (uint8_t *)dst_data + (z - box.front) * dst_slice_pitch; src = src_data + z * vk_layout.depthPitch + box.top / format->block_height * vk_layout.rowPitch; for (y = box.top; y < box.bottom; y += format->block_height) {
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 12a711b..9dc5cff 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -1342,7 +1342,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, struct vkd3d_shader_code dxbc = {code->pShaderBytecode, code->BytecodeLength}; const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct VkShaderModuleCreateInfo shader_desc; - struct vkd3d_shader_code spirv = {}; + struct vkd3d_shader_code spirv = {0}; VkResult vr; int ret;
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/vkd3d_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 59f0eac..d063492 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -31,11 +31,11 @@
#include "vkd3d.h" #include "vkd3d_shader.h" +#include "vkd3d_threads.h"
#include <assert.h> #include <inttypes.h> #include <limits.h> -#include <pthread.h> #include <stdbool.h>
#define VK_CALL(f) (vk_procs->f)
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/device.c | 2 +- libs/vkd3d/utils.c | 4 ++-- libs/vkd3d/vkd3d_private.h | 15 ++++++--------- 3 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 3da4273..460bdf9 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -453,7 +453,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, bool *user_extension_supported = NULL; VkApplicationInfo application_info; VkInstanceCreateInfo instance_info; - char application_name[PATH_MAX]; + char application_name[VKD3D_PATH_MAX]; uint32_t extension_count; const char **extensions; VkInstance vk_instance; diff --git a/libs/vkd3d/utils.c b/libs/vkd3d/utils.c index 6a910a7..624b14f 100644 --- a/libs/vkd3d/utils.c +++ b/libs/vkd3d/utils.c @@ -804,7 +804,7 @@ HRESULT vkd3d_load_vk_device_procs(struct vkd3d_vk_device_procs *procs, return S_OK; }
-#ifdef _GNU_SOURCE +#if defined(_GNU_SOURCE) && !defined(_WIN32)
bool vkd3d_get_program_name(char program_name[PATH_MAX]) { @@ -840,7 +840,7 @@ bool vkd3d_get_program_name(char program_name[PATH_MAX])
#else
-bool vkd3d_get_program_name(char program_name[PATH_MAX]) +bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]) { *program_name = '\0'; return false; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index d063492..a51ca4d 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1231,16 +1231,13 @@ HRESULT vkd3d_load_vk_device_procs(struct vkd3d_vk_device_procs *procs,
extern const char vkd3d_build[];
-bool vkd3d_get_program_name(char program_name[PATH_MAX]) DECLSPEC_HIDDEN; - -static inline void vkd3d_set_thread_name(const char *name) -{ -#if defined(HAVE_PTHREAD_SETNAME_NP_2) - pthread_setname_np(pthread_self(), name); -#elif defined(HAVE_PTHREAD_SETNAME_NP_1) - pthread_setname_np(name); +#ifdef PATH_MAX +#define VKD3D_PATH_MAX PATH_MAX +#else +#define VKD3D_PATH_MAX 256 #endif -} + +bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]) DECLSPEC_HIDDEN;
VkResult vkd3d_set_vk_object_name_utf8(struct d3d12_device *device, uint64_t vk_object, VkDebugReportObjectTypeEXT vk_object_type, const char *name) DECLSPEC_HIDDEN;
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/vkd3d_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 59f0eac..d063492 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -31,11 +31,11 @@
#include "vkd3d.h" #include "vkd3d_shader.h" +#include "vkd3d_threads.h"
#include <assert.h> #include <inttypes.h> #include <limits.h> -#include <pthread.h> #include <stdbool.h>
#define VK_CALL(f) (vk_procs->f)
Signed-off-by: Hans-Kristian Arntzen post@arntzen-software.no --- libs/vkd3d/device.c | 2 +- libs/vkd3d/utils.c | 4 ++-- libs/vkd3d/vkd3d_private.h | 15 ++++++--------- 3 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 3da4273..460bdf9 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -453,7 +453,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, bool *user_extension_supported = NULL; VkApplicationInfo application_info; VkInstanceCreateInfo instance_info; - char application_name[PATH_MAX]; + char application_name[VKD3D_PATH_MAX]; uint32_t extension_count; const char **extensions; VkInstance vk_instance; diff --git a/libs/vkd3d/utils.c b/libs/vkd3d/utils.c index 6a910a7..624b14f 100644 --- a/libs/vkd3d/utils.c +++ b/libs/vkd3d/utils.c @@ -804,7 +804,7 @@ HRESULT vkd3d_load_vk_device_procs(struct vkd3d_vk_device_procs *procs, return S_OK; }
-#ifdef _GNU_SOURCE +#if defined(_GNU_SOURCE) && !defined(_WIN32)
bool vkd3d_get_program_name(char program_name[PATH_MAX]) { @@ -840,7 +840,7 @@ bool vkd3d_get_program_name(char program_name[PATH_MAX])
#else
-bool vkd3d_get_program_name(char program_name[PATH_MAX]) +bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]) { *program_name = '\0'; return false; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index d063492..a51ca4d 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1231,16 +1231,13 @@ HRESULT vkd3d_load_vk_device_procs(struct vkd3d_vk_device_procs *procs,
extern const char vkd3d_build[];
-bool vkd3d_get_program_name(char program_name[PATH_MAX]) DECLSPEC_HIDDEN; - -static inline void vkd3d_set_thread_name(const char *name) -{ -#if defined(HAVE_PTHREAD_SETNAME_NP_2) - pthread_setname_np(pthread_self(), name); -#elif defined(HAVE_PTHREAD_SETNAME_NP_1) - pthread_setname_np(name); +#ifdef PATH_MAX +#define VKD3D_PATH_MAX PATH_MAX +#else +#define VKD3D_PATH_MAX 256 #endif -} + +bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]) DECLSPEC_HIDDEN;
VkResult vkd3d_set_vk_object_name_utf8(struct d3d12_device *device, uint64_t vk_object, VkDebugReportObjectTypeEXT vk_object_type, const char *name) DECLSPEC_HIDDEN;