UE4, at least in Psychonauts 2, catches failure of this function. It works if zeros are returned.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 9fbde800..6ad4fa18 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -6225,10 +6225,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D1 static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetClockCalibration(ID3D12CommandQueue *iface, UINT64 *gpu_timestamp, UINT64 *cpu_timestamp) { - FIXME("iface %p, gpu_timestamp %p, cpu_timestamp %p stub!\n", + TRACE("iface %p, gpu_timestamp %p, cpu_timestamp %p.\n", iface, gpu_timestamp, cpu_timestamp);
- return E_NOTIMPL; + WARN("Setting timestamps to zero.\n"); + *gpu_timestamp = 0; + *cpu_timestamp = 0; + + return S_OK; }
static D3D12_COMMAND_QUEUE_DESC * STDMETHODCALLTYPE d3d12_command_queue_GetDesc(ID3D12CommandQueue *iface,
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- tests/d3d12.c | 19 +++++++++++++++++++ tests/d3d12_test_utils.h | 1 + 2 files changed, 20 insertions(+)
diff --git a/tests/d3d12.c b/tests/d3d12.c index 91e31578..ee000b0b 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -19454,6 +19454,8 @@ static void test_get_copyable_footprints(void) {DXGI_FORMAT_BC6H_UF16, true}, {DXGI_FORMAT_BC6H_SF16, true}, {DXGI_FORMAT_BC7_UNORM, true}, + {DXGI_FORMAT_D32_FLOAT, false}, + {DXGI_FORMAT_D24_UNORM_S8_UINT, false}, }; static const uint64_t base_offsets[] = { @@ -19499,6 +19501,14 @@ static void test_get_copyable_footprints(void) {D3D12_RESOURCE_DIMENSION_TEXTURE3D, 3, 2, 2, 2, 2, DXGI_FORMAT_BC1_UNORM, {1, 0}, D3D12_TEXTURE_LAYOUT_UNKNOWN, D3D12_RESOURCE_FLAG_NONE}, 0, 1, }, + { + {D3D12_RESOURCE_DIMENSION_TEXTURE2D, 0, 4, 4, 1, 1, DXGI_FORMAT_D32_FLOAT, + {1, 0}, D3D12_TEXTURE_LAYOUT_UNKNOWN, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL}, 0, 2, + }, + { + {D3D12_RESOURCE_DIMENSION_TEXTURE2D, 0, 4, 4, 1, 1, DXGI_FORMAT_D24_UNORM_S8_UINT, + {1, 0}, D3D12_TEXTURE_LAYOUT_UNKNOWN, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL}, 0, 3, + }, };
if (!(device = create_device())) @@ -19538,6 +19548,15 @@ static void test_get_copyable_footprints(void) sub_resource_count = resource_desc.MipLevels; if (resources[i].dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D) sub_resource_count *= resource_desc.DepthOrArraySize; + if (resource_desc.Format == DXGI_FORMAT_D24_UNORM_S8_UINT) + { + if (!vkd3d_test_platform_is_windows()) + { + skip("Depth/stencil planes are not supported.\n"); + continue; + } + sub_resource_count *= 2; + } assert(sub_resource_count <= ARRAY_SIZE(layouts));
for (k = 0; k < ARRAY_SIZE(base_offsets); ++k) diff --git a/tests/d3d12_test_utils.h b/tests/d3d12_test_utils.h index 5d900f89..c8287cf3 100644 --- a/tests/d3d12_test_utils.h +++ b/tests/d3d12_test_utils.h @@ -345,6 +345,7 @@ static unsigned int format_size(DXGI_FORMAT format) case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: case DXGI_FORMAT_R8G8B8A8_UINT: case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_D24_UNORM_S8_UINT: return 4; case DXGI_FORMAT_R16_FLOAT: case DXGI_FORMAT_R16_UNORM:
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/device.c | 10 ++++++---- tests/d3d12.c | 6 ++---- 2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 96ff7ae7..10b2da1c 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -3602,7 +3602,7 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *i = {DXGI_FORMAT_UNKNOWN, VK_FORMAT_UNDEFINED, 1, 1, 1, 1, 0};
unsigned int i, sub_resource_idx, miplevel_idx, row_count, row_size, row_pitch; - unsigned int width, height, depth, array_size; + unsigned int width, height, depth, plane_count, sub_resources_per_plane; const struct vkd3d_format *format; uint64_t offset, size, total;
@@ -3636,9 +3636,11 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *i return; }
- array_size = d3d12_resource_desc_get_layer_count(desc); + plane_count = ((format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + && (format->vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)) ? 2 : 1; + sub_resources_per_plane = d3d12_resource_desc_get_sub_resource_count(desc);
- if (!vkd3d_bound_range(first_sub_resource, sub_resource_count, desc->MipLevels * array_size)) + if (!vkd3d_bound_range(first_sub_resource, sub_resource_count, sub_resources_per_plane * plane_count)) { WARN("Invalid sub-resource range %u-%u for resource.\n", first_sub_resource, sub_resource_count); return; @@ -3648,7 +3650,7 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device *i total = 0; for (i = 0; i < sub_resource_count; ++i) { - sub_resource_idx = first_sub_resource + i; + sub_resource_idx = (first_sub_resource + i) % sub_resources_per_plane; miplevel_idx = sub_resource_idx % desc->MipLevels; width = align(d3d12_resource_desc_get_width(desc, miplevel_idx), format->block_width); height = align(d3d12_resource_desc_get_height(desc, miplevel_idx), format->block_height); diff --git a/tests/d3d12.c b/tests/d3d12.c index ee000b0b..b973ad9c 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -19550,11 +19550,9 @@ static void test_get_copyable_footprints(void) sub_resource_count *= resource_desc.DepthOrArraySize; if (resource_desc.Format == DXGI_FORMAT_D24_UNORM_S8_UINT) { + /* FIXME: we require D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL here for DS formats but windows doesn't. */ if (!vkd3d_test_platform_is_windows()) - { - skip("Depth/stencil planes are not supported.\n"); - continue; - } + resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; sub_resource_count *= 2; } assert(sub_resource_count <= ARRAY_SIZE(layouts));
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
On Mon, 10 Jan 2022 at 15:02, Conor McCarthy cmccarthy@codeweavers.com wrote:
diff --git a/tests/d3d12.c b/tests/d3d12.c index ee000b0b..b973ad9c 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -19550,11 +19550,9 @@ static void test_get_copyable_footprints(void) sub_resource_count *= resource_desc.DepthOrArraySize; if (resource_desc.Format == DXGI_FORMAT_D24_UNORM_S8_UINT) {
/* FIXME: we require D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL here for DS formats but windows doesn't. */ if (!vkd3d_test_platform_is_windows())
{
skip("Depth/stencil planes are not supported.\n");
continue;
}
resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; sub_resource_count *= 2;
Arguably, the vkd3d_test_platform_is_windows() test here just makes the test behaviour diverge between Windows and vkd3d; adding D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL should work on Windows as well, right?
The D3D12 documentation states: "If hEvent is a null handle, then this API will not return until the specified fence value(s) have been reached."
Based on a vkd3d-proton patch by Hans-Kristian Arntzen.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- libs/vkd3d/command.c | 39 ++++++++++++++++++++++++++++++++++++-- libs/vkd3d/vkd3d_private.h | 2 ++ 2 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 6ad4fa18..a800e8ec 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -806,6 +806,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF { struct d3d12_device *device = fence->device; struct vkd3d_signaled_semaphore *current; + bool signal_null_event_cond = false; unsigned int i, j; int rc;
@@ -823,7 +824,15 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF
if (current->value <= value) { - fence->device->signal_event(current->event); + if (current->event) + { + fence->device->signal_event(current->event); + } + else + { + current->latch = true; + signal_null_event_cond = true; + } } else { @@ -834,6 +843,9 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF } fence->event_count = j;
+ if (signal_null_event_cond) + pthread_cond_broadcast(&fence->null_event_cond); + if (vk_fence) { const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; @@ -916,6 +928,7 @@ static ULONG STDMETHODCALLTYPE d3d12_fence_Release(ID3D12Fence *iface) vkd3d_free(fence->events); if ((rc = pthread_mutex_destroy(&fence->mutex))) ERR("Failed to destroy mutex, error %d.\n", rc); + pthread_cond_destroy(&fence->null_event_cond); vkd3d_free(fence);
d3d12_device_release(device); @@ -997,6 +1010,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i { struct d3d12_fence *fence = impl_from_ID3D12Fence(iface); unsigned int i; + bool volatile *latch; int rc;
TRACE("iface %p, value %#"PRIx64", event %p.\n", iface, value, event); @@ -1009,7 +1023,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i
if (value <= fence->value) { - fence->device->signal_event(event); + if (event) + fence->device->signal_event(event); pthread_mutex_unlock(&fence->mutex); return S_OK; } @@ -1036,8 +1051,20 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i
fence->events[fence->event_count].value = value; fence->events[fence->event_count].event = event; + fence->events[fence->event_count].latch = false; + latch = &fence->events[fence->event_count].latch; ++fence->event_count;
+ /* If event is NULL, we need to block until the fence value completes. + * Implement this in a uniform way where we pretend we have a dummy event. + * A NULL fence->events[].event means that we should set latch to true + * and signal a condition variable instead of calling external signal_event callback. */ + if (!event) + { + while (!*latch) + pthread_cond_wait(&fence->null_event_cond, &fence->mutex); + } + pthread_mutex_unlock(&fence->mutex); return S_OK; } @@ -1095,6 +1122,13 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * return hresult_from_errno(rc); }
+ if ((rc = pthread_cond_init(&fence->null_event_cond, NULL))) + { + ERR("Failed to initialize cond variable, error %d.\n", rc); + pthread_mutex_destroy(&fence->mutex); + return hresult_from_errno(rc); + } + if (flags) FIXME("Ignoring flags %#x.\n", flags);
@@ -1112,6 +1146,7 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * if (FAILED(hr = vkd3d_private_store_init(&fence->private_store))) { pthread_mutex_destroy(&fence->mutex); + pthread_cond_destroy(&fence->null_event_cond); return hr; }
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 1bccb35d..e129681b 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -361,11 +361,13 @@ struct d3d12_fence
uint64_t value; pthread_mutex_t mutex; + pthread_cond_t null_event_cond;
struct vkd3d_waiting_event { uint64_t value; HANDLE event; + bool volatile latch; } *events; size_t events_size; size_t event_count;
On Mon, 10 Jan 2022 at 15:02, Conor McCarthy cmccarthy@codeweavers.com wrote:
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 1bccb35d..e129681b 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -361,11 +361,13 @@ struct d3d12_fence
uint64_t value; pthread_mutex_t mutex;
pthread_cond_t null_event_cond;
struct vkd3d_waiting_event { uint64_t value; HANDLE event;
bool volatile latch;
} *events; size_t events_size; size_t event_count;
--
Do we need the volatile above?
Testing this before NULL event handling is patched results in a crash. Based on a vkd3d-proton patch by Hans-Kristian Arntzen.
Signed-off-by: Conor McCarthy cmccarthy@codeweavers.com --- tests/d3d12.c | 12 ++++++++++-- tests/d3d12_crosstest.h | 26 ++++++++++++++++++++++++++ tests/d3d12_test_utils.h | 2 ++ 3 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/tests/d3d12.c b/tests/d3d12.c index b973ad9c..9c5f84ef 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -4412,11 +4412,14 @@ static void test_fence_values(void) value = ID3D12Fence_GetCompletedValue(fence); ok(value == next_value, "Got value %#"PRIx64", expected %#"PRIx64".\n", value, next_value);
- for (i = 0; i < 100; ++i) + for (i = 0; i < 200; ++i) { ++next_value; queue_signal(queue, fence, next_value); - wait_queue_idle(device, queue); + if ((i * 11) & 8) + wait_queue_idle_no_event(device, queue); + else + wait_queue_idle(device, queue); value = ID3D12Fence_GetCompletedValue(fence); ok(value == next_value, "Got value %#"PRIx64", expected %#"PRIx64".\n", value, next_value); } @@ -4453,6 +4456,11 @@ static void test_fence_values(void) wait_queue_idle(device, queue); value = ID3D12Fence_GetCompletedValue(fence); ok(value == next_value, "Got value %#"PRIx64", expected %#"PRIx64".\n", value, next_value); + next_value <<= 1; + queue_signal(queue, fence, next_value); + wait_queue_idle_no_event(device, queue); + value = ID3D12Fence_GetCompletedValue(fence); + ok(value == next_value, "Got value %#"PRIx64", expected %#"PRIx64".\n", value, next_value); next_value = 0; queue_signal(queue, fence, next_value); wait_queue_idle(device, queue); diff --git a/tests/d3d12_crosstest.h b/tests/d3d12_crosstest.h index 0b77bdee..557d0430 100644 --- a/tests/d3d12_crosstest.h +++ b/tests/d3d12_crosstest.h @@ -226,6 +226,15 @@ static HRESULT wait_for_fence(ID3D12Fence *fence, uint64_t value) return ret == WAIT_OBJECT_0 ? S_OK : E_FAIL; }
+static HRESULT wait_for_fence_no_event(ID3D12Fence *fence, uint64_t value) +{ + if (ID3D12Fence_GetCompletedValue(fence) >= value) + return S_OK; + + /* This is defined to block on the value with infinite timeout. */ + return ID3D12Fence_SetEventOnCompletion(fence, value, NULL); +} + static void wait_queue_idle_(unsigned int line, ID3D12Device *device, ID3D12CommandQueue *queue) { ID3D12Fence *fence; @@ -243,6 +252,23 @@ static void wait_queue_idle_(unsigned int line, ID3D12Device *device, ID3D12Comm ID3D12Fence_Release(fence); }
+static void wait_queue_idle_no_event_(unsigned int line, ID3D12Device *device, ID3D12CommandQueue *queue) +{ + ID3D12Fence *fence; + HRESULT hr; + + hr = ID3D12Device_CreateFence(device, 0, D3D12_FENCE_FLAG_NONE, + &IID_ID3D12Fence, (void **)&fence); + assert_that_(line)(hr == S_OK, "Failed to create fence, hr %#x.\n", hr); + + hr = ID3D12CommandQueue_Signal(queue, fence, 1); + assert_that_(line)(hr == S_OK, "Failed to signal fence, hr %#x.\n", hr); + hr = wait_for_fence_no_event(fence, 1); + assert_that_(line)(hr == S_OK, "Failed to wait for fence, hr %#x.\n", hr); + + ID3D12Fence_Release(fence); +} + static bool use_warp_device; static unsigned int use_adapter_idx;
diff --git a/tests/d3d12_test_utils.h b/tests/d3d12_test_utils.h index c8287cf3..48156b81 100644 --- a/tests/d3d12_test_utils.h +++ b/tests/d3d12_test_utils.h @@ -26,6 +26,8 @@ struct vec4
#define wait_queue_idle(a, b) wait_queue_idle_(__LINE__, a, b) static void wait_queue_idle_(unsigned int line, ID3D12Device *device, ID3D12CommandQueue *queue); +#define wait_queue_idle_no_event(a, b) wait_queue_idle_no_event_(__LINE__, a, b) +static void wait_queue_idle_no_event_(unsigned int line, ID3D12Device *device, ID3D12CommandQueue *queue); static ID3D12Device *create_device(void);
static void set_rect(RECT *rect, int left, int top, int right, int bottom)
On Mon, 10 Jan 2022 at 15:02, Conor McCarthy cmccarthy@codeweavers.com wrote:
diff --git a/tests/d3d12_crosstest.h b/tests/d3d12_crosstest.h index 0b77bdee..557d0430 100644 --- a/tests/d3d12_crosstest.h +++ b/tests/d3d12_crosstest.h @@ -226,6 +226,15 @@ static HRESULT wait_for_fence(ID3D12Fence *fence, uint64_t value) return ret == WAIT_OBJECT_0 ? S_OK : E_FAIL; }
+static HRESULT wait_for_fence_no_event(ID3D12Fence *fence, uint64_t value) +{
- if (ID3D12Fence_GetCompletedValue(fence) >= value)
return S_OK;
- /* This is defined to block on the value with infinite timeout. */
- return ID3D12Fence_SetEventOnCompletion(fence, value, NULL);
+}
static void wait_queue_idle_(unsigned int line, ID3D12Device *device, ID3D12CommandQueue *queue) { ID3D12Fence *fence; @@ -243,6 +252,23 @@ static void wait_queue_idle_(unsigned int line, ID3D12Device *device, ID3D12Comm ID3D12Fence_Release(fence); }
+static void wait_queue_idle_no_event_(unsigned int line, ID3D12Device *device, ID3D12CommandQueue *queue) +{
- ID3D12Fence *fence;
- HRESULT hr;
- hr = ID3D12Device_CreateFence(device, 0, D3D12_FENCE_FLAG_NONE,
&IID_ID3D12Fence, (void **)&fence);
- assert_that_(line)(hr == S_OK, "Failed to create fence, hr %#x.\n", hr);
- hr = ID3D12CommandQueue_Signal(queue, fence, 1);
- assert_that_(line)(hr == S_OK, "Failed to signal fence, hr %#x.\n", hr);
- hr = wait_for_fence_no_event(fence, 1);
- assert_that_(line)(hr == S_OK, "Failed to wait for fence, hr %#x.\n", hr);
- ID3D12Fence_Release(fence);
+}
static bool use_warp_device; static unsigned int use_adapter_idx;
This introduces build warnings. E.g.:
In file included from <vkd3d>/tests/d3d12_invalid_usage.c:19: <vkd3d>/tests/d3d12_crosstest.h:255:13: warning: ‘wait_queue_idle_no_event_’ defined but not used [-Wunused-function]
On Mon, 10 Jan 2022 at 15:02, Conor McCarthy cmccarthy@codeweavers.com wrote:
@@ -6225,10 +6225,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D1 static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetClockCalibration(ID3D12CommandQueue *iface, UINT64 *gpu_timestamp, UINT64 *cpu_timestamp) {
- FIXME("iface %p, gpu_timestamp %p, cpu_timestamp %p stub!\n",
- TRACE("iface %p, gpu_timestamp %p, cpu_timestamp %p.\n", iface, gpu_timestamp, cpu_timestamp);
- return E_NOTIMPL;
- WARN("Setting timestamps to zero.\n");
- *gpu_timestamp = 0;
- *cpu_timestamp = 0;
- return S_OK;
}
Always returning 0 here (without FIXME) seems like something that may end up being problematic in hard to diagnose ways at some point. Couldn't we use VK_EXT_calibrated_timestamps here, or failing that, build something on top of vkCmdWriteTimestamp()?