From: Conor McCarthy <cmccarthy(a)codeweavers.com>
D3D12 supports signalling a fence to a lower value, while Vulkan timeline
semaphores do not. On the GPU side this is handled by simply submitting
the signal anyway, if a test for this passes on device creation, because
working around this is impractical. For CPU signals the Vulkan semaphore
is replaced with a new one at the lower value only if no waits and/or
signals are pending on the GPU. Otherwise, a fixme is emitted.
Partly based on a vkd3d-proton patch by Hans-Kristian Arntzen (not
including the handling of lower fence values).
The old implementation is used if KHR_timeline_semaphore is not
available or GPU signals do not work for a lower value.
Signed-off-by: Conor McCarthy <cmccarthy(a)codeweavers.com>
Signed-off-by: Henri Verbeet <hverbeet(a)codeweavers.com>
---
This supersedes patch 230633.
v4:
- Fix the (trivial) merge conflict with patch 230641.
- Simplify setting "device->use_timeline_semaphores".
libs/vkd3d/command.c | 571 ++++++++++++++++++++++++++++++++++---
libs/vkd3d/device.c | 87 ++++++
libs/vkd3d/vkd3d_private.h | 30 ++
libs/vkd3d/vulkan_procs.h | 5 +
tests/d3d12.c | 11 +-
5 files changed, 663 insertions(+), 41 deletions(-)
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index 952675cf..09171fe4 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -46,6 +46,9 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device,
object->vk_queue_flags = properties->queueFlags;
object->timestamp_bits = properties->timestampValidBits;
+ object->wait_completion_semaphore = VK_NULL_HANDLE;
+ object->pending_wait_completion_value = 0;
+
object->semaphores = NULL;
object->semaphores_size = 0;
object->semaphore_count = 0;
@@ -61,6 +64,20 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device,
return S_OK;
}
+bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device)
+{
+ VkResult vr;
+
+ if (!queue->wait_completion_semaphore
+ && (vr = vkd3d_create_timeline_semaphore(device, 0, &queue->wait_completion_semaphore)) < 0)
+ {
+ WARN("Failed to create timeline semaphore, vr %d.\n", vr);
+ return false;
+ }
+
+ return true;
+}
+
void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
@@ -75,6 +92,8 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
vkd3d_free(queue->semaphores);
+ VK_CALL(vkDestroySemaphore(device->vk_device, queue->wait_completion_semaphore, NULL));
+
for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i)
{
if (queue->old_vk_semaphores[i])
@@ -268,6 +287,7 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker,
}
worker->enqueued_fences[worker->enqueued_fence_count].vk_fence = vk_fence;
+ worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = VK_NULL_HANDLE;
waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence;
waiting_fence->fence = fence;
waiting_fence->value = value;
@@ -317,6 +337,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker)
{
unsigned int i;
+ bool timeline;
size_t count;
bool ret;
@@ -325,8 +346,18 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
count = worker->fence_count + worker->enqueued_fence_count;
- ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
- count, sizeof(*worker->vk_fences));
+ if ((timeline = worker->device->use_timeline_semaphores))
+ {
+ ret = vkd3d_array_reserve((void **) &worker->vk_semaphores, &worker->vk_semaphores_size,
+ count, sizeof(*worker->vk_semaphores));
+ ret &= vkd3d_array_reserve((void **) &worker->semaphore_wait_values, &worker->semaphore_wait_values_size,
+ count, sizeof(*worker->semaphore_wait_values));
+ }
+ else
+ {
+ ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
+ count, sizeof(*worker->vk_fences));
+ }
ret &= vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size,
count, sizeof(*worker->fences));
if (!ret)
@@ -339,7 +370,16 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
{
struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i];
- worker->vk_fences[worker->fence_count] = current->vk_fence;
+ if (timeline)
+ {
+ worker->vk_semaphores[worker->fence_count] = current->vk_semaphore;
+ worker->semaphore_wait_values[worker->fence_count] = current->waiting_fence.value;
+ }
+ else
+ {
+ worker->vk_fences[worker->fence_count] = current->vk_fence;
+ }
+
worker->fences[worker->fence_count] = current->waiting_fence;
++worker->fence_count;
}
@@ -347,6 +387,66 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
worker->enqueued_fence_count = 0;
}
+static void vkd3d_wait_for_gpu_timeline_semaphores(struct vkd3d_fence_worker *worker)
+{
+ const struct d3d12_device *device = worker->device;
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+ VkSemaphoreWaitInfoKHR wait_info;
+ VkSemaphore vk_semaphore;
+ uint64_t counter_value;
+ unsigned int i, j;
+ HRESULT hr;
+ int vr;
+
+ if (!worker->fence_count)
+ return;
+
+ wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR;
+ wait_info.pNext = NULL;
+ wait_info.flags = VK_SEMAPHORE_WAIT_ANY_BIT_KHR;
+ wait_info.pSemaphores = worker->vk_semaphores;
+ wait_info.semaphoreCount = worker->fence_count;
+ wait_info.pValues = worker->semaphore_wait_values;
+
+ vr = VK_CALL(vkWaitSemaphoresKHR(device->vk_device, &wait_info, ~(uint64_t)0));
+ if (vr == VK_TIMEOUT)
+ return;
+ if (vr != VK_SUCCESS)
+ {
+ ERR("Failed to wait for Vulkan timeline semaphores, vr %d.\n", vr);
+ return;
+ }
+
+ for (i = 0, j = 0; i < worker->fence_count; ++i)
+ {
+ struct vkd3d_waiting_fence *current = &worker->fences[i];
+
+ vk_semaphore = worker->vk_semaphores[i];
+ if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, vk_semaphore, &counter_value))) < 0)
+ {
+ ERR("Failed to get Vulkan semaphore value, vr %d.\n", vr);
+ }
+ else if (counter_value >= current->value)
+ {
+ TRACE("Signaling fence %p value %#"PRIx64".\n", current->fence, current->value);
+ if (FAILED(hr = d3d12_fence_signal(current->fence, counter_value, VK_NULL_HANDLE)))
+ ERR("Failed to signal D3D12 fence, hr %#x.\n", hr);
+
+            InterlockedDecrement(&current->fence->pending_worker_operation_count);
+ continue;
+ }
+
+ if (i != j)
+ {
+ worker->vk_semaphores[j] = worker->vk_semaphores[i];
+ worker->semaphore_wait_values[j] = worker->semaphore_wait_values[i];
+ worker->fences[j] = worker->fences[i];
+ }
+ ++j;
+ }
+ worker->fence_count = j;
+}
+
static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker)
{
struct d3d12_device *device = worker->device;
@@ -408,7 +508,7 @@ static void *vkd3d_fence_worker_main(void *arg)
for (;;)
{
- vkd3d_wait_for_gpu_fences(worker);
+ worker->wait_for_gpu_fences(worker);
if (!worker->fence_count || InterlockedAdd(&worker->enqueued_fence_count, 0))
{
@@ -473,6 +573,13 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
worker->vk_fences_size = 0;
worker->fences = NULL;
worker->fences_size = 0;
+ worker->vk_semaphores = NULL;
+ worker->vk_semaphores_size = 0;
+ worker->semaphore_wait_values = NULL;
+ worker->semaphore_wait_values_size = 0;
+
+ worker->wait_for_gpu_fences = device->use_timeline_semaphores
+ ? vkd3d_wait_for_gpu_timeline_semaphores : vkd3d_wait_for_gpu_fences;
if ((rc = vkd3d_mutex_init(&worker->mutex)))
{
@@ -535,6 +642,8 @@ HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
vkd3d_free(worker->enqueued_fences);
vkd3d_free(worker->vk_fences);
vkd3d_free(worker->fences);
+ vkd3d_free(worker->vk_semaphores);
+ vkd3d_free(worker->semaphore_wait_values);
return S_OK;
}
@@ -684,6 +793,7 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence)
}
d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true);
+ VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
vkd3d_mutex_unlock(&fence->mutex);
}
@@ -802,31 +912,21 @@ static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence,
return hr;
}
-static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence)
+static bool d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence)
{
struct d3d12_device *device = fence->device;
- struct vkd3d_signaled_semaphore *current;
bool signal_null_event_cond = false;
unsigned int i, j;
- int rc;
-
- if ((rc = vkd3d_mutex_lock(&fence->mutex)))
- {
- ERR("Failed to lock mutex, error %d.\n", rc);
- return hresult_from_errno(rc);
- }
-
- fence->value = value;
for (i = 0, j = 0; i < fence->event_count; ++i)
{
struct vkd3d_waiting_event *current = &fence->events[i];
- if (current->value <= value)
+ if (current->value <= fence->value)
{
if (current->event)
{
- fence->device->signal_event(current->event);
+ device->signal_event(current->event);
}
else
{
@@ -841,9 +941,28 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF
++j;
}
}
+
fence->event_count = j;
- if (signal_null_event_cond)
+ return signal_null_event_cond;
+}
+
+static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence)
+{
+ struct d3d12_device *device = fence->device;
+ struct vkd3d_signaled_semaphore *current;
+ unsigned int i;
+ int rc;
+
+ if ((rc = vkd3d_mutex_lock(&fence->mutex)))
+ {
+ ERR("Failed to lock mutex, error %d.\n", rc);
+ return hresult_from_errno(rc);
+ }
+
+ fence->value = value;
+
+ if (d3d12_fence_signal_external_events_locked(fence))
vkd3d_cond_broadcast(&fence->null_event_cond);
if (vk_fence)
@@ -1069,12 +1188,160 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i
return S_OK;
}
+static inline bool d3d12_fence_gpu_wait_is_completed(const struct d3d12_fence *fence, unsigned int i)
+{
+ const struct d3d12_device *device = fence->device;
+ const struct vkd3d_vk_device_procs *vk_procs;
+ uint64_t value;
+ VkResult vr;
+
+ vk_procs = &device->vk_procs;
+
+ if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device,
+ fence->gpu_waits[i].queue->wait_completion_semaphore, &value))) >= 0)
+ {
+ return value >= fence->gpu_waits[i].pending_value;
+ }
+
+ ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
+ return true;
+}
+
+static inline bool d3d12_fence_has_pending_gpu_ops_locked(struct d3d12_fence *fence)
+{
+ const struct d3d12_device *device = fence->device;
+ const struct vkd3d_vk_device_procs *vk_procs;
+ uint64_t value;
+ unsigned int i;
+ VkResult vr;
+
+ for (i = 0; i < fence->gpu_wait_count; ++i)
+ {
+ if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count)
+ fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count];
+ }
+ if (fence->gpu_wait_count)
+ return true;
+
+ /* Check for pending signals too. */
+ if (fence->value >= fence->pending_timeline_value)
+ return false;
+
+ vk_procs = &device->vk_procs;
+
+ /* Check the actual semaphore value in case fence->value update is lagging. */
+ if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, fence->timeline_semaphore, &value))) < 0)
+ {
+ ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
+ return false;
+ }
+
+ return value < fence->pending_timeline_value;
+}
+
+/* Replace the VkSemaphore with a new one to allow a lower value to be set. Ideally apps will
+ * only use this to reset the fence when no operations are pending on the queue. */
+static HRESULT d3d12_fence_reinit_timeline_semaphore_locked(struct d3d12_fence *fence, uint64_t value)
+{
+ const struct d3d12_device *device = fence->device;
+ const struct vkd3d_vk_device_procs *vk_procs;
+ VkSemaphore timeline_semaphore;
+ VkResult vr;
+
+ if (d3d12_fence_has_pending_gpu_ops_locked(fence))
+ {
+ /* This situation is not very likely because it means a fence with pending waits and/or signals was
+ * signalled on the CPU to a lower value. For now, emit a fixme so it can be patched if necessary.
+ * A patch already exists for this but it's not pretty. */
+ FIXME("Unable to re-initialise timeline semaphore to a lower value due to pending GPU ops.\n");
+ return E_FAIL;
+ }
+
+ if ((vr = vkd3d_create_timeline_semaphore(device, value, &timeline_semaphore)) < 0)
+ {
+ WARN("Failed to create timeline semaphore, vr %d.\n", vr);
+ return hresult_from_vk_result(vr);
+ }
+
+ fence->value = value;
+ fence->pending_timeline_value = value;
+
+ WARN("Replacing timeline semaphore with a new object.\n");
+
+ vk_procs = &device->vk_procs;
+
+ VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
+ fence->timeline_semaphore = timeline_semaphore;
+
+ return S_OK;
+}
+
+static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
+{
+ const struct d3d12_device *device = fence->device;
+ VkSemaphoreSignalInfoKHR info;
+ HRESULT hr = S_OK;
+ VkResult vr;
+ int rc;
+
+ if ((rc = vkd3d_mutex_lock(&fence->mutex)))
+ {
+ ERR("Failed to lock mutex, error %d.\n", rc);
+ return hresult_from_errno(rc);
+ }
+
+ /* We must only signal a value which is greater than the current value.
+ * That value can be in the range of current known value (fence->value), or as large as pending_timeline_value.
+ * Pending timeline value signal might be blocked by another synchronization primitive, and thus statically
+ * cannot be that value, so the safest thing to do is to check the current value which is updated by the fence
+ * wait thread continuously. This check is technically racy since the value might be immediately out of date,
+ * but there is no way to avoid this. */
+ if (value > fence->value)
+ {
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+
+ /* Sanity check against the delta limit. */
+ if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference)
+ {
+ FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n",
+ value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference);
+ }
+
+ info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR;
+ info.pNext = NULL;
+ info.semaphore = fence->timeline_semaphore;
+ info.value = value;
+ if ((vr = VK_CALL(vkSignalSemaphoreKHR(device->vk_device, &info))) >= 0)
+ {
+ fence->value = value;
+ if (value > fence->pending_timeline_value)
+ fence->pending_timeline_value = value;
+ }
+ else
+ {
+ ERR("Failed to signal timeline semaphore, vr %d.\n", vr);
+ hr = hresult_from_vk_result(vr);
+ }
+ }
+ else if (value < fence->value)
+ {
+ hr = d3d12_fence_reinit_timeline_semaphore_locked(fence, value);
+ }
+
+ d3d12_fence_signal_external_events_locked(fence);
+
+ vkd3d_mutex_unlock(&fence->mutex);
+ return hr;
+}
+
static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value)
{
struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);
TRACE("iface %p, value %#"PRIx64".\n", iface, value);
+ if (fence->timeline_semaphore)
+ return d3d12_fence_signal_cpu_timeline_semaphore(fence, value);
return d3d12_fence_signal(fence, value, VK_NULL_HANDLE);
}
@@ -1108,6 +1375,7 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface)
static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device,
UINT64 initial_value, D3D12_FENCE_FLAGS flags)
{
+ VkResult vr;
HRESULT hr;
int rc;
@@ -1136,6 +1404,16 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *
fence->events_size = 0;
fence->event_count = 0;
+ fence->timeline_semaphore = VK_NULL_HANDLE;
+ if (device->use_timeline_semaphores && (vr = vkd3d_create_timeline_semaphore(device, initial_value,
+ &fence->timeline_semaphore)) < 0)
+ {
+ WARN("Failed to create timeline semaphore, vr %d.\n", vr);
+ return hresult_from_vk_result(vr);
+ }
+ fence->pending_timeline_value = initial_value;
+ fence->gpu_wait_count = 0;
+
list_init(&fence->semaphores);
fence->semaphore_count = 0;
@@ -1172,6 +1450,25 @@ HRESULT d3d12_fence_create(struct d3d12_device *device,
return S_OK;
}
+VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint64_t initial_value,
+ VkSemaphore *timeline_semaphore)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+ VkSemaphoreTypeCreateInfoKHR type_info;
+ VkSemaphoreCreateInfo info;
+
+ info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+ info.pNext = &type_info;
+ info.flags = 0;
+
+ type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR;
+ type_info.pNext = NULL;
+ type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR;
+ type_info.initialValue = initial_value;
+
+ return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore));
+}
+
/* Command buffers */
static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list,
const char *message, ...)
@@ -6138,18 +6435,88 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i
FIXME("iface %p stub!\n", iface);
}
+static HRESULT d3d12_fence_update_gpu_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
+{
+ const struct d3d12_device *device = fence->device;
+ int rc;
+
+ if ((rc = vkd3d_mutex_lock(&fence->mutex)))
+ {
+ ERR("Failed to lock mutex, error %d.\n", rc);
+ return hresult_from_errno(rc);
+ }
+
+ /* If we're attempting to async signal a fence with a value which is not strictly increasing the payload value,
+ * warn about this case. Do not treat this as an error since it works at least with RADV and Nvidia drivers and
+ * there's no workaround on the GPU side. */
+ if (value <= fence->pending_timeline_value)
+ {
+ WARN("Fence %p values are not strictly increasing. Pending values: old %"PRIu64", new %"PRIu64".\n",
+ fence, fence->pending_timeline_value, value);
+ }
+ /* Sanity check against the delta limit. Use the current fence value. */
+ else if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference)
+ {
+ FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n",
+ value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference);
+ }
+ fence->pending_timeline_value = value;
+
+ vkd3d_mutex_unlock(&fence->mutex);
+
+ return S_OK;
+}
+
+static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, VkSemaphore vk_semaphore,
+ struct d3d12_fence *fence, uint64_t value, struct vkd3d_queue *queue)
+{
+ struct vkd3d_waiting_fence *waiting_fence;
+ int rc;
+
+ TRACE("worker %p, fence %p, value %#"PRIx64".\n", worker, fence, value);
+
+ if ((rc = vkd3d_mutex_lock(&worker->mutex)))
+ {
+ ERR("Failed to lock mutex, error %d.\n", rc);
+ return hresult_from_errno(rc);
+ }
+
+ if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size,
+ worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences)))
+ {
+ ERR("Failed to add GPU timeline semaphore.\n");
+ vkd3d_mutex_unlock(&worker->mutex);
+ return E_OUTOFMEMORY;
+ }
+
+ worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = vk_semaphore;
+ waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence;
+ waiting_fence->fence = fence;
+ waiting_fence->value = value;
+ waiting_fence->queue = queue;
+ ++worker->enqueued_fence_count;
+
+ InterlockedIncrement(&fence->pending_worker_operation_count);
+
+ vkd3d_cond_signal(&worker->cond);
+ vkd3d_mutex_unlock(&worker->mutex);
+
+ return S_OK;
+}
+
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface,
ID3D12Fence *fence_iface, UINT64 value)
{
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
+ VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
const struct vkd3d_vk_device_procs *vk_procs;
VkSemaphore vk_semaphore = VK_NULL_HANDLE;
VkFence vk_fence = VK_NULL_HANDLE;
struct vkd3d_queue *vkd3d_queue;
+ uint64_t sequence_number = 0;
struct d3d12_device *device;
struct d3d12_fence *fence;
VkSubmitInfo submit_info;
- uint64_t sequence_number;
VkQueue vk_queue;
VkResult vr;
HRESULT hr;
@@ -6162,10 +6529,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
fence = unsafe_impl_from_ID3D12Fence(fence_iface);
- if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0)
+ if (device->use_timeline_semaphores)
{
- WARN("Failed to create Vulkan fence, vr %d.\n", vr);
- goto fail_vkresult;
+ if (FAILED(hr = d3d12_fence_update_gpu_signal_timeline_semaphore(fence, value)))
+ return hr;
+
+ vk_semaphore = fence->timeline_semaphore;
+ assert(vk_semaphore);
+ }
+ else
+ {
+ if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0)
+ {
+ WARN("Failed to create Vulkan fence, vr %d.\n", vr);
+ goto fail_vkresult;
+ }
}
if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue)))
@@ -6175,7 +6553,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
goto fail;
}
- if ((vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, device, &vk_semaphore)) < 0)
+ if (!device->use_timeline_semaphores && (vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue,
+ device, &vk_semaphore)) < 0)
{
ERR("Failed to create Vulkan semaphore, vr %d.\n", vr);
vk_semaphore = VK_NULL_HANDLE;
@@ -6191,7 +6570,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0;
submit_info.pSignalSemaphores = &vk_semaphore;
- if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0)
+ if (device->use_timeline_semaphores)
+ {
+ timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
+ timeline_submit_info.pNext = NULL;
+ timeline_submit_info.pSignalSemaphoreValues = &value;
+ timeline_submit_info.signalSemaphoreValueCount = submit_info.signalSemaphoreCount;
+ timeline_submit_info.waitSemaphoreValueCount = 0;
+ timeline_submit_info.pWaitSemaphoreValues = NULL;
+ submit_info.pNext = &timeline_submit_info;
+ }
+
+ vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence));
+ if (!device->use_timeline_semaphores && vr >= 0)
{
sequence_number = ++vkd3d_queue->submitted_sequence_number;
@@ -6208,6 +6599,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
goto fail_vkresult;
}
+ if (device->use_timeline_semaphores)
+ return vkd3d_enqueue_timeline_semaphore(&device->fence_worker, vk_semaphore, fence, value, vkd3d_queue);
+
if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value)))
vk_semaphore = VK_NULL_HANDLE;
@@ -6243,32 +6637,27 @@ fail_vkresult:
hr = hresult_from_vk_result(vr);
fail:
VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL));
- VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL));
+ if (!device->use_timeline_semaphores)
+ VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL));
return hr;
}
-static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface,
- ID3D12Fence *fence_iface, UINT64 value)
+static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_queue *command_queue,
+ struct d3d12_fence *fence, uint64_t value)
{
static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
- struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
const struct vkd3d_vk_device_procs *vk_procs;
struct vkd3d_signaled_semaphore *semaphore;
uint64_t completed_value = 0;
struct vkd3d_queue *queue;
- struct d3d12_fence *fence;
VkSubmitInfo submit_info;
VkQueue vk_queue;
VkResult vr;
HRESULT hr;
- TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
-
vk_procs = &command_queue->device->vk_procs;
queue = command_queue->vkd3d_queue;
- fence = unsafe_impl_from_ID3D12Fence(fence_iface);
-
semaphore = d3d12_fence_acquire_vk_semaphore(fence, value, &completed_value);
if (!semaphore && completed_value >= value)
{
@@ -6346,6 +6735,122 @@ fail:
return hr;
}
+static inline void d3d12_fence_update_gpu_wait(struct d3d12_fence *fence, const struct vkd3d_queue *queue)
+{
+ unsigned int i;
+ bool found;
+ int rc;
+
+ if ((rc = vkd3d_mutex_lock(&fence->mutex)))
+ {
+ ERR("Failed to lock mutex, error %d.\n", rc);
+ return;
+ }
+
+ for (i = 0, found = false; i < fence->gpu_wait_count; ++i)
+ {
+ if (fence->gpu_waits[i].queue == queue)
+ {
+ fence->gpu_waits[i].pending_value = queue->pending_wait_completion_value;
+ found = true;
+ }
+ else if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count)
+ {
+ fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count];
+ }
+ }
+
+ if (!found)
+ {
+ if (fence->gpu_wait_count < ARRAY_SIZE(fence->gpu_waits))
+ {
+ fence->gpu_waits[fence->gpu_wait_count].queue = queue;
+ fence->gpu_waits[fence->gpu_wait_count++].pending_value = queue->pending_wait_completion_value;
+ }
+ else
+ {
+ FIXME("Unable to track GPU fence wait.\n");
+ }
+ }
+
+ vkd3d_mutex_unlock(&fence->mutex);
+}
+
+static HRESULT d3d12_command_queue_wait_timeline_semaphore(struct d3d12_command_queue *command_queue,
+ struct d3d12_fence *fence, uint64_t value)
+{
+ static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
+ const struct vkd3d_vk_device_procs *vk_procs;
+ struct vkd3d_queue *queue;
+ VkSubmitInfo submit_info;
+ VkQueue vk_queue;
+ VkResult vr;
+
+ vk_procs = &command_queue->device->vk_procs;
+ queue = command_queue->vkd3d_queue;
+
+ assert(fence->timeline_semaphore);
+ timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
+ timeline_submit_info.pNext = NULL;
+ timeline_submit_info.signalSemaphoreValueCount = 0;
+ timeline_submit_info.pSignalSemaphoreValues = NULL;
+ timeline_submit_info.waitSemaphoreValueCount = 1;
+ timeline_submit_info.pWaitSemaphoreValues = &value;
+
+ if (!(vk_queue = vkd3d_queue_acquire(queue)))
+ {
+ ERR("Failed to acquire queue %p.\n", queue);
+ return E_FAIL;
+ }
+
+ submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ submit_info.pNext = &timeline_submit_info;
+ submit_info.waitSemaphoreCount = 1;
+ submit_info.pWaitSemaphores = &fence->timeline_semaphore;
+ submit_info.pWaitDstStageMask = &wait_stage_mask;
+ submit_info.commandBufferCount = 0;
+ submit_info.pCommandBuffers = NULL;
+ submit_info.signalSemaphoreCount = 0;
+ submit_info.pSignalSemaphores = NULL;
+
+ ++queue->pending_wait_completion_value;
+
+ submit_info.signalSemaphoreCount = 1;
+ submit_info.pSignalSemaphores = &queue->wait_completion_semaphore;
+ timeline_submit_info.signalSemaphoreValueCount = 1;
+ timeline_submit_info.pSignalSemaphoreValues = &queue->pending_wait_completion_value;
+
+ d3d12_fence_update_gpu_wait(fence, queue);
+
+ vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
+
+ vkd3d_queue_release(queue);
+
+ if (vr < 0)
+ {
+ WARN("Failed to submit wait operation, vr %d.\n", vr);
+ return hresult_from_vk_result(vr);
+ }
+
+ return S_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface,
+ ID3D12Fence *fence_iface, UINT64 value)
+{
+ struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
+ struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface);
+
+ TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
+
+ if (command_queue->device->use_timeline_semaphores)
+ return d3d12_command_queue_wait_timeline_semaphore(command_queue, fence, value);
+
+ FIXME_ONCE("KHR_timeline_semaphore is not available or incompatible. Some wait commands may be unsupported.\n");
+ return d3d12_command_queue_wait_binary_semaphore(command_queue, fence, value);
+}
+
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface,
UINT64 *frequency)
{
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index eb470c12..ab087d60 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -129,6 +129,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3),
VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor),
VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge),
+ VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore),
/* EXT extensions */
VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps),
VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering),
@@ -791,6 +792,7 @@ struct vkd3d_physical_device_info
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties;
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties;
+ VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
VkPhysicalDeviceProperties2KHR properties2;
@@ -803,6 +805,7 @@ struct vkd3d_physical_device_info
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features;
VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_features;
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features;
+ VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features;
VkPhysicalDeviceFeatures2 features2;
};
@@ -814,11 +817,13 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties;
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties;
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties;
+ VkPhysicalDeviceTimelineSemaphorePropertiesKHR *timeline_semaphore_properties;
VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features;
VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features;
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features;
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features;
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features;
+ VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features;
VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features;
VkPhysicalDeviceMaintenance3Properties *maintenance3_properties;
VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties;
@@ -838,6 +843,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
buffer_alignment_properties = &info->texel_buffer_alignment_properties;
vertex_divisor_features = &info->vertex_divisor_features;
vertex_divisor_properties = &info->vertex_divisor_properties;
+ timeline_semaphore_features = &info->timeline_semaphore_features;
+ timeline_semaphore_properties = &info->timeline_semaphore_properties;
xfb_features = &info->xfb_features;
xfb_properties = &info->xfb_properties;
@@ -859,6 +866,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->features2, xfb_features);
vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT;
vk_prepend_struct(&info->features2, vertex_divisor_features);
+ timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR;
+ vk_prepend_struct(&info->features2, timeline_semaphore_features);
if (vulkan_info->KHR_get_physical_device_properties2)
VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2));
@@ -877,6 +886,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->properties2, xfb_properties);
vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT;
vk_prepend_struct(&info->properties2, vertex_divisor_properties);
+ timeline_semaphore_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR;
+ vk_prepend_struct(&info->properties2, timeline_semaphore_properties);
if (vulkan_info->KHR_get_physical_device_properties2)
VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2));
@@ -1465,6 +1476,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect;
vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries;
vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1);
+ vulkan_info->timeline_semaphore_properties = physical_device_info->timeline_semaphore_properties;
device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64;
device->feature_options.OutputMergerLogicOp = features->logicOp;
@@ -1589,6 +1601,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
vulkan_info->EXT_shader_demote_to_helper_invocation = false;
if (!physical_device_info->texel_buffer_alignment_features.texelBufferAlignment)
vulkan_info->EXT_texel_buffer_alignment = false;
+ if (!physical_device_info->timeline_semaphore_features.timelineSemaphore)
+ vulkan_info->KHR_timeline_semaphore = false;
vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties;
@@ -1939,6 +1953,75 @@ static bool d3d12_is_64k_msaa_supported(struct d3d12_device *device)
&& info.Alignment <= 0x10000;
}
+/* D3D12 allows signalling a fence to a value lower than its current one, while
+ * Vulkan timeline semaphores do not; test whether it works on this driver anyway. */
+static bool d3d12_is_timeline_semaphore_supported(const struct d3d12_device *device)
+{
+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+ VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
+ VkSemaphore timeline_semaphore;
+ VkSubmitInfo submit_info;
+ bool result = false;
+ uint64_t value = 0;
+ VkQueue vk_queue;
+ VkResult vr;
+
+ if (!device->vk_info.KHR_timeline_semaphore)
+ return false;
+
+ if ((vr = vkd3d_create_timeline_semaphore(device, 1, &timeline_semaphore)) < 0)
+ {
+ WARN("Failed to create timeline semaphore, vr %d.\n", vr);
+ return false;
+ }
+
+ if (!(vk_queue = vkd3d_queue_acquire(device->direct_queue)))
+ {
+ ERR("Failed to acquire queue %p.\n", device->direct_queue);
+ VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL));
+ return false;
+ }
+
+ submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ submit_info.pNext = &timeline_submit_info;
+ submit_info.waitSemaphoreCount = 0;
+ submit_info.pWaitSemaphores = NULL;
+ submit_info.pWaitDstStageMask = NULL;
+ submit_info.commandBufferCount = 0;
+ submit_info.pCommandBuffers = NULL;
+ submit_info.signalSemaphoreCount = 1;
+ submit_info.pSignalSemaphores = &timeline_semaphore;
+
+ timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
+ timeline_submit_info.pNext = NULL;
+ timeline_submit_info.pSignalSemaphoreValues = &value;
+ timeline_submit_info.signalSemaphoreValueCount = 1;
+ timeline_submit_info.waitSemaphoreValueCount = 0;
+ timeline_submit_info.pWaitSemaphoreValues = NULL;
+
+ vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
+
+ if (vr >= 0)
+ {
+ if ((vr = VK_CALL(vkQueueWaitIdle(vk_queue))) < 0)
+ WARN("Failed to wait for queue, vr %d.\n", vr);
+
+ if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, timeline_semaphore, &value))) < 0)
+ ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
+ else if (!(result = !value))
+ WARN("Disabling timeline semaphore use due to incompatible behaviour.\n");
+ }
+ else
+ {
+ WARN("Failed to submit signal operation, vr %d.\n", vr);
+ }
+
+ vkd3d_queue_release(device->direct_queue);
+ VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL));
+
+ return result;
+}
+
static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
const struct vkd3d_device_create_info *create_info)
{
@@ -2037,6 +2120,10 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
}
device->feature_options4.MSAA64KBAlignedTextureSupported = d3d12_is_64k_msaa_supported(device);
+ device->use_timeline_semaphores = d3d12_is_timeline_semaphore_supported(device)
+ && vkd3d_queue_init_timeline_semaphore(device->direct_queue, device)
+ && vkd3d_queue_init_timeline_semaphore(device->compute_queue, device)
+ && vkd3d_queue_init_timeline_semaphore(device->copy_queue, device);
TRACE("Created Vulkan device %p.\n", vk_device);
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 796dfefd..56060b6d 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -59,6 +59,7 @@
#define VKD3D_MAX_SHADER_EXTENSIONS 3u
#define VKD3D_MAX_SHADER_STAGES 5u
#define VKD3D_MAX_VK_SYNC_OBJECTS 4u
+#define VKD3D_MAX_FENCE_WAITING_QUEUES 4u
#define VKD3D_MAX_DESCRIPTOR_SETS 64u
/* D3D12 binding tier 3 has a limit of 2048 samplers. */
#define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u
@@ -125,6 +126,7 @@ struct vkd3d_vulkan_info
bool KHR_maintenance3;
bool KHR_push_descriptor;
bool KHR_sampler_mirror_clamp_to_edge;
+ bool KHR_timeline_semaphore;
/* EXT device extensions */
bool EXT_calibrated_timestamps;
bool EXT_conditional_rendering;
@@ -150,6 +152,8 @@ struct vkd3d_vulkan_info
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
+ VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
+
unsigned int shader_extension_count;
enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS];
@@ -348,6 +352,7 @@ struct vkd3d_fence_worker
struct vkd3d_enqueued_fence
{
VkFence vk_fence;
+ VkSemaphore vk_semaphore;
struct vkd3d_waiting_fence waiting_fence;
} *enqueued_fences;
size_t enqueued_fences_size;
@@ -357,6 +362,12 @@ struct vkd3d_fence_worker
size_t vk_fences_size;
struct vkd3d_waiting_fence *fences;
size_t fences_size;
+ VkSemaphore *vk_semaphores;
+ size_t vk_semaphores_size;
+ uint64_t *semaphore_wait_values;
+ size_t semaphore_wait_values_size;
+
+ void (*wait_for_gpu_fences)(struct vkd3d_fence_worker *worker);
struct d3d12_device *device;
};
@@ -511,6 +522,12 @@ struct vkd3d_signaled_semaphore
bool is_acquired;
};
+struct vkd3d_pending_fence_wait
+{
+ const struct vkd3d_queue *queue;
+ uint64_t pending_value;
+};
+
/* ID3D12Fence */
struct d3d12_fence
{
@@ -530,6 +547,11 @@ struct d3d12_fence
size_t events_size;
size_t event_count;
+ VkSemaphore timeline_semaphore;
+ uint64_t pending_timeline_value;
+ struct vkd3d_pending_fence_wait gpu_waits[VKD3D_MAX_FENCE_WAITING_QUEUES];
+ unsigned int gpu_wait_count;
+
struct list semaphores;
unsigned int semaphore_count;
@@ -545,6 +567,9 @@ struct d3d12_fence
HRESULT d3d12_fence_create(struct d3d12_device *device, uint64_t initial_value,
D3D12_FENCE_FLAGS flags, struct d3d12_fence **fence);
+VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint64_t initial_value,
+ VkSemaphore *timeline_semaphore);
+
/* ID3D12Heap */
struct d3d12_heap
{
@@ -1284,6 +1309,9 @@ struct vkd3d_queue
VkQueueFlags vk_queue_flags;
uint32_t timestamp_bits;
+ VkSemaphore wait_completion_semaphore;
+ uint64_t pending_wait_completion_value;
+
struct
{
VkSemaphore vk_semaphore;
@@ -1298,6 +1326,7 @@ struct vkd3d_queue
VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue);
HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index,
const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue);
+bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device);
void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device);
void vkd3d_queue_release(struct vkd3d_queue *queue);
@@ -1456,6 +1485,7 @@ struct d3d12_device
VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT];
struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
bool use_vk_heaps;
+ bool use_timeline_semaphores;
};
HRESULT d3d12_device_create(struct vkd3d_instance *instance,
diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h
index 60556735..34e0ab4b 100644
--- a/libs/vkd3d/vulkan_procs.h
+++ b/libs/vkd3d/vulkan_procs.h
@@ -195,6 +195,11 @@ VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutSupportKHR)
/* VK_KHR_push_descriptor */
VK_DEVICE_EXT_PFN(vkCmdPushDescriptorSetKHR)
+/* VK_KHR_timeline_semaphore */
+VK_DEVICE_EXT_PFN(vkGetSemaphoreCounterValueKHR)
+VK_DEVICE_EXT_PFN(vkWaitSemaphoresKHR)
+VK_DEVICE_EXT_PFN(vkSignalSemaphoreKHR)
+
/* VK_EXT_calibrated_timestamps */
VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)
diff --git a/tests/d3d12.c b/tests/d3d12.c
index 29bd40c8..2059b182 100644
--- a/tests/d3d12.c
+++ b/tests/d3d12.c
@@ -33240,7 +33240,9 @@ static void test_queue_wait(void)
command_list = context.list;
queue = context.queue;
- queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
+ /* 'queue2' must not map to the same command queue as 'queue', or Wait() before GPU signal will fail.
+ * Using a compute queue fixes this on most hardware, but it may still fail on low spec hardware. */
+ queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
event = create_event();
ok(event, "Failed to create event.\n");
@@ -33305,12 +33307,6 @@ static void test_queue_wait(void)
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
release_resource_readback(&rb);
- if (!vkd3d_test_platform_is_windows())
- {
- skip("Wait() is not implemented yet.\n"); /* FIXME */
- goto skip_tests;
- }
-
/* Wait() before CPU signal */
update_buffer_data(cb, 0, sizeof(blue), &blue);
queue_wait(queue, fence, 2);
@@ -33386,7 +33382,6 @@ static void test_queue_wait(void)
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
release_resource_readback(&rb);
-skip_tests:
/* Signal() and Wait() in the same command queue */
update_buffer_data(cb, 0, sizeof(blue), &blue);
queue_signal(queue, fence, 7);
--
2.20.1