-- v2: winegstreamer: Support zero-copy in wg_transform_push_data. winegstreamer: Support zero-copy output using the allocator. winegstreamer: Introduce a new custom memory allocator. winegstreamer: Rename mf_(create|destroy)_wg_sample helpers. winegstreamer: Introduce new wg_transform_(push|read)_mf helpers.
From: Rémi Bernon rbernon@codeweavers.com
To read MF sample properties before pushing, and update them after sucessfully reading a sample.
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/winegstreamer/gst_private.h | 4 +++ dlls/winegstreamer/h264_decoder.c | 6 ++-- dlls/winegstreamer/mfplat.c | 55 +++++++++++++++++++++---------- dlls/winegstreamer/wma_decoder.c | 4 +-- 4 files changed, 45 insertions(+), 24 deletions(-)
diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h index 159143d7e54..ca7396ad5cd 100644 --- a/dlls/winegstreamer/gst_private.h +++ b/dlls/winegstreamer/gst_private.h @@ -123,6 +123,10 @@ void mf_media_type_to_wg_format(IMFMediaType *type, struct wg_format *format); HRESULT mf_create_wg_sample(IMFSample *sample, struct wg_sample **out); void mf_destroy_wg_sample(struct wg_sample *wg_sample);
+HRESULT wg_transform_push_mf(struct wg_transform *transform, struct wg_sample *sample); +HRESULT wg_transform_read_mf(struct wg_transform *transform, struct wg_sample *sample, + struct wg_format *format); + HRESULT winegstreamer_stream_handler_create(REFIID riid, void **obj);
HRESULT h264_decoder_create(REFIID riid, void **ret); diff --git a/dlls/winegstreamer/h264_decoder.c b/dlls/winegstreamer/h264_decoder.c index 19a36a9a77a..0ee9084e12e 100644 --- a/dlls/winegstreamer/h264_decoder.c +++ b/dlls/winegstreamer/h264_decoder.c @@ -543,8 +543,7 @@ static HRESULT WINAPI transform_ProcessInput(IMFTransform *iface, DWORD id, IMFS if (FAILED(hr = mf_create_wg_sample(sample, &wg_sample))) return hr;
- hr = wg_transform_push_data(decoder->wg_transform, wg_sample); - + hr = wg_transform_push_mf(decoder->wg_transform, wg_sample); mf_destroy_wg_sample(wg_sample); return hr; } @@ -583,8 +582,7 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, return MF_E_BUFFERTOOSMALL; }
- hr = wg_transform_read_data(decoder->wg_transform, wg_sample, - &wg_format); + hr = wg_transform_read_mf(decoder->wg_transform, wg_sample, &wg_format); mf_destroy_wg_sample(wg_sample);
if (hr == MF_E_TRANSFORM_STREAM_CHANGE) diff --git a/dlls/winegstreamer/mfplat.c b/dlls/winegstreamer/mfplat.c index 0226e7a2e45..026237bdf5d 100644 --- a/dlls/winegstreamer/mfplat.c +++ b/dlls/winegstreamer/mfplat.c @@ -969,8 +969,6 @@ HRESULT mf_create_wg_sample(IMFSample *sample, struct wg_sample **out) { DWORD current_length, max_length; struct mf_sample *mf_sample; - LONGLONG time, duration; - UINT32 value; BYTE *buffer; HRESULT hr;
@@ -981,19 +979,6 @@ HRESULT mf_create_wg_sample(IMFSample *sample, struct wg_sample **out) if (FAILED(hr = IMFMediaBuffer_Lock(mf_sample->media_buffer, &buffer, &max_length, ¤t_length))) goto out;
- if (SUCCEEDED(IMFSample_GetSampleTime(sample, &time))) - { - mf_sample->wg_sample.flags |= WG_SAMPLE_FLAG_HAS_PTS; - mf_sample->wg_sample.pts = time; - } - if (SUCCEEDED(IMFSample_GetSampleDuration(sample, &duration))) - { - mf_sample->wg_sample.flags |= WG_SAMPLE_FLAG_HAS_DURATION; - mf_sample->wg_sample.duration = duration; - } - if (SUCCEEDED(IMFSample_GetUINT32(sample, &MFSampleExtension_CleanPoint, &value)) && value) - mf_sample->wg_sample.flags |= WG_SAMPLE_FLAG_SYNC_POINT; - IMFSample_AddRef((mf_sample->sample = sample)); mf_sample->wg_sample.data = buffer; mf_sample->wg_sample.size = current_length; @@ -1015,8 +1000,43 @@ void mf_destroy_wg_sample(struct wg_sample *wg_sample) struct mf_sample *mf_sample = CONTAINING_RECORD(wg_sample, struct mf_sample, wg_sample);
IMFMediaBuffer_Unlock(mf_sample->media_buffer); - IMFMediaBuffer_SetCurrentLength(mf_sample->media_buffer, wg_sample->size); IMFMediaBuffer_Release(mf_sample->media_buffer); + IMFSample_Release(mf_sample->sample); + free(mf_sample); +} + +HRESULT wg_transform_push_mf(struct wg_transform *transform, struct wg_sample *sample) +{ + struct mf_sample *mf_sample = CONTAINING_RECORD(sample, struct mf_sample, wg_sample); + LONGLONG time, duration; + UINT32 value; + + if (SUCCEEDED(IMFSample_GetSampleTime(mf_sample->sample, &time))) + { + mf_sample->wg_sample.flags |= WG_SAMPLE_FLAG_HAS_PTS; + mf_sample->wg_sample.pts = time; + } + if (SUCCEEDED(IMFSample_GetSampleDuration(mf_sample->sample, &duration))) + { + mf_sample->wg_sample.flags |= WG_SAMPLE_FLAG_HAS_DURATION; + mf_sample->wg_sample.duration = duration; + } + if (SUCCEEDED(IMFSample_GetUINT32(mf_sample->sample, &MFSampleExtension_CleanPoint, &value)) && value) + mf_sample->wg_sample.flags |= WG_SAMPLE_FLAG_SYNC_POINT; + + return wg_transform_push_data(transform, sample); +} + +HRESULT wg_transform_read_mf(struct wg_transform *transform, struct wg_sample *wg_sample, + struct wg_format *format) +{ + struct mf_sample *mf_sample = CONTAINING_RECORD(wg_sample, struct mf_sample, wg_sample); + HRESULT hr; + + if (FAILED(hr = wg_transform_read_data(transform, wg_sample, format))) + return hr; + + IMFMediaBuffer_SetCurrentLength(mf_sample->media_buffer, wg_sample->size);
if (wg_sample->flags & WG_SAMPLE_FLAG_HAS_PTS) IMFSample_SetSampleTime(mf_sample->sample, wg_sample->pts); @@ -1025,6 +1045,5 @@ void mf_destroy_wg_sample(struct wg_sample *wg_sample) if (wg_sample->flags & WG_SAMPLE_FLAG_SYNC_POINT) IMFSample_SetUINT32(mf_sample->sample, &MFSampleExtension_CleanPoint, 1);
- IMFSample_Release(mf_sample->sample); - free(mf_sample); + return S_OK; } diff --git a/dlls/winegstreamer/wma_decoder.c b/dlls/winegstreamer/wma_decoder.c index 106d32adce9..fdb68328c3a 100644 --- a/dlls/winegstreamer/wma_decoder.c +++ b/dlls/winegstreamer/wma_decoder.c @@ -544,7 +544,7 @@ static HRESULT WINAPI transform_ProcessInput(IMFTransform *iface, DWORD id, IMFS return S_OK; }
- hr = wg_transform_push_data(decoder->wg_transform, wg_sample); + hr = wg_transform_push_mf(decoder->wg_transform, wg_sample); mf_destroy_wg_sample(wg_sample); return hr; } @@ -586,7 +586,7 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, return MF_E_BUFFERTOOSMALL; }
- if (SUCCEEDED(hr = wg_transform_read_data(decoder->wg_transform, wg_sample, NULL))) + if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, wg_sample, NULL))) { if (wg_sample->flags & WG_SAMPLE_FLAG_INCOMPLETE) samples[0].dwStatus |= MFT_OUTPUT_DATA_BUFFER_INCOMPLETE;
From: Rémi Bernon rbernon@codeweavers.com
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/winegstreamer/gst_private.h | 4 ++-- dlls/winegstreamer/h264_decoder.c | 10 +++++----- dlls/winegstreamer/mfplat.c | 4 ++-- dlls/winegstreamer/wma_decoder.c | 12 ++++++------ 4 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h index ca7396ad5cd..ab89942847a 100644 --- a/dlls/winegstreamer/gst_private.h +++ b/dlls/winegstreamer/gst_private.h @@ -120,8 +120,8 @@ extern HRESULT mfplat_DllRegisterServer(void); IMFMediaType *mf_media_type_from_wg_format(const struct wg_format *format); void mf_media_type_to_wg_format(IMFMediaType *type, struct wg_format *format);
-HRESULT mf_create_wg_sample(IMFSample *sample, struct wg_sample **out); -void mf_destroy_wg_sample(struct wg_sample *wg_sample); +HRESULT wg_sample_create_mf(IMFSample *sample, struct wg_sample **out); +void wg_sample_release(struct wg_sample *wg_sample);
HRESULT wg_transform_push_mf(struct wg_transform *transform, struct wg_sample *sample); HRESULT wg_transform_read_mf(struct wg_transform *transform, struct wg_sample *sample, diff --git a/dlls/winegstreamer/h264_decoder.c b/dlls/winegstreamer/h264_decoder.c index 0ee9084e12e..8d3de4355bb 100644 --- a/dlls/winegstreamer/h264_decoder.c +++ b/dlls/winegstreamer/h264_decoder.c @@ -540,11 +540,11 @@ static HRESULT WINAPI transform_ProcessInput(IMFTransform *iface, DWORD id, IMFS if (!decoder->wg_transform) return MF_E_TRANSFORM_TYPE_NOT_SET;
- if (FAILED(hr = mf_create_wg_sample(sample, &wg_sample))) + if (FAILED(hr = wg_sample_create_mf(sample, &wg_sample))) return hr;
hr = wg_transform_push_mf(decoder->wg_transform, wg_sample); - mf_destroy_wg_sample(wg_sample); + wg_sample_release(wg_sample); return hr; }
@@ -573,17 +573,17 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, samples[0].dwStatus = 0; if (!samples[0].pSample) return E_INVALIDARG;
- if (FAILED(hr = mf_create_wg_sample(samples[0].pSample, &wg_sample))) + if (FAILED(hr = wg_sample_create_mf(samples[0].pSample, &wg_sample))) return hr;
if (wg_sample->max_size < info.cbSize) { - mf_destroy_wg_sample(wg_sample); + wg_sample_release(wg_sample); return MF_E_BUFFERTOOSMALL; }
hr = wg_transform_read_mf(decoder->wg_transform, wg_sample, &wg_format); - mf_destroy_wg_sample(wg_sample); + wg_sample_release(wg_sample);
if (hr == MF_E_TRANSFORM_STREAM_CHANGE) { diff --git a/dlls/winegstreamer/mfplat.c b/dlls/winegstreamer/mfplat.c index 026237bdf5d..28a3fc20ead 100644 --- a/dlls/winegstreamer/mfplat.c +++ b/dlls/winegstreamer/mfplat.c @@ -965,7 +965,7 @@ struct mf_sample struct wg_sample wg_sample; };
-HRESULT mf_create_wg_sample(IMFSample *sample, struct wg_sample **out) +HRESULT wg_sample_create_mf(IMFSample *sample, struct wg_sample **out) { DWORD current_length, max_length; struct mf_sample *mf_sample; @@ -995,7 +995,7 @@ out: return hr; }
-void mf_destroy_wg_sample(struct wg_sample *wg_sample) +void wg_sample_release(struct wg_sample *wg_sample) { struct mf_sample *mf_sample = CONTAINING_RECORD(wg_sample, struct mf_sample, wg_sample);
diff --git a/dlls/winegstreamer/wma_decoder.c b/dlls/winegstreamer/wma_decoder.c index fdb68328c3a..81285f5ad44 100644 --- a/dlls/winegstreamer/wma_decoder.c +++ b/dlls/winegstreamer/wma_decoder.c @@ -534,18 +534,18 @@ static HRESULT WINAPI transform_ProcessInput(IMFTransform *iface, DWORD id, IMFS if (FAILED(hr = IMFTransform_GetInputStreamInfo(iface, 0, &info))) return hr;
- if (FAILED(hr = mf_create_wg_sample(sample, &wg_sample))) + if (FAILED(hr = wg_sample_create_mf(sample, &wg_sample))) return hr;
/* WMA transform uses fixed size input samples and ignores samples with invalid sizes */ if (wg_sample->size % info.cbSize) { - mf_destroy_wg_sample(wg_sample); + wg_sample_release(wg_sample); return S_OK; }
hr = wg_transform_push_mf(decoder->wg_transform, wg_sample); - mf_destroy_wg_sample(wg_sample); + wg_sample_release(wg_sample); return hr; }
@@ -576,13 +576,13 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, return MF_E_TRANSFORM_NEED_MORE_INPUT; }
- if (FAILED(hr = mf_create_wg_sample(samples[0].pSample, &wg_sample))) + if (FAILED(hr = wg_sample_create_mf(samples[0].pSample, &wg_sample))) return hr;
wg_sample->size = 0; if (wg_sample->max_size < info.cbSize) { - mf_destroy_wg_sample(wg_sample); + wg_sample_release(wg_sample); return MF_E_BUFFERTOOSMALL; }
@@ -592,7 +592,7 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, samples[0].dwStatus |= MFT_OUTPUT_DATA_BUFFER_INCOMPLETE; }
- mf_destroy_wg_sample(wg_sample); + wg_sample_release(wg_sample);
if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
From: Rémi Bernon rbernon@codeweavers.com
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/winegstreamer/Makefile.in | 1 + dlls/winegstreamer/unix_private.h | 3 + dlls/winegstreamer/wg_allocator.c | 162 ++++++++++++++++++++++++++++++ dlls/winegstreamer/wg_transform.c | 12 ++- 4 files changed, 176 insertions(+), 2 deletions(-) create mode 100644 dlls/winegstreamer/wg_allocator.c
diff --git a/dlls/winegstreamer/Makefile.in b/dlls/winegstreamer/Makefile.in index e4c2636d02d..50f4dc861d4 100644 --- a/dlls/winegstreamer/Makefile.in +++ b/dlls/winegstreamer/Makefile.in @@ -14,6 +14,7 @@ C_SRCS = \ mfplat.c \ quartz_parser.c \ quartz_transform.c \ + wg_allocator.c \ wg_format.c \ wg_parser.c \ wg_transform.c \ diff --git a/dlls/winegstreamer/unix_private.h b/dlls/winegstreamer/unix_private.h index 7bce8263aaf..16615ef0833 100644 --- a/dlls/winegstreamer/unix_private.h +++ b/dlls/winegstreamer/unix_private.h @@ -37,4 +37,7 @@ extern NTSTATUS wg_transform_destroy(void *args) DECLSPEC_HIDDEN; extern NTSTATUS wg_transform_push_data(void *args) DECLSPEC_HIDDEN; extern NTSTATUS wg_transform_read_data(void *args) DECLSPEC_HIDDEN;
+extern GstAllocator *wg_allocator_create(void) DECLSPEC_HIDDEN; +extern void wg_allocator_destroy(GstAllocator *allocator) DECLSPEC_HIDDEN; + #endif /* __WINE_WINEGSTREAMER_UNIX_PRIVATE_H */ diff --git a/dlls/winegstreamer/wg_allocator.c b/dlls/winegstreamer/wg_allocator.c new file mode 100644 index 00000000000..90dada288ae --- /dev/null +++ b/dlls/winegstreamer/wg_allocator.c @@ -0,0 +1,162 @@ +/* + * GStreamer memory allocator + * + * Copyright 2022 Rémi Bernon for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#if 0 +#pragma makedep unix +#endif + +#include "config.h" + +#include <assert.h> +#include <stdarg.h> + +#include <gst/gst.h> +#include <gst/video/video.h> +#include <gst/audio/audio.h> + +#include "unix_private.h" + +GST_DEBUG_CATEGORY_EXTERN(wine); +#define GST_CAT_DEFAULT wine + +typedef struct +{ + GstMemory parent; + + GstMemory *unix_memory; + GstMapInfo unix_map_info; +} WgMemory; + +typedef struct +{ + GstAllocator parent; +} WgAllocator; + +typedef struct +{ + GstAllocatorClass parent_class; +} WgAllocatorClass; + +G_DEFINE_TYPE(WgAllocator, wg_allocator, GST_TYPE_ALLOCATOR); + +static gpointer wg_allocator_map(GstMemory *gst_memory, GstMapInfo *info, gsize maxsize) +{ + WgMemory *memory = (WgMemory *)gst_memory; + + if (gst_memory->parent) + return wg_allocator_map(gst_memory->parent, info, maxsize); + + GST_LOG("memory %p, info %p, maxsize %#zx", memory, info, maxsize); + + info->data = memory->unix_map_info.data; + + GST_INFO("Mapped memory %p to %p", memory, info->data); + return info->data; +} + +static void wg_allocator_unmap(GstMemory *gst_memory, GstMapInfo *info) +{ + WgMemory *memory = (WgMemory *)gst_memory; + + if (gst_memory->parent) + return wg_allocator_unmap(gst_memory->parent, info); + + GST_LOG("memory %p, info %p", memory, info); +} + +static void wg_allocator_init(WgAllocator *allocator) +{ + GST_LOG("allocator %p", allocator); + + allocator->parent.mem_type = "Wine"; + + allocator->parent.mem_map_full = wg_allocator_map; + allocator->parent.mem_unmap_full = wg_allocator_unmap; + + GST_OBJECT_FLAG_SET(allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC); +} + +static void wg_allocator_finalize(GObject *object) +{ + WgAllocator *allocator = (WgAllocator *)object; + + GST_LOG("allocator %p", allocator); + + G_OBJECT_CLASS(wg_allocator_parent_class)->finalize(object); +} + +static GstMemory *wg_allocator_alloc(GstAllocator *gst_allocator, gsize size, + GstAllocationParams *params) +{ + WgAllocator *allocator = (WgAllocator *)gst_allocator; + WgMemory *memory; + + GST_LOG("allocator %p, size %#zx, params %p", allocator, size, params); + + memory = g_slice_new0(WgMemory); + gst_memory_init(GST_MEMORY_CAST(memory), 0, GST_ALLOCATOR_CAST(allocator), + NULL, size, 0, 0, size); + memory->unix_memory = gst_allocator_alloc(NULL, size, params); + gst_memory_map(memory->unix_memory, &memory->unix_map_info, GST_MAP_WRITE); + + GST_INFO("Allocated memory %p, unix_memory %p, data %p", memory, memory->unix_memory, + memory->unix_map_info.data); + return (GstMemory *)memory; +} + +static void wg_allocator_free(GstAllocator *gst_allocator, GstMemory *gst_memory) +{ + WgAllocator *allocator = (WgAllocator *)gst_allocator; + WgMemory *memory = (WgMemory *)gst_memory; + + GST_LOG("allocator %p, memory %p", allocator, memory); + + gst_memory_unmap(memory->unix_memory, &memory->unix_map_info); + gst_memory_unref(memory->unix_memory); + g_slice_free(WgMemory, memory); +} + +static void wg_allocator_class_init(WgAllocatorClass *klass) +{ + GstAllocatorClass *parent_class = (GstAllocatorClass *)klass; + GObjectClass *root_class = (GObjectClass *)klass; + + GST_LOG("klass %p", klass); + + parent_class->alloc = wg_allocator_alloc; + parent_class->free = wg_allocator_free; + root_class->finalize = wg_allocator_finalize; +} + +GstAllocator *wg_allocator_create(void) +{ + return g_object_new(wg_allocator_get_type(), NULL); +} + +void wg_allocator_destroy(GstAllocator *gst_allocator) +{ + WgAllocator *allocator = (WgAllocator *)gst_allocator; + + GST_LOG("allocator %p", allocator); + + g_object_unref(allocator); + + GST_INFO("Destroyed buffer allocator %p", allocator); +} diff --git a/dlls/winegstreamer/wg_transform.c b/dlls/winegstreamer/wg_transform.c index fb852b4cf3d..c87536e5fbb 100644 --- a/dlls/winegstreamer/wg_transform.c +++ b/dlls/winegstreamer/wg_transform.c @@ -47,6 +47,7 @@ GST_DEBUG_CATEGORY_EXTERN(wine); struct wg_transform { GstElement *container; + GstAllocator *allocator; GstPad *my_src, *my_sink; GstPad *their_sink, *their_src; GstSegment segment; @@ -152,6 +153,7 @@ static gboolean transform_sink_query_cb(GstPad *pad, GstObject *parent, GstQuery
gst_buffer_pool_config_set_params(config, caps, info.size, 0, 0); + gst_buffer_pool_config_set_allocator(config, transform->allocator, NULL); if (!gst_buffer_pool_set_config(pool, config)) GST_ERROR("Failed to set pool %p config.", pool); } @@ -161,9 +163,10 @@ static gboolean transform_sink_query_cb(GstPad *pad, GstObject *parent, GstQuery GST_ERROR("Pool %p failed to activate.", pool);
gst_query_add_allocation_pool(query, pool, info.size, 0, 0); + gst_query_add_allocation_param(query, transform->allocator, NULL);
- GST_INFO("Proposing pool %p, buffer size %#zx, for query %p.", - pool, info.size, query); + GST_INFO("Proposing pool %p, buffer size %#zx, allocator %p, for query %p.", + pool, info.size, transform->allocator, query);
g_object_unref(pool); return true; @@ -221,6 +224,7 @@ NTSTATUS wg_transform_destroy(void *args) while ((sample = gst_atomic_queue_pop(transform->output_queue))) gst_sample_unref(sample);
+ wg_allocator_destroy(transform->allocator); g_object_unref(transform->their_sink); g_object_unref(transform->their_src); g_object_unref(transform->container); @@ -325,6 +329,8 @@ NTSTATUS wg_transform_create(void *args) goto out; if (!(transform->output_queue = gst_atomic_queue_new(8))) goto out; + if (!(transform->allocator = wg_allocator_create())) + goto out; transform->input_max_length = 1; transform->output_plane_align = 0;
@@ -481,6 +487,8 @@ out: gst_object_unref(transform->my_src); if (src_caps) gst_caps_unref(src_caps); + if (transform->allocator) + wg_allocator_destroy(transform->allocator); if (transform->output_queue) gst_atomic_queue_unref(transform->output_queue); if (transform->input)
From: Rémi Bernon rbernon@codeweavers.com
Through a custom allocator, by borrowing memory from the reading thread and mapping it instead of the allocated memory.
We cannot use the buffer pool to share wrapped buffers, because some decoder will hold on the acquired buffers longer than they should and we cannot remove our memory from them as long as they keep a reference.
Swapping the memory on map should be safe.
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/winegstreamer/mfplat.c | 6 ++ dlls/winegstreamer/unix_private.h | 7 +- dlls/winegstreamer/unixlib.h | 1 + dlls/winegstreamer/wg_allocator.c | 135 ++++++++++++++++++++++++++++-- dlls/winegstreamer/wg_transform.c | 98 +++++++++++++++++++--- 5 files changed, 230 insertions(+), 17 deletions(-)
diff --git a/dlls/winegstreamer/mfplat.c b/dlls/winegstreamer/mfplat.c index 28a3fc20ead..40199706759 100644 --- a/dlls/winegstreamer/mfplat.c +++ b/dlls/winegstreamer/mfplat.c @@ -999,6 +999,12 @@ void wg_sample_release(struct wg_sample *wg_sample) { struct mf_sample *mf_sample = CONTAINING_RECORD(wg_sample, struct mf_sample, wg_sample);
+ if (InterlockedOr(&wg_sample->refcount, 0)) + { + ERR("Sample %p is still in use, trouble ahead!\n", wg_sample); + return; + } + IMFMediaBuffer_Unlock(mf_sample->media_buffer); IMFMediaBuffer_Release(mf_sample->media_buffer); IMFSample_Release(mf_sample->sample); diff --git a/dlls/winegstreamer/unix_private.h b/dlls/winegstreamer/unix_private.h index 16615ef0833..e9f472986ae 100644 --- a/dlls/winegstreamer/unix_private.h +++ b/dlls/winegstreamer/unix_private.h @@ -37,7 +37,12 @@ extern NTSTATUS wg_transform_destroy(void *args) DECLSPEC_HIDDEN; extern NTSTATUS wg_transform_push_data(void *args) DECLSPEC_HIDDEN; extern NTSTATUS wg_transform_read_data(void *args) DECLSPEC_HIDDEN;
-extern GstAllocator *wg_allocator_create(void) DECLSPEC_HIDDEN; +/* wg_allocator_release_sample can be used to release any sample that was requested. */ +typedef struct wg_sample *(*wg_allocator_request_sample_cb)(gsize size, void *context); +extern GstAllocator *wg_allocator_create(wg_allocator_request_sample_cb request_sample, + void *request_sample_context) DECLSPEC_HIDDEN; extern void wg_allocator_destroy(GstAllocator *allocator) DECLSPEC_HIDDEN; +extern void wg_allocator_release_sample(GstAllocator *allocator, struct wg_sample *sample, + bool discard_data) DECLSPEC_HIDDEN;
#endif /* __WINE_WINEGSTREAMER_UNIX_PRIVATE_H */ diff --git a/dlls/winegstreamer/unixlib.h b/dlls/winegstreamer/unixlib.h index f334a168bd1..860a8ab2a52 100644 --- a/dlls/winegstreamer/unixlib.h +++ b/dlls/winegstreamer/unixlib.h @@ -128,6 +128,7 @@ struct wg_sample /* timestamp and duration are in 100-nanosecond units. */ UINT64 pts; UINT64 duration; + LONG refcount; /* unix refcount */ UINT32 flags; UINT32 max_size; UINT32 size; diff --git a/dlls/winegstreamer/wg_allocator.c b/dlls/winegstreamer/wg_allocator.c index 90dada288ae..c31751ce83f 100644 --- a/dlls/winegstreamer/wg_allocator.c +++ b/dlls/winegstreamer/wg_allocator.c @@ -33,20 +33,33 @@
#include "unix_private.h"
+#include "wine/list.h" + GST_DEBUG_CATEGORY_EXTERN(wine); #define GST_CAT_DEFAULT wine
typedef struct { GstMemory parent; + struct list entry;
GstMemory *unix_memory; GstMapInfo unix_map_info; + + struct wg_sample *sample; + gsize written; } WgMemory;
typedef struct { GstAllocator parent; + + wg_allocator_request_sample_cb request_sample; + void *request_sample_context; + + pthread_mutex_t mutex; + pthread_cond_t release_cond; + struct list memory_list; } WgAllocator;
typedef struct @@ -58,6 +71,7 @@ G_DEFINE_TYPE(WgAllocator, wg_allocator, GST_TYPE_ALLOCATOR);
static gpointer wg_allocator_map(GstMemory *gst_memory, GstMapInfo *info, gsize maxsize) { + WgAllocator *allocator = (WgAllocator *)gst_memory->allocator; WgMemory *memory = (WgMemory *)gst_memory;
if (gst_memory->parent) @@ -65,7 +79,19 @@ static gpointer wg_allocator_map(GstMemory *gst_memory, GstMapInfo *info, gsize
GST_LOG("memory %p, info %p, maxsize %#zx", memory, info, maxsize);
- info->data = memory->unix_map_info.data; + pthread_mutex_lock(&allocator->mutex); + + if (!memory->sample) + info->data = memory->unix_map_info.data; + else + { + InterlockedIncrement(&memory->sample->refcount); + info->data = memory->sample->data; + } + if (info->flags & GST_MAP_WRITE) + memory->written = max(memory->written, maxsize); + + pthread_mutex_unlock(&allocator->mutex);
GST_INFO("Mapped memory %p to %p", memory, info->data); return info->data; @@ -73,12 +99,23 @@ static gpointer wg_allocator_map(GstMemory *gst_memory, GstMapInfo *info, gsize
static void wg_allocator_unmap(GstMemory *gst_memory, GstMapInfo *info) { + WgAllocator *allocator = (WgAllocator *)gst_memory->allocator; WgMemory *memory = (WgMemory *)gst_memory;
if (gst_memory->parent) return wg_allocator_unmap(gst_memory->parent, info);
GST_LOG("memory %p, info %p", memory, info); + + pthread_mutex_lock(&allocator->mutex); + + if (memory->sample && info->data == memory->sample->data) + { + InterlockedDecrement(&memory->sample->refcount); + pthread_cond_signal(&allocator->release_cond); + } + + pthread_mutex_unlock(&allocator->mutex); }
static void wg_allocator_init(WgAllocator *allocator) @@ -91,6 +128,10 @@ static void wg_allocator_init(WgAllocator *allocator) allocator->parent.mem_unmap_full = wg_allocator_unmap;
GST_OBJECT_FLAG_SET(allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC); + + pthread_mutex_init(&allocator->mutex, NULL); + pthread_cond_init(&allocator->release_cond, NULL); + list_init(&allocator->memory_list); }
static void wg_allocator_finalize(GObject *object) @@ -99,6 +140,9 @@ static void wg_allocator_finalize(GObject *object)
GST_LOG("allocator %p", allocator);
+ pthread_cond_destroy(&allocator->release_cond); + pthread_mutex_destroy(&allocator->mutex); + G_OBJECT_CLASS(wg_allocator_parent_class)->finalize(object); }
@@ -116,8 +160,15 @@ static GstMemory *wg_allocator_alloc(GstAllocator *gst_allocator, gsize size, memory->unix_memory = gst_allocator_alloc(NULL, size, params); gst_memory_map(memory->unix_memory, &memory->unix_map_info, GST_MAP_WRITE);
- GST_INFO("Allocated memory %p, unix_memory %p, data %p", memory, memory->unix_memory, - memory->unix_map_info.data); + pthread_mutex_lock(&allocator->mutex); + + memory->sample = allocator->request_sample(size, allocator->request_sample_context); + list_add_tail(&allocator->memory_list, &memory->entry); + + pthread_mutex_unlock(&allocator->mutex); + + GST_INFO("Allocated memory %p, sample %p, unix_memory %p, data %p", memory, + memory->sample, memory->unix_memory, memory->unix_map_info.data); return (GstMemory *)memory; }
@@ -128,6 +179,16 @@ static void wg_allocator_free(GstAllocator *gst_allocator, GstMemory *gst_memory
GST_LOG("allocator %p, memory %p", allocator, memory);
+ pthread_mutex_lock(&allocator->mutex); + + if (memory->sample) + InterlockedDecrement(&memory->sample->refcount); + memory->sample = NULL; + + list_remove(&memory->entry); + + pthread_mutex_unlock(&allocator->mutex); + gst_memory_unmap(memory->unix_memory, &memory->unix_map_info); gst_memory_unref(memory->unix_memory); g_slice_free(WgMemory, memory); @@ -145,18 +206,82 @@ static void wg_allocator_class_init(WgAllocatorClass *klass) root_class->finalize = wg_allocator_finalize; }
-GstAllocator *wg_allocator_create(void) +GstAllocator *wg_allocator_create(wg_allocator_request_sample_cb request_sample, void *request_sample_context) +{ + WgAllocator *allocator; + + if (!(allocator = g_object_new(wg_allocator_get_type(), NULL))) + return NULL; + + allocator->request_sample = request_sample; + allocator->request_sample_context = request_sample_context; + return GST_ALLOCATOR(allocator); +} + +static void release_memory_sample(WgAllocator *allocator, WgMemory *memory, bool discard_data) { - return g_object_new(wg_allocator_get_type(), NULL); + struct wg_sample *sample; + + if (!(sample = memory->sample)) + return; + + while (sample->refcount > 1) + { + GST_WARNING("Waiting for sample %p to be unmapped", sample); + pthread_cond_wait(&allocator->release_cond, &allocator->mutex); + } + InterlockedDecrement(&sample->refcount); + + if (memory->written && !discard_data) + { + GST_WARNING("Copying %#zx bytes from sample %p, back to memory %p", memory->written, sample, memory); + memcpy(memory->unix_map_info.data, memory->sample->data, memory->written); + } + + memory->sample = NULL; + GST_INFO("Released sample %p from memory %p", sample, memory); }
void wg_allocator_destroy(GstAllocator *gst_allocator) { WgAllocator *allocator = (WgAllocator *)gst_allocator; + WgMemory *memory;
GST_LOG("allocator %p", allocator);
+ pthread_mutex_lock(&allocator->mutex); + LIST_FOR_EACH_ENTRY(memory, &allocator->memory_list, WgMemory, entry) + release_memory_sample(allocator, memory, true); + pthread_mutex_unlock(&allocator->mutex); + g_object_unref(allocator);
GST_INFO("Destroyed buffer allocator %p", allocator); } + +static WgMemory *find_sample_memory(WgAllocator *allocator, struct wg_sample *sample) +{ + WgMemory *memory; + + LIST_FOR_EACH_ENTRY(memory, &allocator->memory_list, WgMemory, entry) + if (memory->sample == sample) + return memory; + + return NULL; +} + +void wg_allocator_release_sample(GstAllocator *gst_allocator, struct wg_sample *sample, + bool discard_data) +{ + WgAllocator *allocator = (WgAllocator *)gst_allocator; + WgMemory *memory; + + GST_LOG("allocator %p, sample %p, discard_data %u", allocator, sample, discard_data); + + pthread_mutex_lock(&allocator->mutex); + if ((memory = find_sample_memory(allocator, sample))) + release_memory_sample(allocator, memory, discard_data); + else if (sample->refcount) + GST_ERROR("Couldn't find memory for sample %p", sample); + pthread_mutex_unlock(&allocator->mutex); +} diff --git a/dlls/winegstreamer/wg_transform.c b/dlls/winegstreamer/wg_transform.c index c87536e5fbb..31d50e389cf 100644 --- a/dlls/winegstreamer/wg_transform.c +++ b/dlls/winegstreamer/wg_transform.c @@ -54,6 +54,7 @@ struct wg_transform GstBufferList *input; guint input_max_length; guint output_plane_align; + struct wg_sample *output_wg_sample; GstAtomicQueue *output_queue; GstSample *output_sample; bool output_caps_changed; @@ -305,6 +306,20 @@ static bool transform_append_element(struct wg_transform *transform, GstElement return success; }
+static struct wg_sample *transform_request_sample(gsize size, void *context) +{ + struct wg_transform *transform = context; + struct wg_sample *sample; + + GST_LOG("size %#zx, context %p", size, transform); + + sample = InterlockedExchangePointer((void **)&transform->output_wg_sample, NULL); + if (!sample || sample->max_size < size) + return NULL; + + return sample; +} + NTSTATUS wg_transform_create(void *args) { struct wg_transform_create_params *params = args; @@ -329,7 +344,7 @@ NTSTATUS wg_transform_create(void *args) goto out; if (!(transform->output_queue = gst_atomic_queue_new(8))) goto out; - if (!(transform->allocator = wg_allocator_create())) + if (!(transform->allocator = wg_allocator_create(transform_request_sample, transform))) goto out; transform->input_max_length = 1; transform->output_plane_align = 0; @@ -622,10 +637,22 @@ static bool copy_buffer(GstBuffer *buffer, GstCaps *caps, struct wg_sample *samp static NTSTATUS read_transform_output_data(GstBuffer *buffer, GstCaps *caps, gsize plane_align, struct wg_sample *sample) { + bool ret, needs_copy; gsize total_size; - bool ret; + GstMapInfo info; + + if (!gst_buffer_map(buffer, &info, GST_MAP_READ)) + { + GST_ERROR("Failed to map buffer %p", buffer); + sample->size = 0; + return STATUS_UNSUCCESSFUL; + } + needs_copy = info.data != sample->data; + gst_buffer_unmap(buffer, &info);
- if (is_caps_video(caps)) + if ((ret = !needs_copy)) + total_size = sample->size = info.size; + else if (is_caps_video(caps)) ret = copy_video_buffer(buffer, caps, plane_align, sample, &total_size); else ret = copy_buffer(buffer, caps, sample, &total_size); @@ -657,7 +684,18 @@ static NTSTATUS read_transform_output_data(GstBuffer *buffer, GstCaps *caps, gsi if (!GST_BUFFER_FLAG_IS_SET(buffer, GST_BUFFER_FLAG_DELTA_UNIT)) sample->flags |= WG_SAMPLE_FLAG_SYNC_POINT;
- GST_INFO("Copied %u bytes, sample %p, flags %#x", sample->size, sample, sample->flags); + if (needs_copy) + { + if (is_caps_video(caps)) + GST_WARNING("Copied %u bytes, sample %p, flags %#x", sample->size, sample, sample->flags); + else + GST_INFO("Copied %u bytes, sample %p, flags %#x", sample->size, sample, sample->flags); + } + else if (sample->flags & WG_SAMPLE_FLAG_INCOMPLETE) + GST_ERROR("Partial read %u bytes, sample %p, flags %#x", sample->size, sample, sample->flags); + else + GST_INFO("Read %u bytes, sample %p, flags %#x", sample->size, sample, sample->flags); + return STATUS_SUCCESS; }
@@ -667,23 +705,38 @@ NTSTATUS wg_transform_read_data(void *args) struct wg_transform *transform = params->transform; struct wg_sample *sample = params->sample; struct wg_format *format = params->format; - GstBufferList *input = transform->input; + GstFlowReturn ret = GST_FLOW_OK; GstBuffer *output_buffer; + GstBufferList *input; GstCaps *output_caps; - GstFlowReturn ret; + bool discard_data; NTSTATUS status;
+ /* Provide the sample for transform_request_sample to pick it up */ + InterlockedIncrement(&sample->refcount); + InterlockedExchangePointer((void **)&transform->output_wg_sample, sample); + if (!gst_buffer_list_length(transform->input)) GST_DEBUG("Not input buffer queued"); - else if (!(transform->input = gst_buffer_list_new())) + else if ((input = gst_buffer_list_new())) + { + ret = gst_pad_push_list(transform->my_src, transform->input); + transform->input = input; + } + else { GST_ERROR("Failed to allocate new input queue"); - gst_buffer_list_unref(input); - return STATUS_NO_MEMORY; + ret = GST_FLOW_ERROR; } - else if ((ret = gst_pad_push_list(transform->my_src, input))) + + /* Remove the sample so transform_request_sample cannot use it */ + if (InterlockedExchangePointer((void **)&transform->output_wg_sample, NULL)) + InterlockedDecrement(&sample->refcount); + + if (ret) { GST_ERROR("Failed to push transform input, error %d", ret); + wg_allocator_release_sample(transform->allocator, sample, false); return STATUS_UNSUCCESSFUL; }
@@ -692,6 +745,7 @@ NTSTATUS wg_transform_read_data(void *args) sample->size = 0; params->result = MF_E_TRANSFORM_NEED_MORE_INPUT; GST_INFO("Cannot read %u bytes, no output available", sample->max_size); + wg_allocator_release_sample(transform->allocator, sample, false); return STATUS_SUCCESS; }
@@ -730,19 +784,41 @@ NTSTATUS wg_transform_read_data(void *args)
params->result = MF_E_TRANSFORM_STREAM_CHANGE; GST_INFO("Format changed detected, returning no output"); + wg_allocator_release_sample(transform->allocator, sample, false); return STATUS_SUCCESS; }
if ((status = read_transform_output_data(output_buffer, output_caps, transform->output_plane_align, sample))) + { + wg_allocator_release_sample(transform->allocator, sample, false); return status; + }
- if (!(sample->flags & WG_SAMPLE_FLAG_INCOMPLETE)) + if (sample->flags & WG_SAMPLE_FLAG_INCOMPLETE) + discard_data = false; + else { + /* Taint the buffer memory to make sure it cannot be reused by the buffer pool, + * for the pool to always requests new memory from the allocator, and so we can + * then always provide output sample memory to achieve zero-copy. + * + * However, some decoder keep a reference on the buffer they passed downstream, + * to re-use it later. In this case, it will not be possible to do zero-copy, + * and we should copy the data back to the buffer and leave it unchanged. + * + * Some other plugins make assumptions that the returned buffer will always have + * at least one memory attached, we cannot just remove it and need to replace the + * memory instead. + */ + if ((discard_data = gst_buffer_is_writable(output_buffer))) + gst_buffer_replace_all_memory(output_buffer, gst_allocator_alloc(NULL, 0, NULL)); + gst_sample_unref(transform->output_sample); transform->output_sample = NULL; }
params->result = S_OK; + wg_allocator_release_sample(transform->allocator, sample, discard_data); return STATUS_SUCCESS; }
From: Rémi Bernon rbernon@codeweavers.com
Signed-off-by: Rémi Bernon rbernon@codeweavers.com --- dlls/mf/tests/mf.c | 1 - dlls/winegstreamer/gst_private.h | 9 ++- dlls/winegstreamer/h264_decoder.c | 17 ++++-- dlls/winegstreamer/mfplat.c | 87 ++++++++++++++++++++++++++- dlls/winegstreamer/quartz_transform.c | 9 +++ dlls/winegstreamer/unixlib.h | 2 + dlls/winegstreamer/wg_transform.c | 28 ++++++++- dlls/winegstreamer/wma_decoder.c | 15 ++++- 8 files changed, 153 insertions(+), 15 deletions(-)
diff --git a/dlls/mf/tests/mf.c b/dlls/mf/tests/mf.c index 03035aa5e9f..13f0e38b0b8 100644 --- a/dlls/mf/tests/mf.c +++ b/dlls/mf/tests/mf.c @@ -6423,7 +6423,6 @@ static void test_wma_decoder(void) hr = IMFTransform_ProcessInput(transform, 0, sample, 0); ok(hr == MF_E_NOTACCEPTING, "ProcessInput returned %#lx\n", hr); ret = IMFSample_Release(sample); - todo_wine ok(ret == 1, "Release returned %lu\n", ret);
/* As output_info.dwFlags doesn't have MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h index ab89942847a..da76452fbf4 100644 --- a/dlls/winegstreamer/gst_private.h +++ b/dlls/winegstreamer/gst_private.h @@ -64,6 +64,12 @@ static inline const char *debugstr_time(REFERENCE_TIME time)
#define MEDIATIME_FROM_BYTES(x) ((LONGLONG)(x) * 10000000)
+struct wg_sample_queue; + +HRESULT wg_sample_queue_create(struct wg_sample_queue **out); +void wg_sample_queue_destroy(struct wg_sample_queue *queue); +void wg_sample_queue_flush(struct wg_sample_queue *queue, bool all); + struct wg_parser *wg_parser_create(enum wg_parser_type type, bool unlimited_buffering); void wg_parser_destroy(struct wg_parser *parser);
@@ -123,7 +129,8 @@ void mf_media_type_to_wg_format(IMFMediaType *type, struct wg_format *format); HRESULT wg_sample_create_mf(IMFSample *sample, struct wg_sample **out); void wg_sample_release(struct wg_sample *wg_sample);
-HRESULT wg_transform_push_mf(struct wg_transform *transform, struct wg_sample *sample); +HRESULT wg_transform_push_mf(struct wg_transform *transform, struct wg_sample *sample, + struct wg_sample_queue *queue); HRESULT wg_transform_read_mf(struct wg_transform *transform, struct wg_sample *sample, struct wg_format *format);
diff --git a/dlls/winegstreamer/h264_decoder.c b/dlls/winegstreamer/h264_decoder.c index 8d3de4355bb..012a7060f29 100644 --- a/dlls/winegstreamer/h264_decoder.c +++ b/dlls/winegstreamer/h264_decoder.c @@ -52,6 +52,7 @@ struct h264_decoder
struct wg_format wg_format; struct wg_transform *wg_transform; + struct wg_sample_queue *wg_sample_queue; };
static struct h264_decoder *impl_from_IMFTransform(IMFTransform *iface) @@ -237,6 +238,8 @@ static ULONG WINAPI transform_Release(IMFTransform *iface) IMFMediaType_Release(decoder->input_type); if (decoder->output_type) IMFMediaType_Release(decoder->output_type); + + wg_sample_queue_destroy(decoder->wg_sample_queue); free(decoder); }
@@ -543,9 +546,7 @@ static HRESULT WINAPI transform_ProcessInput(IMFTransform *iface, DWORD id, IMFS if (FAILED(hr = wg_sample_create_mf(sample, &wg_sample))) return hr;
- hr = wg_transform_push_mf(decoder->wg_transform, wg_sample); - wg_sample_release(wg_sample); - return hr; + return wg_transform_push_mf(decoder->wg_transform, wg_sample, decoder->wg_sample_queue); }
static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, DWORD count, @@ -582,7 +583,8 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, return MF_E_BUFFERTOOSMALL; }
- hr = wg_transform_read_mf(decoder->wg_transform, wg_sample, &wg_format); + if (SUCCEEDED(hr = wg_transform_read_mf(decoder->wg_transform, wg_sample, &wg_format))) + wg_sample_queue_flush(decoder->wg_sample_queue, false); wg_sample_release(wg_sample);
if (hr == MF_E_TRANSFORM_STREAM_CHANGE) @@ -648,6 +650,7 @@ HRESULT h264_decoder_create(REFIID riid, void **ret) static const struct wg_format input_format = {.major_type = WG_MAJOR_TYPE_H264}; struct wg_transform *transform; struct h264_decoder *decoder; + HRESULT hr;
TRACE("riid %s, ret %p.\n", debugstr_guid(riid), ret);
@@ -669,6 +672,12 @@ HRESULT h264_decoder_create(REFIID riid, void **ret) decoder->wg_format.u.video.fps_n = 30000; decoder->wg_format.u.video.fps_d = 1001;
+ if (FAILED(hr = wg_sample_queue_create(&decoder->wg_sample_queue))) + { + free(decoder); + return hr; + } + *ret = &decoder->IMFTransform_iface; TRACE("Created decoder %p\n", *ret); return S_OK; diff --git a/dlls/winegstreamer/mfplat.c b/dlls/winegstreamer/mfplat.c index 40199706759..157ed3ad2f2 100644 --- a/dlls/winegstreamer/mfplat.c +++ b/dlls/winegstreamer/mfplat.c @@ -26,6 +26,7 @@ #include "mfapi.h"
#include "wine/debug.h" +#include "wine/list.h"
WINE_DEFAULT_DEBUG_CHANNEL(mfplat);
@@ -958,11 +959,18 @@ void mf_media_type_to_wg_format(IMFMediaType *type, struct wg_format *format) FIXME("Unrecognized major type %s.\n", debugstr_guid(&major_type)); }
+struct wg_sample_queue +{ + CRITICAL_SECTION cs; + struct list samples; +}; + struct mf_sample { IMFSample *sample; IMFMediaBuffer *media_buffer; struct wg_sample wg_sample; + struct list entry; };
HRESULT wg_sample_create_mf(IMFSample *sample, struct wg_sample **out) @@ -1008,14 +1016,83 @@ void wg_sample_release(struct wg_sample *wg_sample) IMFMediaBuffer_Unlock(mf_sample->media_buffer); IMFMediaBuffer_Release(mf_sample->media_buffer); IMFSample_Release(mf_sample->sample); + free(mf_sample); }
-HRESULT wg_transform_push_mf(struct wg_transform *transform, struct wg_sample *sample) +static void wg_sample_queue_begin_append(struct wg_sample_queue *queue, struct wg_sample *wg_sample) +{ + struct mf_sample *mf_sample = CONTAINING_RECORD(wg_sample, struct mf_sample, wg_sample); + + /* make sure a concurrent wg_sample_queue_flush call won't release the sample until we're done */ + InterlockedIncrement(&wg_sample->refcount); + mf_sample->wg_sample.flags |= WG_SAMPLE_FLAG_HAS_REFCOUNT; + + EnterCriticalSection(&queue->cs); + list_add_tail(&queue->samples, &mf_sample->entry); + LeaveCriticalSection(&queue->cs); +} + +static void wg_sample_queue_end_append(struct wg_sample_queue *queue, struct wg_sample *wg_sample) +{ + /* release temporary ref taken in wg_sample_queue_begin_append */ + InterlockedDecrement(&wg_sample->refcount); + + wg_sample_queue_flush(queue, false); +} + +void wg_sample_queue_flush(struct wg_sample_queue *queue, bool all) +{ + struct mf_sample *mf_sample, *next; + + EnterCriticalSection(&queue->cs); + + LIST_FOR_EACH_ENTRY_SAFE(mf_sample, next, &queue->samples, struct mf_sample, entry) + { + if (!InterlockedOr(&mf_sample->wg_sample.refcount, 0) || all) + { + list_remove(&mf_sample->entry); + wg_sample_release(&mf_sample->wg_sample); + } + } + + LeaveCriticalSection(&queue->cs); +} + +HRESULT wg_sample_queue_create(struct wg_sample_queue **out) { - struct mf_sample *mf_sample = CONTAINING_RECORD(sample, struct mf_sample, wg_sample); + struct wg_sample_queue *queue; + + if (!(queue = calloc(1, sizeof(*queue)))) + return E_OUTOFMEMORY; + + InitializeCriticalSection(&queue->cs); + queue->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": cs"); + list_init(&queue->samples); + + TRACE("Created sample queue %p\n", queue); + *out = queue; + + return S_OK; +} + +void wg_sample_queue_destroy(struct wg_sample_queue *queue) +{ + wg_sample_queue_flush(queue, true); + + queue->cs.DebugInfo->Spare[0] = 0; + InitializeCriticalSection(&queue->cs); + + free(queue); +} + +HRESULT wg_transform_push_mf(struct wg_transform *transform, struct wg_sample *wg_sample, + struct wg_sample_queue *queue) +{ + struct mf_sample *mf_sample = CONTAINING_RECORD(wg_sample, struct mf_sample, wg_sample); LONGLONG time, duration; UINT32 value; + HRESULT hr;
if (SUCCEEDED(IMFSample_GetSampleTime(mf_sample->sample, &time))) { @@ -1030,7 +1107,11 @@ HRESULT wg_transform_push_mf(struct wg_transform *transform, struct wg_sample *s if (SUCCEEDED(IMFSample_GetUINT32(mf_sample->sample, &MFSampleExtension_CleanPoint, &value)) && value) mf_sample->wg_sample.flags |= WG_SAMPLE_FLAG_SYNC_POINT;
- return wg_transform_push_data(transform, sample); + wg_sample_queue_begin_append(queue, wg_sample); + hr = wg_transform_push_data(transform, wg_sample); + wg_sample_queue_end_append(queue, wg_sample); + + return hr; }
HRESULT wg_transform_read_mf(struct wg_transform *transform, struct wg_sample *wg_sample, diff --git a/dlls/winegstreamer/quartz_transform.c b/dlls/winegstreamer/quartz_transform.c index 326b8691a42..b701b3f6369 100644 --- a/dlls/winegstreamer/quartz_transform.c +++ b/dlls/winegstreamer/quartz_transform.c @@ -40,6 +40,7 @@ struct transform IQualityControl *qc_sink;
struct wg_transform *transform; + struct wg_sample_queue *sample_queue;
const struct transform_ops *ops; }; @@ -76,6 +77,7 @@ static void transform_destroy(struct strmbase_filter *iface) strmbase_sink_cleanup(&filter->sink); strmbase_filter_cleanup(&filter->filter);
+ wg_sample_queue_destroy(filter->sample_queue); free(filter); }
@@ -572,11 +574,18 @@ static const IQualityControlVtbl source_quality_control_vtbl = static HRESULT transform_create(IUnknown *outer, const CLSID *clsid, const struct transform_ops *ops, struct transform **out) { struct transform *object; + HRESULT hr;
object = calloc(1, sizeof(*object)); if (!object) return E_OUTOFMEMORY;
+ if (FAILED(hr = wg_sample_queue_create(&object->sample_queue))) + { + free(object); + return hr; + } + strmbase_filter_init(&object->filter, outer, clsid, &filter_ops); strmbase_sink_init(&object->sink, &object->filter, L"In", &sink_ops, NULL); strmbase_source_init(&object->source, &object->filter, L"Out", &source_ops); diff --git a/dlls/winegstreamer/unixlib.h b/dlls/winegstreamer/unixlib.h index 860a8ab2a52..ed56fb47908 100644 --- a/dlls/winegstreamer/unixlib.h +++ b/dlls/winegstreamer/unixlib.h @@ -121,6 +121,8 @@ enum wg_sample_flag WG_SAMPLE_FLAG_HAS_PTS = 2, WG_SAMPLE_FLAG_HAS_DURATION = 4, WG_SAMPLE_FLAG_SYNC_POINT = 8, + + WG_SAMPLE_FLAG_HAS_REFCOUNT = 0x10000, /* sample is queued on the client side and may be wrapped */ };
struct wg_sample diff --git a/dlls/winegstreamer/wg_transform.c b/dlls/winegstreamer/wg_transform.c index 31d50e389cf..b5dbbe0955b 100644 --- a/dlls/winegstreamer/wg_transform.c +++ b/dlls/winegstreamer/wg_transform.c @@ -518,6 +518,13 @@ out: return status; }
+static void wg_sample_free_notify(void *arg) +{ + struct wg_sample *sample = arg; + GST_DEBUG("Releasing wg_sample %p", sample); + InterlockedDecrement(&sample->refcount); +} + NTSTATUS wg_transform_push_data(void *args) { struct wg_transform_push_data_params *params = args; @@ -534,12 +541,28 @@ NTSTATUS wg_transform_push_data(void *args) return STATUS_SUCCESS; }
- if (!(buffer = gst_buffer_new_and_alloc(sample->size))) + if (!(sample->flags & WG_SAMPLE_FLAG_HAS_REFCOUNT)) + { + if (!(buffer = gst_buffer_new_and_alloc(sample->size))) + { + GST_ERROR("Failed to allocate input buffer"); + return STATUS_NO_MEMORY; + } + gst_buffer_fill(buffer, 0, sample->data, sample->size); + GST_INFO("Copied %u bytes from sample %p to buffer %p", sample->size, sample, buffer); + } + else if (!(buffer = gst_buffer_new_wrapped_full(GST_MEMORY_FLAG_READONLY, sample->data, sample->max_size, + 0, sample->size, sample, wg_sample_free_notify))) { GST_ERROR("Failed to allocate input buffer"); return STATUS_NO_MEMORY; } - gst_buffer_fill(buffer, 0, sample->data, sample->size); + else + { + InterlockedIncrement(&sample->refcount); + GST_INFO("Wrapped %u/%u bytes from sample %p to buffer %p", sample->size, sample->max_size, sample, buffer); + } + if (sample->flags & WG_SAMPLE_FLAG_HAS_PTS) GST_BUFFER_PTS(buffer) = sample->pts * 100; if (sample->flags & WG_SAMPLE_FLAG_HAS_DURATION) @@ -548,7 +571,6 @@ NTSTATUS wg_transform_push_data(void *args) GST_BUFFER_FLAG_SET(buffer, GST_BUFFER_FLAG_DELTA_UNIT); gst_buffer_list_insert(transform->input, -1, buffer);
- GST_INFO("Copied %u bytes from sample %p to input buffer list", sample->size, sample); params->result = S_OK; return STATUS_SUCCESS; } diff --git a/dlls/winegstreamer/wma_decoder.c b/dlls/winegstreamer/wma_decoder.c index 81285f5ad44..e2a7a770826 100644 --- a/dlls/winegstreamer/wma_decoder.c +++ b/dlls/winegstreamer/wma_decoder.c @@ -56,6 +56,7 @@ struct wma_decoder IMFMediaType *output_type;
struct wg_transform *wg_transform; + struct wg_sample_queue *wg_sample_queue; };
static inline struct wma_decoder *impl_from_IUnknown(IUnknown *iface) @@ -135,6 +136,8 @@ static ULONG WINAPI unknown_Release(IUnknown *iface) IMFMediaType_Release(decoder->input_type); if (decoder->output_type) IMFMediaType_Release(decoder->output_type); + + wg_sample_queue_destroy(decoder->wg_sample_queue); free(decoder); }
@@ -544,9 +547,7 @@ static HRESULT WINAPI transform_ProcessInput(IMFTransform *iface, DWORD id, IMFS return S_OK; }
- hr = wg_transform_push_mf(decoder->wg_transform, wg_sample); - wg_sample_release(wg_sample); - return hr; + return wg_transform_push_mf(decoder->wg_transform, wg_sample, decoder->wg_sample_queue); }
static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, DWORD count, @@ -590,6 +591,7 @@ static HRESULT WINAPI transform_ProcessOutput(IMFTransform *iface, DWORD flags, { if (wg_sample->flags & WG_SAMPLE_FLAG_INCOMPLETE) samples[0].dwStatus |= MFT_OUTPUT_DATA_BUFFER_INCOMPLETE; + wg_sample_queue_flush(decoder->wg_sample_queue, false); }
wg_sample_release(wg_sample); @@ -882,6 +884,7 @@ HRESULT wma_decoder_create(IUnknown *outer, IUnknown **out) static const struct wg_format input_format = {.major_type = WG_MAJOR_TYPE_WMA}; struct wg_transform *transform; struct wma_decoder *decoder; + HRESULT hr;
TRACE("outer %p, out %p.\n", outer, out);
@@ -895,6 +898,12 @@ HRESULT wma_decoder_create(IUnknown *outer, IUnknown **out) if (!(decoder = calloc(1, sizeof(*decoder)))) return E_OUTOFMEMORY;
+ if (FAILED(hr = wg_sample_queue_create(&decoder->wg_sample_queue))) + { + free(decoder); + return hr; + } + decoder->IUnknown_inner.lpVtbl = &unknown_vtbl; decoder->IMFTransform_iface.lpVtbl = &transform_vtbl; decoder->IMediaObject_iface.lpVtbl = &media_object_vtbl;
On 6/8/22 03:37, Rémi Bernon (@rbernon) wrote:
Could we do this without the callback? Just thinking out loud, maybe something like
void wg_allocator_add_sample(WgAllocator *allocator, struct wg_sample *sample); void wg_allocator_remove_sample(WgAllocator *allocator, struct wg_sample *sample);
which manipulate an internal wg_sample pointer, instead of having them access the wg_transform's sample pointer. Then wg_allocator_remove_sample() would also end up calling wg_allocator_release_sample().
I suspect I'm missing something, though, given my below comment...
The callback is with a future use from wg_parser in mind. In that case we would forward allocation requests to the read thread (for instance).
Ah, in order to allocate ad-hoc samples, since we don't have the same requirement for the output buffer?
It seems plausible, but I was also inclined to avoid this from the beginning due to the increased code complexity. Setting up a pool of wg_samples when connecting seems like a potentially better option, for instance.
I don't think so, we simply cannot know how many buffer GStreamer is going to need. Allocating them when it requests them is the right solution here, it's also simpler.
Had to look this up, but apparently the default GstBufferPool will throw away buffers if they're modified. I thought the "empty" buffers would pile up and never be freed, but evidently not.
I've probably had this question answered before, but can we replace the buffer pool itself, instead of just the allocator?
We could reimplement a custom buffer pool that always free buffers, but that would be more work and still won't solve the underlying problem.
The problem the allocator is solving is that some decoders don't release their buffers to the pool. The gst_buffer_is_writable check catches that and gst_buffer_replace_all_memory would fail anyway if we're not the only ones holding a ref on the buffer. So instead of discarding the data we copy it back into the buffer unix memory.
Right, but that's the problem that the allocator is solving on unmap, which is orthogonal to the "don't reuse this buffer" problem.
I guess that overriding unmap requires having a custom allocator, so I should ask if we can replace the pool *as well as* the allocator.
Re-implementing a pool doesn't help anything. The default implementation already frees the buffers when their memory is tainted, we don't need anything more.
Later, when the decoder will eventually release the buffer to the pool, it will be untainted, and so not freed until the pool is destroyed, and reusable without going through the allocator. After a few iterations the pool has enough allocated buffers, we won't get any allocation requests anymore and our samples are never used directly and we copy the data normally.
Actually, wait, isn't this a problem we'd be able to fix *only* by using a custom pool? Why doesn't the current patch 4/5 suffer from this problem?
I don't understand, it's not really a problem and the code handles that case with the discard_data option at the same time as it handles format changes and partial reads (which should not happen anyway).
On 6/8/22 17:10, Rémi Bernon (@rbernon) wrote:
On 6/8/22 03:37, Rémi Bernon (@rbernon) wrote:
Could we do this without the callback? Just thinking out loud, maybe something like
void wg_allocator_add_sample(WgAllocator *allocator, struct wg_sample *sample); void wg_allocator_remove_sample(WgAllocator *allocator, struct wg_sample *sample);
which manipulate an internal wg_sample pointer, instead of having them access the wg_transform's sample pointer. Then wg_allocator_remove_sample() would also end up calling wg_allocator_release_sample().
I suspect I'm missing something, though, given my below comment...
The callback is with a future use from wg_parser in mind. In that case we would forward allocation requests to the read thread (for instance).
Ah, in order to allocate ad-hoc samples, since we don't have the same requirement for the output buffer?
It seems plausible, but I was also inclined to avoid this from the beginning due to the increased code complexity. Setting up a pool of wg_samples when connecting seems like a potentially better option, for instance.
I don't think so, we simply cannot know how many buffer GStreamer is going to need. Allocating them when it requests them is the right solution here, it's also simpler.
Sure, conceptually it's simpler, but the callbacks make it really ugly. (Also the fact that it's so different from the API for the transform...)
We can't exactly predict how many buffers a GStreamer pipeline needs (well, unless it tells us through the buffer pool config), but we don't necessarily need to, either; we can allocate new buffers and blit if our pool wasn't large enough, and just make a point of always allocating "enough" PE buffers in practice.
Had to look this up, but apparently the default GstBufferPool will throw away buffers if they're modified. I thought the "empty" buffers would pile up and never be freed, but evidently not.
I've probably had this question answered before, but can we replace the buffer pool itself, instead of just the allocator?
We could reimplement a custom buffer pool that always free buffers, but that would be more work and still won't solve the underlying problem.
The problem the allocator is solving is that some decoders don't release their buffers to the pool. The gst_buffer_is_writable check catches that and gst_buffer_replace_all_memory would fail anyway if we're not the only ones holding a ref on the buffer. So instead of discarding the data we copy it back into the buffer unix memory.
Right, but that's the problem that the allocator is solving on unmap, which is orthogonal to the "don't reuse this buffer" problem.
I guess that overriding unmap requires having a custom allocator, so I should ask if we can replace the pool *as well as* the allocator.
Re-implementing a pool doesn't help anything. The default implementation already frees the buffers when their memory is tainted, we don't need anything more.
Well, sure. I just ask partly because tainting memory is not the most obvious way of accomplishing our goals, and I wanted to see if other options were considered. (Also, at this point, because of the problem described below.)
Later, when the decoder will eventually release the buffer to the pool, it will be untainted, and so not freed until the pool is destroyed, and reusable without going through the allocator. After a few iterations the pool has enough allocated buffers, we won't get any allocation requests anymore and our samples are never used directly and we copy the data normally.
Actually, wait, isn't this a problem we'd be able to fix *only* by using a custom pool? Why doesn't the current patch 4/5 suffer from this problem?
I don't understand, it's not really a problem and the code handles that case with the discard_data option at the same time as it handles format changes and partial reads (which should not happen anyway).
If we read an entire buffer but the decoder is still holding onto the GstBuffer, we won't be able to "taint" it because it's not writable, but we'll still remove the wg_sample. Later when the decoder does free the GstBuffer it'll be untainted and hence can be reused. Hence the buffer pool will pick up that buffer instead of the zero-copy buffer we want.
Sure, conceptually it's simpler, but the callbacks make it really ugly. (Also the fact that it's so different from the API for the transform...)
We can't exactly predict how many buffers a GStreamer pipeline needs (well, unless it tells us through the buffer pool config), but we don't necessarily need to, either; we can allocate new buffers and blit if our pool wasn't large enough, and just make a point of always allocating "enough" PE buffers in practice.
It seems brittle, not robust to GStreamer changes, and requires tweaking to avoid bad cases where we would copy buffers. The general solution is simple and I have it already implemented. I'm not going to rewrite everything just to get rid of a callback.
Well, sure. I just ask partly because tainting memory is not the most obvious way of accomplishing our goals, and I wanted to see if other options were considered. (Also, at this point, because of the problem described below.)
Yes, I have explored what could be done with a pool, I actually wanted to use one, as it seems more natural. It just doesn't work and doesn't help us doing anything except maybe free the released buffer automatically.
Though if, after all, the pool cannot be safely used for zero-copy as in the case discussed below, we also wouldn't want to free buffers. The default pool does all this exactly as we want, depending on whether memory has been tainted.
If we read an entire buffer but the decoder is still holding onto the GstBuffer, we won't be able to "taint" it because it's not writable, but we'll still remove the wg_sample. Later when the decoder does free the GstBuffer it'll be untainted and hence can be reused. Hence the buffer pool will pick up that buffer instead of the zero-copy buffer we want.
If the decoder and the pipeline (it also depends on what videoconvert decides to do) does hold on the buffers, then it will likely do it all the time. In this case there's no point in trying the zero-copy every time.
After a while and after enough buffers have been tried and not freed by the pool, we won't get any more useless allocations, and we'll continue in a mode where the buffers are just copied the normal way.
This merge request was approved by Zebediah Figura.