From: Ziqing Hui <zhui@codeweavers.com>
---
 dlls/mf/tests/mf.c                           |  67 ++---
 dlls/winegstreamer/Makefile.in               |   1 +
 dlls/winegstreamer/gst_private.h             |   2 +
 dlls/winegstreamer/main.c                    |   3 +
 dlls/winegstreamer/mpeg4_muxer.c             | 243 +++++++++++++++++++
 dlls/winegstreamer/winegstreamer_classes.idl |   7 +
 6 files changed, 292 insertions(+), 31 deletions(-)
 create mode 100644 dlls/winegstreamer/mpeg4_muxer.c

diff --git a/dlls/mf/tests/mf.c b/dlls/mf/tests/mf.c
index 18488649523..d880a4eb600 100644
--- a/dlls/mf/tests/mf.c
+++ b/dlls/mf/tests/mf.c
@@ -6802,61 +6802,42 @@ static void test_mpeg4_media_sink(void)
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
 
     hr = MFCreateMPEG4MediaSink(NULL, NULL, NULL, NULL);
-    todo_wine
     ok(hr == E_POINTER, "Unexpected hr %#lx.\n", hr);
 
     sink = (void *)0xdeadbeef;
     hr = MFCreateMPEG4MediaSink(NULL, NULL, NULL, &sink);
-    todo_wine
     ok(hr == E_POINTER, "Unexpected hr %#lx.\n", hr);
     ok(sink == (void *)0xdeadbeef, "Unexpected pointer %p.\n", sink);
     sink = NULL;
 
     hr = MFCreateMPEG4MediaSink(bytestream_empty, NULL, NULL, &sink_empty);
-    todo_wine
     ok(hr == S_OK || broken(hr == E_INVALIDARG), "Unexpected hr %#lx.\n", hr);
-
     hr = MFCreateMPEG4MediaSink(bytestream_audio, NULL, audio_type, &sink_audio);
-    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
-
     hr = MFCreateMPEG4MediaSink(bytestream_video, video_type, NULL, &sink_video);
-    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
-
     hr = MFCreateMPEG4MediaSink(bytestream, video_type, audio_type, &sink);
-    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
 
-    if (!sink)
-    {
-        if (sink_video)
-            IMFMediaSink_Release(sink_video);
-        if (sink_audio)
-            IMFMediaSink_Release(sink_audio);
-        if (sink_empty)
-            IMFMediaSink_Release(sink_empty);
-        IMFByteStream_Release(bytestream);
-        IMFByteStream_Release(bytestream_empty);
-        IMFByteStream_Release(bytestream_video);
-        IMFByteStream_Release(bytestream_audio);
-        IMFMediaType_Release(video_type);
-        IMFMediaType_Release(audio_type);
-        return;
-    }
-
     /* Test sink. */
     hr = IMFMediaSink_GetCharacteristics(sink, &flags);
+    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
+    todo_wine
     ok(flags == MEDIASINK_RATELESS || broken(flags == (MEDIASINK_RATELESS | MEDIASINK_FIXED_STREAMS)),
             "Unexpected flags %#lx.\n", flags);
 
+    todo_wine
     check_interface(sink, &IID_IMFMediaEventGenerator, TRUE);
     check_interface(sink, &IID_IMFFinalizableMediaSink, TRUE);
+    todo_wine
     check_interface(sink, &IID_IMFClockStateSink, TRUE);
+    todo_wine
     check_interface(sink, &IID_IMFGetService, TRUE);
 
     /* Test sink stream count. */
+    todo_wine
+    {
     hr = IMFMediaSink_GetStreamSinkCount(sink, NULL);
     ok(hr == E_POINTER, "Unexpected hr %#lx.\n", hr);
@@ -6878,53 +6859,72 @@ static void test_mpeg4_media_sink(void)
         ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
         ok(count == 0, "Unexpected count %lu.\n", count);
     }
+    }
 
     /* Test GetStreamSinkByIndex. */
     hr = IMFMediaSink_GetStreamSinkByIndex(sink_video, 0, &stream_sink);
+    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
     hr = IMFStreamSink_GetIdentifier(stream_sink, &id);
+    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
+    todo_wine
     ok(id == 1, "Unexpected id %lu.\n", id);
     IMFStreamSink_Release(stream_sink);
 
     hr = IMFMediaSink_GetStreamSinkByIndex(sink_audio, 0, &stream_sink);
+    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
     hr = IMFStreamSink_GetIdentifier(stream_sink, &id);
+    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
+    todo_wine
     ok(id == 2, "Unexpected id %lu.\n", id);
     IMFStreamSink_Release(stream_sink);
 
     stream_sink = (void *)0xdeadbeef;
     hr = IMFMediaSink_GetStreamSinkByIndex(sink_audio, 1, &stream_sink);
+    todo_wine
     ok(hr == MF_E_INVALIDINDEX, "Unexpected hr %#lx.\n", hr);
     ok(stream_sink == (void *)0xdeadbeef, "Unexpected pointer %p.\n", stream_sink);
 
     stream_sink = (void *)0xdeadbeef;
     hr = IMFMediaSink_GetStreamSinkByIndex(sink_video, 1, &stream_sink);
+    todo_wine
     ok(hr == MF_E_INVALIDINDEX, "Unexpected hr %#lx.\n", hr);
     ok(stream_sink == (void *)0xdeadbeef, "Unexpected pointer %p.\n", stream_sink);
 
     /* Test GetStreamSinkById. */
     hr = IMFMediaSink_GetStreamSinkById(sink, 1, &stream_sink);
+    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
-    IMFStreamSink_Release(stream_sink);
+    if (hr == S_OK)
+        IMFStreamSink_Release(stream_sink);
     hr = IMFMediaSink_GetStreamSinkById(sink, 2, &stream_sink);
+    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
-    IMFStreamSink_Release(stream_sink);
+    if (hr == S_OK)
+        IMFStreamSink_Release(stream_sink);
     hr = IMFMediaSink_GetStreamSinkById(sink_video, 1, &stream_sink);
+    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
-    IMFStreamSink_Release(stream_sink);
+    if (hr == S_OK)
+        IMFStreamSink_Release(stream_sink);
     hr = IMFMediaSink_GetStreamSinkById(sink_audio, 2, &stream_sink);
+    todo_wine
     ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
-    IMFStreamSink_Release(stream_sink);
+    if (hr == S_OK)
+        IMFStreamSink_Release(stream_sink);
 
     stream_sink = (void *)0xdeadbeef;
     hr = IMFMediaSink_GetStreamSinkById(sink_video, 2, &stream_sink);
+    todo_wine
     ok(hr == MF_E_INVALIDSTREAMNUMBER, "Unexpected hr %#lx.\n", hr);
     ok(stream_sink == (void *)0xdeadbeef, "Unexpected pointer %p.\n", stream_sink);
 
     stream_sink = (void *)0xdeadbeef;
     hr = IMFMediaSink_GetStreamSinkById(sink_audio, 1, &stream_sink);
+    todo_wine
     ok(hr == MF_E_INVALIDSTREAMNUMBER, "Unexpected hr %#lx.\n", hr);
     ok(stream_sink == (void *)0xdeadbeef, "Unexpected pointer %p.\n", stream_sink);
 
@@ -6932,7 +6932,10 @@ static void test_mpeg4_media_sink(void)
     if (!(flags & MEDIASINK_FIXED_STREAMS))
     {
         hr = IMFMediaSink_AddStreamSink(sink, 123, video_type, &stream_sink);
+        todo_wine
         ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
+        if (hr != S_OK)
+            goto done;
         IMFStreamSink_Release(stream_sink);
         hr = IMFMediaSink_GetStreamSinkByIndex(sink, 2, &stream_sink);
         ok(hr == S_OK, "Unexpected hr %#lx.\n", hr);
@@ -7039,7 +7042,9 @@ static void test_mpeg4_media_sink(void)
     hr = IMFMediaSink_GetCharacteristics(sink, &flags);
     ok(hr == MF_E_SHUTDOWN, "Unexpected hr %#lx.\n", hr);
 
-    IMFMediaTypeHandler_Release(type_handler);
+done:
+    if (type_handler)
+        IMFMediaTypeHandler_Release(type_handler);
     IMFMediaSink_Release(sink);
     IMFMediaSink_Release(sink_video);
     IMFMediaSink_Release(sink_audio);
diff --git a/dlls/winegstreamer/Makefile.in b/dlls/winegstreamer/Makefile.in
index 1c701bfa9f6..c61fc3f77dc 100644
--- a/dlls/winegstreamer/Makefile.in
+++ b/dlls/winegstreamer/Makefile.in
@@ -13,6 +13,7 @@ C_SRCS = \
 	main.c \
 	media_source.c \
 	mfplat.c \
+	mpeg4_muxer.c \
 	quartz_parser.c \
 	quartz_transform.c \
 	resampler.c \
diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h
index 54f59aa708a..1e3461cb14f 100644
--- a/dlls/winegstreamer/gst_private.h
+++ b/dlls/winegstreamer/gst_private.h
@@ -35,6 +35,7 @@
 #include "mfidl.h"
 #include "wine/debug.h"
 #include "wine/strmbase.h"
+#include "wine/mfinternal.h"
 
 #include "unixlib.h"
 
@@ -119,6 +120,7 @@ HRESULT wma_decoder_create(IUnknown *outer, IUnknown **out);
 HRESULT wmv_decoder_create(IUnknown *outer, IUnknown **out);
 HRESULT resampler_create(IUnknown *outer, IUnknown **out);
 HRESULT color_convert_create(IUnknown *outer, IUnknown **out);
+HRESULT mpeg4_sink_class_factory_create(IUnknown *outer, IUnknown **out);
 
 bool amt_from_wg_format(AM_MEDIA_TYPE *mt, const struct wg_format *format, bool wm);
 bool amt_to_wg_format(const AM_MEDIA_TYPE *mt, struct wg_format *format);
diff --git a/dlls/winegstreamer/main.c b/dlls/winegstreamer/main.c
index 2e7763872d0..4e091536b74 100644
--- a/dlls/winegstreamer/main.c
+++ b/dlls/winegstreamer/main.c
@@ -582,6 +582,7 @@ static struct class_factory wma_decoder_cf = {{&class_factory_vtbl}, wma_decoder_create};
 static struct class_factory wmv_decoder_cf = {{&class_factory_vtbl}, wmv_decoder_create};
 static struct class_factory resampler_cf = {{&class_factory_vtbl}, resampler_create};
 static struct class_factory color_convert_cf = {{&class_factory_vtbl}, color_convert_create};
+static struct class_factory mpeg4_sink_class_factory_cf = {{&class_factory_vtbl}, mpeg4_sink_class_factory_create};
 
 HRESULT WINAPI DllGetClassObject(REFCLSID clsid, REFIID iid, void **out)
 {
@@ -616,6 +617,8 @@ HRESULT WINAPI DllGetClassObject(REFCLSID clsid, REFIID iid, void **out)
         factory = &resampler_cf;
     else if (IsEqualGUID(clsid, &CLSID_CColorConvertDMO))
         factory = &color_convert_cf;
+    else if (IsEqualGUID(clsid, &CLSID_MFMPEG4SinkClassFactory))
+        factory = &mpeg4_sink_class_factory_cf;
     else
     {
         FIXME("%s not implemented, returning CLASS_E_CLASSNOTAVAILABLE.\n", debugstr_guid(clsid));
diff --git a/dlls/winegstreamer/mpeg4_muxer.c b/dlls/winegstreamer/mpeg4_muxer.c
new file mode 100644
index 00000000000..0d3406dfbc3
--- /dev/null
+++ b/dlls/winegstreamer/mpeg4_muxer.c
@@ -0,0 +1,243 @@
+/* MPEG4 Muxer
+ *
+ * Copyright 2023 Ziqing Hui for CodeWeavers
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "gst_private.h"
+
+#include "wine/debug.h"
+
+WINE_DEFAULT_DEBUG_CHANNEL(mfplat);
+
+struct mpeg4_media_sink
+{
+    IMFFinalizableMediaSink IMFFinalizableMediaSink_iface;
+    LONG refcount;
+    IMFByteStream *bytestream;
+};
+
+static struct mpeg4_media_sink *impl_from_IMFFinalizableMediaSink(IMFFinalizableMediaSink *iface)
+{
+    return CONTAINING_RECORD(iface, struct mpeg4_media_sink, IMFFinalizableMediaSink_iface);
+}
+
+static HRESULT WINAPI mpeg4_media_sink_QueryInterface(IMFFinalizableMediaSink *iface, REFIID riid, void **obj)
+{
+    TRACE("%p, %s, %p.\n", iface, debugstr_guid(riid), obj);
+
+    if (IsEqualIID(riid, &IID_IMFFinalizableMediaSink) ||
+            IsEqualIID(riid, &IID_IMFMediaSink) ||
+            IsEqualIID(riid, &IID_IUnknown))
+    {
+        *obj = iface;
+    }
+    else
+    {
+        WARN("Unsupported interface %s.\n", debugstr_guid(riid));
+        *obj = NULL;
+        return E_NOINTERFACE;
+    }
+
+    IUnknown_AddRef((IUnknown *)*obj);
+
+    return S_OK;
+}
+
+static ULONG WINAPI mpeg4_media_sink_AddRef(IMFFinalizableMediaSink *iface)
+{
+    struct mpeg4_media_sink *sink = impl_from_IMFFinalizableMediaSink(iface);
+    ULONG refcount = InterlockedIncrement(&sink->refcount);
+    TRACE("%p, refcount %lu.\n", iface, refcount);
+    return refcount;
+}
+
+static ULONG WINAPI mpeg4_media_sink_Release(IMFFinalizableMediaSink *iface)
+{
+    struct mpeg4_media_sink *sink = impl_from_IMFFinalizableMediaSink(iface);
+    ULONG refcount = InterlockedDecrement(&sink->refcount);
+
+    TRACE("%p, refcount %lu.\n", iface, refcount);
+
+    if (!refcount)
+    {
+        IMFByteStream_Release(sink->bytestream);
+        free(sink);
+    }
+
+    return refcount;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_GetCharacteristics(IMFFinalizableMediaSink *iface, DWORD *flags)
+{
+    FIXME("%p, %p stub!\n", iface, flags);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_AddStreamSink(IMFFinalizableMediaSink *iface, DWORD stream_sink_id,
+        IMFMediaType *media_type, IMFStreamSink **stream_sink)
+{
+    FIXME("%p, %#lx, %p, %p stub!\n", iface, stream_sink_id, media_type, stream_sink);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_RemoveStreamSink(IMFFinalizableMediaSink *iface, DWORD stream_sink_id)
+{
+    FIXME("%p, %#lx stub!\n", iface, stream_sink_id);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_GetStreamSinkCount(IMFFinalizableMediaSink *iface, DWORD *count)
+{
+    FIXME("%p, %p stub!\n", iface, count);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_GetStreamSinkByIndex(IMFFinalizableMediaSink *iface, DWORD index,
+        IMFStreamSink **stream)
+{
+    FIXME("%p, %lu, %p stub!\n", iface, index, stream);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_GetStreamSinkById(IMFFinalizableMediaSink *iface, DWORD stream_sink_id,
+        IMFStreamSink **stream)
+{
+    FIXME("%p, %#lx, %p stub!\n", iface, stream_sink_id, stream);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_SetPresentationClock(IMFFinalizableMediaSink *iface, IMFPresentationClock *clock)
+{
+    FIXME("%p, %p stub!\n", iface, clock);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_GetPresentationClock(IMFFinalizableMediaSink *iface, IMFPresentationClock **clock)
+{
+    FIXME("%p, %p stub!\n", iface, clock);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_Shutdown(IMFFinalizableMediaSink *iface)
+{
+    FIXME("%p stub!\n", iface);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_BeginFinalize(IMFFinalizableMediaSink *iface, IMFAsyncCallback *callback, IUnknown *state)
+{
+    FIXME("%p, %p, %p stub!\n", iface, callback, state);
+
+    return E_NOTIMPL;
+}
+
+static HRESULT WINAPI mpeg4_media_sink_EndFinalize(IMFFinalizableMediaSink *iface, IMFAsyncResult *result)
+{
+    FIXME("%p, %p stub!\n", iface, result);
+
+    return E_NOTIMPL;
+}
+
+static const IMFFinalizableMediaSinkVtbl mpeg4_media_sink_vtbl =
+{
+    mpeg4_media_sink_QueryInterface,
+    mpeg4_media_sink_AddRef,
+    mpeg4_media_sink_Release,
+    mpeg4_media_sink_GetCharacteristics,
+    mpeg4_media_sink_AddStreamSink,
+    mpeg4_media_sink_RemoveStreamSink,
+    mpeg4_media_sink_GetStreamSinkCount,
+    mpeg4_media_sink_GetStreamSinkByIndex,
+    mpeg4_media_sink_GetStreamSinkById,
+    mpeg4_media_sink_SetPresentationClock,
+    mpeg4_media_sink_GetPresentationClock,
+    mpeg4_media_sink_Shutdown,
+    mpeg4_media_sink_BeginFinalize,
+    mpeg4_media_sink_EndFinalize,
+};
+
+static HRESULT WINAPI sink_class_factory_QueryInterface(IMFSinkClassFactory *iface, REFIID riid, void **out)
+{
+    if (IsEqualIID(riid, &IID_IMFSinkClassFactory)
+            || IsEqualIID(riid, &IID_IUnknown))
+    {
+        *out = iface;
+        IMFSinkClassFactory_AddRef(iface);
+        return S_OK;
+    }
+
+    *out = NULL;
+    return E_NOINTERFACE;
+}
+
+static ULONG WINAPI sink_class_factory_AddRef(IMFSinkClassFactory *iface)
+{
+    return 2;
+}
+
+static ULONG WINAPI sink_class_factory_Release(IMFSinkClassFactory *iface)
+{
+    return 1;
+}
+
+static HRESULT WINAPI sink_class_factory_CreateMediaSink(IMFSinkClassFactory *iface, IMFByteStream *bytestream,
+        IMFMediaType *video_type, IMFMediaType *audio_type, IMFMediaSink **out)
+{
+    struct mpeg4_media_sink *sink;
+
+    TRACE("%p, %p, %p, %p.\n", bytestream, video_type, audio_type, out);
+
+    if (!out || !bytestream)
+        return E_POINTER;
+
+    if (!(sink = calloc(1, sizeof(*sink))))
+        return E_OUTOFMEMORY;
+
+    sink->IMFFinalizableMediaSink_iface.lpVtbl = &mpeg4_media_sink_vtbl;
+    sink->refcount = 1;
+    IMFByteStream_AddRef(sink->bytestream = bytestream);
+
+    *out = (IMFMediaSink *)&sink->IMFFinalizableMediaSink_iface;
+
+    return S_OK;
+}
+
+static const IMFSinkClassFactoryVtbl mpeg4_sink_class_factory_vtbl =
+{
+    sink_class_factory_QueryInterface,
+    sink_class_factory_AddRef,
+    sink_class_factory_Release,
+    sink_class_factory_CreateMediaSink,
+};
+
+static IMFSinkClassFactory mpeg4_sink_class_factory = { &mpeg4_sink_class_factory_vtbl };
+
+HRESULT mpeg4_sink_class_factory_create(IUnknown *outer, IUnknown **out)
+{
+    *out = (IUnknown *)&mpeg4_sink_class_factory;
+    return S_OK;
+}
diff --git a/dlls/winegstreamer/winegstreamer_classes.idl b/dlls/winegstreamer/winegstreamer_classes.idl
index 30a99c9acfb..db36fc368c0 100644
--- a/dlls/winegstreamer/winegstreamer_classes.idl
+++ b/dlls/winegstreamer/winegstreamer_classes.idl
@@ -111,3 +111,10 @@ coclass CResamplerMediaObject {}
     uuid(98230571-0087-4204-b020-3282538e57d3)
 ]
 coclass CColorConvertDMO {}
+
+[
+    helpstring("MF MPEG4 Sink Class Factory"),
+    threading(both),
+    uuid(a22c4fc7-6e91-4e1d-89e9-53b2667b72ba)
+]
+coclass MFMPEG4SinkClassFactory {}
Do you have a working draft that's using gstreamer encoding?
On Mon May 29 00:41:03 2023 +0000, Nikolay Sivov wrote:
> Do you have a working draft that's using gstreamer encoding?
Not yet. Would it be better to submit this after we have gstreamer encoding?
On Mon May 29 00:41:03 2023 +0000, Ziqing Hui wrote:
> Not yet. Would it be better to submit this after we have gstreamer encoding?
Not after, but maybe together, I'm not sure. I'm asking because stubs are easy to add, but without gstreamer integration it's not clear to me whether we will be using the full encoding pipeline or will need to implement individual encoder MFTs. It's better to have it match what happens on Windows; the Source Reader and the MF pipeline in general already show issues that are hard to fix without piling up hacks.
> It's better to have it match what happens on Windows; the Source Reader and the MF pipeline in general already show issues that are hard to fix without piling up hacks.
I'm curious, what hacks are we piling up, exactly? I'm not aware of any.
On Mon May 29 19:59:57 2023 +0000, Zebediah Figura wrote:
> > It's better to have it match what happens on Windows; the Source Reader and the MF pipeline in general already show issues that are hard to fix without piling up hacks.
>
> I'm curious, what hacks are we piling up, exactly? I'm not aware of any.
For Proton we do, which is no different. One issue is that the exposed media formats differ a lot; another is that the components used are not the same - when an application expects a decoder, we don't have one.
> it's not clear to me whether we will be using the full encoding pipeline or will need to implement individual encoder MFTs
It may be possible to squeeze a full encoding pipeline into the muxer component, but I think the experience on the decoding side has shown that it is not future-proof and that we should have individual components, and compose the pipelines on the PE side.
On Mon May 29 20:36:49 2023 +0000, Rémi Bernon wrote:
> > it's not clear to me whether we will be using the full encoding pipeline or will need to implement individual encoder MFTs
>
> It may be possible to squeeze a full encoding pipeline into the muxer component, but I think the experience on the decoding side has shown that it is not future-proof and that we should have individual components, and compose the pipelines on the PE side.
To be clear I don't think that any positions on the decoding pipeline should inform those related to the encoding pipeline, and I don't even think it makes sense to use them as an argument.
None of the reasons to expose a generic decoding pipeline apply to encoding, I think.
On Mon May 29 20:45:26 2023 +0000, Zebediah Figura wrote:
> To be clear I don't think that any positions on the decoding pipeline should inform those related to the encoding pipeline, and I don't even think it makes sense to use them as an argument.
>
> None of the reasons to expose a generic decoding pipeline apply to encoding, I think.
Applications have been seen building custom decoding pipelines by composing sources and decoders, because they can, among other reasons. Therefore we can expect applications to build encoding pipelines in the same way, for the same reasons. Is this really such a stretch?

Anyway, there are even more reasons to do so with encoding, as you very often need to configure the encoder components to fit your needs. I'm confident that having individual components is even more of a requirement there.
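To make the configuration point concrete, here is a minimal sketch of the kind of per-application decision involved when setting up an encoder MFT's output type; `encoder` and the bitrate value are illustrative assumptions, and error handling is omitted:

```c
/* Sketch: the application, not the pipeline, picks the output subtype and
 * bitrate. "encoder" is an IMFTransform for some H264 encoder MFT. */
IMFMediaType *output_type;

MFCreateMediaType(&output_type);
IMFMediaType_SetGUID(output_type, &MF_MT_MAJOR_TYPE, &MFMediaType_Video);
IMFMediaType_SetGUID(output_type, &MF_MT_SUBTYPE, &MFVideoFormat_H264);
IMFMediaType_SetUINT32(output_type, &MF_MT_AVG_BITRATE, 2000000);
IMFTransform_SetOutputType(encoder, 0, output_type, 0);
IMFMediaType_Release(output_type);
```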
On Mon May 29 21:19:06 2023 +0000, Rémi Bernon wrote:
> Applications have been seen building custom decoding pipelines by composing sources and decoders, because they can, among other reasons. Therefore we can expect applications to build encoding pipelines in the same way, for the same reasons. Is this really such a stretch?
>
> Anyway, there are even more reasons to do so with encoding, as you very often need to configure the encoder components to fit your needs. I'm confident that having individual components is even more of a requirement there.
What I'm thinking is to add something like wg_unknown (I'm not sure what its name should be) to winegstreamer to represent the muxer component. Then we'd have 3 components in winegstreamer:

1. wg_parser to demux and decode.
2. wg_transform to decode and encode.
3. wg_unknown to mux (and maybe encode).

Now, the question is: should wg_unknown contain an encoding pipeline, the way wg_parser contains a decoding pipeline? If so, it behaves like the opposite of wg_parser, and maybe we could make use of the encodebin plugin? If not, it only does the muxing.
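For reference, encodebin is driven by a GstEncodingProfile describing the target container and streams, so a wg_unknown built around it would mostly be profile construction. A minimal sketch, assuming an MP4 container with a single H264 stream (the helper name is hypothetical):

```c
/* Hypothetical helper: build an encodebin preconfigured for MP4/H264. */
#include <gst/gst.h>
#include <gst/pbutils/encoding-profile.h>

static GstElement *create_mp4_encode_bin(void)
{
    GstEncodingContainerProfile *container;
    GstEncodingVideoProfile *video;
    GstElement *encodebin;
    GstCaps *caps;

    caps = gst_caps_from_string("video/quicktime, variant=iso");
    container = gst_encoding_container_profile_new("mp4", NULL, caps, NULL);
    gst_caps_unref(caps);

    caps = gst_caps_from_string("video/x-h264");
    video = gst_encoding_video_profile_new(caps, NULL, NULL, 1);
    gst_encoding_container_profile_add_profile(container, (GstEncodingProfile *)video);
    gst_caps_unref(caps);

    encodebin = gst_element_factory_make("encodebin", NULL);
    g_object_set(encodebin, "profile", container, NULL);
    gst_encoding_profile_unref((GstEncodingProfile *)container);

    return encodebin;
}
```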
On Wed May 31 01:49:23 2023 +0000, Ziqing Hui wrote:
> What I'm thinking is to add something like wg_unknown (I'm not sure what its name should be) to winegstreamer to represent the muxer component. Then we'd have 3 components in winegstreamer:
>
> - wg_parser to demux and decode.
> - wg_transform to decode and encode.
> - wg_unknown to mux (and maybe encode).
>
> Now, the question is: should wg_unknown contain an encoding pipeline, the way wg_parser contains a decoding pipeline? If so, it behaves like the opposite of wg_parser, and maybe we could make use of the encodebin plugin? If not, it only does the muxing.
From my perspective, I prefer to have an individual muxer component.
On Wed May 31 01:54:00 2023 +0000, Ziqing Hui wrote:
> From my perspective, I prefer to have an individual muxer component.
I think it should be as simple as wg_transform, operating in a synchronous way. I suggest wg_sink or wg_muxer.
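For illustration, a synchronous wg_muxer along those lines might expose a PE-side interface like the following; every name below is hypothetical, nothing of the sort exists yet:

```c
/* Hypothetical PE-side interface for a synchronous muxer, by analogy with
 * wg_transform; none of these entry points exist in winegstreamer today. */
struct wg_muxer;

HRESULT wg_muxer_create(const char *format, struct wg_muxer **muxer);
void wg_muxer_destroy(struct wg_muxer *muxer);
HRESULT wg_muxer_add_stream(struct wg_muxer *muxer, UINT32 stream_id,
        const struct wg_format *format);
/* Push one compressed sample for a stream, synchronously. */
HRESULT wg_muxer_push_sample(struct wg_muxer *muxer, UINT32 stream_id,
        struct wg_sample *sample);
/* Read back whatever muxed bytes are available, for the PE side to write
 * to the IMFByteStream. */
HRESULT wg_muxer_read_data(struct wg_muxer *muxer, void *buffer, UINT32 *size);
HRESULT wg_muxer_finalize(struct wg_muxer *muxer);
```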
In order to fix media source non-determinism I intend to add a wg_source myself (could be wg_demuxer), doing the opposite.
> Applications have been seen building custom decoding pipelines by composing sources and decoders, because they can, among other reasons. Therefore we can expect applications to build encoding pipelines in the same way, for the same reasons. Is this really such a stretch?
>
> Anyway, there are even more reasons to do so with encoding, as you very often need to configure the encoder components to fit your needs. I'm confident that having individual components is even more of a requirement there.
I'm sorry, what I'm trying to get at is that decoding and encoding are fundamentally different. They are not symmetric operations, essentially.
Decoding is predominantly a matter of turning a multimedia stream in any format into a set of formats that can be rendered. The application never has a reason to care about the source format (and therefore in practice rarely does), and usually doesn't have a reason to care about the destination format either (it just needs to be something that can be fed to GL/X11/D3D/GDI or Pulse/ALSA/mmdevapi). Autoplugging makes sense, since there's generally only one (best) element that can be used to decode.
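To make the contrast concrete, this is roughly all a decodebin consumer has to do; a sketch, where pipeline, self, and the on_pad_added callback are assumed from context:

```c
/* Autoplugging from the consumer side: no decoder is ever named, the caller
 * only learns about the decoded streams as source pads appear. */
GstElement *decodebin = gst_element_factory_make("decodebin", NULL);

g_signal_connect(decodebin, "pad-added", G_CALLBACK(on_pad_added), self);
gst_bin_add(GST_BIN(pipeline), decodebin);
```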
Encoding, by contrast, requires decisions. There are many "raw" formats for any given stream type, sure, but they're broadly interchangeable and mostly motivated by the limitations of downstream APIs. There are many more formats for compressed streams, however, and the selection is generally motivated by things the application cares about (file size, compression speed, patents, platform compatibility). This applies not only to the container but also to the individual stream codecs. You can't autoplug encoders unless you pick some defaults, which may easily turn out to be undesirable, and accordingly DirectShow doesn't try, and I believe GStreamer doesn't try either.
This was a pointless comment originally on my part, because ultimately this is just a distraction. Fundamentally I agree that we probably don't want to embed the whole GStreamer encoding pipeline in one element, but do it individually with transforms and muxers.
> In order to fix media source non-determinism I intend to add a wg_source myself (could be wg_demuxer), doing the opposite.
Wait, what, why? I know there was discussion about fixing non-determinism by manually reimplementing decodebin, but why does that need to affect the frontends at all? Why can't we just do it inside of the existing parser?
> Decoding is predominantly a matter of turning a multimedia stream in any format into a set of formats that can be rendered. The application never has a reason to care about the source format (and therefore in practice rarely does), and usually doesn't have a reason to care about the destination format either (it just needs to be something that can be fed to GL/X11/D3D/GDI or Pulse/ALSA/mmdevapi). Autoplugging makes sense, since there's generally only one (best) element that can be used to decode.
Maybe, but that's only the theory, and in practice some applications don't follow it and either make assumptions or control things manually.
> Wait, what, why? I know there was discussion about fixing non-determinism by manually reimplementing decodebin, but why does that need to affect the frontends at all? Why can't we just do it inside of the existing parser?
The main reason is that I think wg_parser is complicated, hard to debug, and any change risky and difficult to review.
I would like to have a synchronous demuxer component used in media_source only for now, and that works in push mode only.
This will also allow it to be merged step by step more easily, and with less risk of breaking the other wg_parser clients.
> > Wait, what, why? I know there was discussion about fixing non-determinism by manually reimplementing decodebin, but why does that need to affect the frontends at all? Why can't we just do it inside of the existing parser?
>
> The main reason is that I think wg_parser is complicated, hard to debug, and any change risky and difficult to review.
I don't understand this reasoning. Yes, the parser is complicated, but all of that complication is necessary, and pretty much none of it is related to the use of decodebin. As far as I can tell all of the complication is going to be necessary for anything that replaces decodebin as well.
> I would like to have a synchronous demuxer component used in media_source only for now, and that works in push mode only.
I especially don't understand this. Forcing synchronicity, in the way that wg_transform does, is what makes wg_transform so much more complicated than the parser. Perhaps there is individual bias on both of our parts here, having written the relevant code, but I found the transform quite simple and logical before that part was introduced.
I also don't understand how restricting to push mode is going to help anything. It simplifies the entry point a bit, but at the cost of either memory, or sacrificing support for some formats. Note that you still need a thread in this case.
On Mon Jun 5 16:48:27 2023 +0000, Zebediah Figura wrote:
> > > Wait, what, why? I know there was discussion about fixing non-determinism by manually reimplementing decodebin, but why does that need to affect the frontends at all? Why can't we just do it inside of the existing parser?
> >
> > The main reason is that I think wg_parser is complicated, hard to debug, and any change risky and difficult to review.
>
> I don't understand this reasoning. Yes, the parser is complicated, but all of that complication is necessary, and pretty much none of it is related to the use of decodebin. As far as I can tell all of the complication is going to be necessary for anything that replaces decodebin as well.
>
> > I would like to have a synchronous demuxer component used in media_source only for now, and that works in push mode only.
>
> I especially don't understand this. Forcing synchronicity, in the way that wg_transform does, is what makes wg_transform so much more complicated than the parser. Perhaps there is individual bias on both of our parts here, having written the relevant code, but I found the transform quite simple and logical before that part was introduced. I also don't understand how restricting to push mode is going to help anything. It simplifies the entry point a bit, but at the cost of either memory, or sacrificing support for some formats. Note that you still need a thread in this case.
How is wg_transform more complicated than wg_parser? There's no thread involved [*], no race condition or deadlock possible, no mutex and no condition variable (meaning no crash on pthread_exit) necessary.
[*] Regardless of what decoders might do internally, we can assume there's none.
> How is wg_transform more complicated than wg_parser? There's no thread involved [*], no race condition or deadlock possible, no mutex and no condition variable (meaning no crash on pthread_exit) necessary.
Actually, no, I'm sorry, my earlier statement was incorrect. I do find the transform to be distinctly more complicated, but that's pretty much all because of zero-copy, not because of synchronicity.
Anyway, I don't want to belabor this point, it's not really fruitful to argue which component is more complicated, especially since that's going to be tainted by individual familiarity. I'd rather just understand why we're arranging the demuxer differently.
Actually, for that matter, the more I think about what decodebin does, the more I'd also like to understand why we want to replace it? It's not a trivial stack of code, and we need most of it. As far as I'm aware there are two things we need: deterministic stream order, and faster resolution. But IIRC the resolution delay is from prerolling, which we can do lazily if necessary. If the only thing we need is deterministic stream order, *and* that doesn't block the vast majority of applications, can we just get that into GStreamer instead?
> But IIRC the resolution delay is from prerolling, which we can do lazily if necessary.
Imho adding some laziness only makes things more complicated, and I don't know if it's truly about prerolling. As we also currently decode in the pipeline, we only get notified of the decoded stream caps *after* auto-plugging is done. This is what is taking too long; making this faster *also* actually requires not decoding in the pipeline.
> It's not a trivial stack of code, and we need most of it.
I don't think we do, at least not for media foundation but also probably in general. Using decodebin creates more problems than it solves. We lose control over everything that it internally does, its threading, most of its plugging decisions, and we then have to make up for it on our side through complex synchronization.
Not using it gives us back the control we need for compatibility. The Win32 components we have to implement, on the MF side particularly because applications are apparently using them more directly, but also on the DirectShow and WMVReader side, as has been seen a couple of times, are lower level and have deterministic behavior most of the time.
> If the only thing we need is deterministic stream order, *and* that doesn't block the vast majority of applications, can we just get that into GStreamer instead?
It's non-deterministic in nature - its stream ordering, its sample ordering, and everything about it. I am not even going to try convincing GStreamer to do otherwise; this is at best a feature, and at worst a necessary side effect of its use of queues.
I am not interested in trying to implement this in GStreamer. I have it mostly done in Wine and it makes everything much simpler, while also being quite simple in terms of implementation. It gets rid of GStreamer threads, solving many problems at the same time and making debugging easier. It also removes the need for condition variables entirely, solving many crashes on thread exit as well.

Of course there's plenty of other ways to do it differently and elsewhere, but I don't see any good reason to look for something else, and I have spent enough time on this to not be interested in spending more of it on other approaches. I also know Wine best, and I am not interested in spending weeks figuring out how to do it in GStreamer.
I like using GStreamer as it offers us a nice abstraction over multimedia libraries and offers a lot of useful high and low level tools. But if using GStreamer means we have to be dogmatic about it and use only the higher level components, because they are supposedly implementing some important logic for us, I'm going to start thinking more seriously about supporting Paul's idea of using codec libraries more directly instead.
> > But IIRC the resolution delay is from prerolling, which we can do lazily if necessary.
>
> Imho adding some laziness only makes things more complicated, and I don't know if it's truly about prerolling. As we also currently decode in the pipeline, we only get notified of the decoded stream caps *after* auto-plugging is done. This is what is taking too long; making this faster *also* actually requires not decoding in the pipeline.
Okay, I'm sorry, I misremembered what we were doing. We don't actually need to preroll—we need caps and duration, and yeah, it's the autoplugging that takes too long.
But that's something we can avoid. With the work that Ziqing did we already have some of the infrastructure in place for skipping the decoding part of the pipeline. It wouldn't be too hard to resolve some of the initialization earlier, even if we do append decoding chains.
> > It's not a trivial stack of code, and we need most of it.
>
> I don't think we do, at least not for media foundation but also probably in general. Using decodebin creates more problems than it solves. We lose control over everything that it internally does, its threading, most of its plugging decisions, and we then have to make up for it on our side through complex synchronization.
>
> Not using it gives us back the control we need for compatibility. The Win32 components we have to implement, on the MF side particularly because applications are apparently using them more directly, but also on the DirectShow and WMVReader side, as has been seen a couple of times, are lower level and have deterministic behavior most of the time.

> > If the only thing we need is deterministic stream order, *and* that doesn't block the vast majority of applications, can we just get that into GStreamer instead?
>
> It's non-deterministic in nature - its stream ordering, its sample ordering, and everything about it. I am not even going to try convincing GStreamer to do otherwise; this is at best a feature, and at worst a necessary side effect of its use of queues.
I don't understand this assertion. I've reread the decodebin source since reading this comment, and I don't see anything in the autoplugging logic that we clearly don't need. (To be sure, there are some decodebin features we don't need, such as use-buffering, EOS handling, and possibly also the subtitle-encoding logic. But the bulk of decodebin is the autoplugging part, and as far as I can tell we need all of it.)
decodebin also has no control over sample ordering in the first place, that's left up to the whims of individual demuxers. multiqueue may change that sample ordering, but we kind of need multiqueue—particularly because of the starvation logic, but the idea of getting rid of buffering on the GStreamer side definitely makes me nervous. What problems exactly does it cause? Can you please spell them out? I'd like to understand so that I can think about and evaluate all of the possible solutions.
> I am not interested in trying to implement this in GStreamer. I have it mostly done in Wine and it makes everything much simpler, while also being quite simple in terms of implementation. It gets rid of GStreamer threads, solving many problems at the same time and making debugging easier. It also removes the need for condition variables entirely, solving many crashes on thread exit as well.
>
> Of course there's plenty of other ways to do it differently and elsewhere, but I don't see any good reason to look for something else, and I have spent enough time on this to not be interested in spending more of it on other approaches. I also know Wine best, and I am not interested in spending weeks figuring out how to do it in GStreamer.
I'm not going to demand you do the work, but I hope you can understand that this isn't an argument for doing something a certain way upstream.
> I like using GStreamer as it offers us a nice abstraction over multimedia libraries and offers a lot of useful high and low level tools. But if using GStreamer means we have to be dogmatic about it and use only the higher level components, because they are supposedly implementing some important logic for us, I'm going to start thinking more seriously about supporting Paul's idea of using codec libraries more directly instead.
I'm sorry to be difficult, I'm really not trying to be. But as a maintainer of this component, it's my job to understand all of the code that's going in, and to have thought about the options holistically. Largely that's what I'm trying to do here: ask questions, understand what it actually is that Win32 applications need, and evaluate whether we can do something another way. I'm not even trying to say "this definitely should be done another way", but I do need enough of a reason to discard other possibilities. I currently don't see that.
I recognize it can be frustrating to do a lot of work, and then be asked to do it a different way. Having been on the other side of that often, I can only offer that it's best to go into development with the idea already in mind that things may need to be redone, and also, to try to discuss high-level design with maintainers before spending the effort on implementation.
And I would like to offer that it's currently very frustrating in turn for me to see a patch, ask some questions about why it's being done a certain way—which seems surprising or potentially problematic to me—ask if it can be done a different way, and be met with nothing but obstinate and argumentative refusal, and high-level statements like "decodebin is a bad fit for wine" that don't explain the specific problems that you're trying to solve. It really does not make me want to engage in patch review or discussion, which I regret to say has contributed to my being rather slow to respond in general. I'm not asking for unquestioning accession to my propositions—even when I *do* fully understand the problem, I often forget things or have ideas that don't pan out the way I think they will, as I'm sure you're well aware. But I would appreciate a bit more effort to see things from my perspective, and understand why I'm making the propositions that I'm making.
> But that's something we can avoid. With the work that Ziqing did we already have some of the infrastructure in place for skipping the decoding part of the pipeline. It wouldn't be too hard to resolve some of the initialization earlier, even if we do append decoding chains.
Adding a per-stream decodebin after the parser's decodebin is in my opinion an ugly hack, and it adds complexity that will be painful to work with when debugging pipeline issues. It is not the right way to fix the issue; the right way is to implement the mechanisms native has.
> But the bulk of decodebin is the autoplugging part, and as far as I can tell we need all of it.
We don't. MF has this implemented already, and in a way that is tested against native behavior. What we lack now is the addition of decoder elements when pipelines are resolved.
> decodebin also has no control over sample ordering in the first place, that's left up to the whims of individual demuxers. multiqueue may change that sample ordering, but we kind of need multiqueue—particularly because of the starvation logic, but the idea of getting rid of buffering on the GStreamer side definitely makes me nervous.
Yes, I can very well understand that. But this is how native works, and at some point we will have to do the same. Delaying this only keeps the related problems unsolved.

Working on a prototype I can already see that there are several broken parts when decoders are involved, but that's not a reason not to have them; it's the consequence of not having them earlier.

The longer we wait and the more complexity we add to the hacks we have, the harder it will get to untangle everything later when we want, or have to, implement them.
> Largely that's what I'm trying to do here: ask questions, understand what it actually is that Win32 applications need, and evaluate whether we can do something another way. I'm not even trying to say "this definitely should be done another way", but I do need enough of a reason to discard other possibilities. I currently don't see that.
You have stated multiple times that you did not know anything about MF, and were not interested in or confident about reviewing any of the code there. I don't think this is compatible with maintaining the code and making decisions about its direction.
> And I would like to offer that it's currently very frustrating in turn for me to see a patch, ask some questions about why it's being done a certain way—which seems surprising or potentially problematic to me—ask if it can be done a different way, and be met with nothing but obstinate and argumentative refusal, and high-level statements like "decodebin is a bad fit for wine" that don't explain the specific problems that you're trying to solve.
I have been explaining the problem on many occasions. Applications use MF in the same way we use GStreamer: they build pipelines, and they insert, request, or expect the presence of individual components.

To give some concrete examples:
* MF applications are expected to get D3D buffers out by creating an IMFDXGIDeviceManager instance and configuring the H264 decoder with it. If there is no H264 decoder, there is nothing to configure and they fail.

* Trailmakers, for instance, calls IMFSourceReader::GetServiceForStream and expects to receive a decoder IMFTransform. Yes, we can return a fake one, at the cost of unnecessary buffer copies, and it's what Proton currently does, but this is a hack.
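Roughly, the pattern both examples rely on looks like this; a sketch only, with reader and d3d11_device assumed to exist and error handling omitted:

```c
UINT token;
IMFDXGIDeviceManager *manager;
IMFTransform *decoder;

MFCreateDXGIDeviceManager(&token, &manager);
IMFDXGIDeviceManager_ResetDevice(manager, (IUnknown *)d3d11_device, token);

/* The application reaches for the decoder MFT behind the stream... */
IMFSourceReader_GetServiceForStream(reader, MF_SOURCE_READER_FIRST_VIDEO_STREAM,
        &GUID_NULL, &IID_IMFTransform, (void **)&decoder);

/* ...and hands it the device manager so it outputs D3D buffers. If there is
 * no real decoder MFT behind the stream, both steps fall apart. */
IMFTransform_ProcessMessage(decoder, MFT_MESSAGE_SET_D3D_MANAGER, (ULONG_PTR)manager);
```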
This is true for MF, but this is also true for DirectShow:
* Space Engineers creates a DirectShow graph, instantiates a WMAsfReader and a WMV decoder DMO filter, and connects their pins itself. It then expects to receive buffers out of the decoder DMO.
Yes, we can maybe implement this by having fake pass-through decoders, but once again, this is not the right way because this is not the way native works.
Several games have expectations about video frame plane alignment: some in MF, like Greedfall or Wargroove, and some using the WMV/ASF reader, like Resident Evil 0.

This is an inherent property of the defaults of the individual decoders, and also of the media types involved in the negotiation.

With the decodebin approach, this is complicated. We would need to add the buffer pool and alignment metadata to wg_parser, but this is also going to depend on the client API that uses it, because they don't all behave the same, and we would need to add heuristics to decide on the default alignments depending on the compressed formats.

By decoupling decoders and demuxers we can first test and implement this in the individual decoder components, as is done with the H264 decoder. Then everything works out of the box once decoders are plugged in, and this is the only place where it needs to be done.
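As an illustration of where that alignment becomes visible, this is roughly how an application observes the decoder's plane layout through a decoded sample; a sketch, with buffer being an IMFMediaBuffer from a decoded video sample and error handling omitted:

```c
IMF2DBuffer *buffer_2d;
BYTE *scanline0;
LONG pitch;

IMFMediaBuffer_QueryInterface(buffer, &IID_IMF2DBuffer, (void **)&buffer_2d);
IMF2DBuffer_Lock2D(buffer_2d, &scanline0, &pitch);
/* "pitch" reflects the decoder's plane alignment and may well differ from the
 * frame width; games that hardcode the defaults of native decoders break when
 * our value differs. */
IMF2DBuffer_Unlock2D(buffer_2d);
IMF2DBuffer_Release(buffer_2d);
```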
This merge request was closed by Ziqing Hui.