Signed-off-by: Derek Lesho <dlesho@codeweavers.com>
---
 dlls/winegstreamer/gst_private.h |   4 +
 dlls/winegstreamer/mfplat.c      | 533 ++++++++++++++++++++++++++++++-
 include/codecapi.h               |  38 +++
 3 files changed, 574 insertions(+), 1 deletion(-)
 create mode 100644 include/codecapi.h
diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h
index e6fb841fc8..a6c3fd3784 100644
--- a/dlls/winegstreamer/gst_private.h
+++ b/dlls/winegstreamer/gst_private.h
@@ -36,6 +36,7 @@
 #include "winuser.h"
 #include "dshow.h"
 #include "strmif.h"
+#include "mfobjects.h"
 #include "wine/heap.h"
 #include "wine/strmbase.h"
 
@@ -54,4 +55,7 @@ void start_dispatch_thread(void) DECLSPEC_HIDDEN;
 
 extern HRESULT mfplat_get_class_object(REFCLSID rclsid, REFIID riid, void **obj) DECLSPEC_HIDDEN;
 
+IMFMediaType* media_type_from_caps(GstCaps *caps);
+GstCaps *caps_from_media_type(IMFMediaType *type);
+
 #endif /* __GST_PRIVATE_INCLUDED__ */
diff --git a/dlls/winegstreamer/mfplat.c b/dlls/winegstreamer/mfplat.c
index 55b9b08876..a6f4fbc2ec 100644
--- a/dlls/winegstreamer/mfplat.c
+++ b/dlls/winegstreamer/mfplat.c
@@ -16,11 +16,16 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  */
 
+#include "config.h"
+#include <gst/gst.h>
+
+#include "gst_private.h"
+
 #include <stdarg.h>
 
 #include "gst_private.h"
 #include "mfapi.h"
-#include "mfidl.h"
+#include "codecapi.h"
 
 #include "wine/debug.h"
 #include "wine/heap.h"
 
@@ -433,3 +438,529 @@ HRESULT mfplat_get_class_object(REFCLSID rclsid, REFIID riid, void **obj)
     return CLASS_E_CLASSNOTAVAILABLE;
 }
+
+struct aac_user_data
+{
+    WORD payload_type;
+    WORD profile_level_indication;
+    WORD struct_type;
+    WORD reserved;
+    /*BYTE audio_specific_config;*/
+};
+
+/* IMPORTANT: caps will be modified to represent the exact type needed for the format */
+IMFMediaType* media_type_from_caps(GstCaps *caps)
+{
+    IMFMediaType *media_type;
+    GstStructure *info;
+    const char *media_type_name;
+    gchar *human_readable;
+
+    if (FAILED(MFCreateMediaType(&media_type)))
+    {
+        return NULL;
+    }
+
+    info = gst_caps_get_structure(caps, 0);
+    media_type_name = gst_structure_get_name(info);
+
+    human_readable = gst_caps_to_string(caps);
+    TRACE("caps = %s\n", human_readable);
+    g_free(human_readable);
+
+    if (!(strncmp(media_type_name, "video", 5)))
+    {
+        const char *video_format = media_type_name + 6;
+        gint width, height, framerate_num, framerate_den;
+
+        IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, &MFMediaType_Video);
+
+        if (gst_structure_get_int(info, "width", &width) && gst_structure_get_int(info, "height", &height))
+        {
+            IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_SIZE, ((UINT64)width << 32) | height);
+        }
+        if (gst_structure_get_fraction(info, "framerate", &framerate_num, &framerate_den))
+        {
+            IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_RATE, ((UINT64)framerate_num << 32) | framerate_den);
+        }
+
+        if (!(strcmp(video_format, "x-h264")))
+        {
+            const char *profile, *level;
+
+            IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_H264);
+            IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
+
+            if ((profile = gst_structure_get_string(info, "profile")))
+            {
+                if (!(strcmp(profile, "main")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_Main);
+                else if (!(strcmp(profile, "high")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_High);
+                else if (!(strcmp(profile, "high-4:4:4")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_444);
+                else
+                    ERR("Unrecognized profile %s\n", profile);
+            }
+            if ((level = gst_structure_get_string(info, "level")))
+            {
+                if (!(strcmp(level, "1")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel1);
+                else if (!(strcmp(level, "1.1")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel1_1);
+                else if (!(strcmp(level, "1.2")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel1_2);
+                else if (!(strcmp(level, "1.3")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel1_3);
+                else if (!(strcmp(level, "2")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel2);
+                else if (!(strcmp(level, "2.1")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel2_1);
+                else if (!(strcmp(level, "2.2")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel2_2);
+                else if (!(strcmp(level, "3")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel3);
+                else if (!(strcmp(level, "3.1")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel3_1);
+                else if (!(strcmp(level, "3.2")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel3_2);
+                else if (!(strcmp(level, "4")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel4);
+                else if (!(strcmp(level, "4.1")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel4_1);
+                else if (!(strcmp(level, "4.2")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel4_2);
+                else if (!(strcmp(level, "5")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel5);
+                else if (!(strcmp(level, "5.1")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel5_1);
+                else if (!(strcmp(level, "5.2")))
+                    IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel5_2);
+                else
+                    ERR("Unrecognized level %s\n", level);
+            }
+            gst_caps_set_simple(caps, "stream-format", G_TYPE_STRING, "byte-stream", NULL);
+            gst_caps_set_simple(caps, "alignment", G_TYPE_STRING, "au", NULL);
+            for (unsigned int i = 0; i < gst_caps_get_size(caps); i++)
+            {
+                GstStructure *structure = gst_caps_get_structure (caps, i);
+                gst_structure_remove_field(structure, "codec_data");
+            }
+        }
+        else if (!(strcmp(video_format, "x-wmv")))
+        {
+            gint wmv_version;
+            const char *format;
+            const GValue *codec_data;
+
+            if (gst_structure_get_int(info, "wmvversion", &wmv_version))
+            {
+                switch (wmv_version)
+                {
+                    case 1:
+                        IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV1);
+                        break;
+                    case 2:
+                        IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV2);
+                        break;
+                    case 3:
+                        IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV3);
+                        break;
+                    default:
+                        ERR("Unrecognized wmvversion %d\n", wmv_version);
+                }
+            }
+
+            if ((format = gst_structure_get_string(info, "format")))
+            {
+                if (!(strcmp(format, "WVC1")))
+                    IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WVC1);
+            }
+
+            if ((codec_data = gst_structure_get_value(info, "codec_data")))
+            {
+                GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
+                if (codec_data_buffer)
+                {
+                    gsize codec_data_size = gst_buffer_get_size(codec_data_buffer);
+                    gpointer codec_data_raw = heap_alloc(codec_data_size);
+                    gst_buffer_extract(codec_data_buffer, 0, codec_data_raw, codec_data_size);
+                    IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, codec_data_raw, codec_data_size);
+                }
+            }
+        }
+        else if (!(strcmp(video_format, "mpeg")))
+        {
+            IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_M4S2);
+            IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
+        }
+        else if (!(strcmp(video_format, "x-raw")))
+        {
+            const char *fourcc = gst_structure_get_string(info, "stream-format");
+            IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, FALSE);
+            if (fourcc && (strlen(fourcc) == 4))
+            {
+                GUID fourcc_subtype = MFVideoFormat_Base;
+                fourcc_subtype.Data1 = MAKEFOURCC(
+                    toupper(fourcc[0]), toupper(fourcc[1]), toupper(fourcc[2]), toupper(fourcc[3]));
+                IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &fourcc_subtype);
+            }
+            else
+                ERR("uncompressed video has no stream-format\n");
+        }
+        else
+            ERR("Unrecognized video format %s\n", video_format);
+    }
+    else if (!(strncmp(media_type_name, "audio", 5)))
+    {
+        const char *audio_format = media_type_name + 6;
+
+        IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, &MFMediaType_Audio);
+        if (!(strcmp(audio_format, "mpeg")))
+        {
+            int mpeg_version = -1;
+
+            IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
+
+            if (!(gst_structure_get_int(info, "mpegversion", &mpeg_version)))
+                ERR("Failed to get mpegversion\n");
+            switch (mpeg_version)
+            {
+                case 1:
+                {
+                    IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG);
+                    break;
+                }
+                case 2:
+                case 4:
+                {
+                    const char *format, *profile, *level;
+                    DWORD profile_level_indication = 0;
+                    const GValue *codec_data;
+                    DWORD asc_size = 0;
+                    struct aac_user_data *user_data = NULL;
+
+                    IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_AAC);
+
+                    codec_data = gst_structure_get_value(info, "codec_data");
+                    if (codec_data)
+                    {
+                        GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
+                        if (codec_data_buffer)
+                        {
+                            if ((asc_size = gst_buffer_get_size(codec_data_buffer)) >= 2)
+                            {
+                                user_data = heap_alloc_zero(sizeof(*user_data)+asc_size);
+                                gst_buffer_extract(codec_data_buffer, 0, (gpointer)(user_data + 1), asc_size);
+                            }
+                            else
+                                ERR("Unexpected buffer size\n");
+                        }
+                        else
+                            ERR("codec_data not a buffer\n");
+                    }
+                    else
+                        ERR("codec_data not found\n");
+                    if (!user_data)
+                        user_data = heap_alloc_zero(sizeof(*user_data));
+
+                    {
+                        int rate;
+                        if (gst_structure_get_int(info, "rate", &rate))
+                            IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, rate);
+                    }
+                    {
+                        int channels;
+                        if (gst_structure_get_int(info, "channels", &channels))
+                            IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS, channels);
+                    }
+
+                    if ((format = gst_structure_get_string(info, "stream-format")))
+                    {
+                        DWORD payload_type = -1;
+                        if (!(strcmp(format, "raw")))
+                            payload_type = 0;
+                        else if (!(strcmp(format, "adts")))
+                            payload_type = 1;
+                        else
+                            ERR("Unrecognized stream-format\n");
+                        if (payload_type != -1)
+                        {
+                            IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_PAYLOAD_TYPE, payload_type);
+                            user_data->payload_type = payload_type;
+                        }
+                    }
+                    else
+                    {
+                        ERR("Stream format not present\n");
+                    }
+
+                    profile = gst_structure_get_string(info, "profile");
+                    level = gst_structure_get_string(info, "level");
+                    /* Data from https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder#output-t... */
+                    if (profile && level)
+                    {
+                        if (!(strcmp(profile, "lc")) && !(strcmp(level, "2")))
+                            profile_level_indication = 0x29;
+                        else if (!(strcmp(profile, "lc")) && !(strcmp(level, "4")))
+                            profile_level_indication = 0x2A;
+                        else if (!(strcmp(profile, "lc")) && !(strcmp(level, "5")))
+                            profile_level_indication = 0x2B;
+                        else
+                            ERR("Unhandled profile/level combo\n");
+                    }
+                    else
+                        ERR("Profile or level not present\n");
+
+                    if (profile_level_indication)
+                    {
+                        IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, profile_level_indication);
+                        user_data->profile_level_indication = profile_level_indication;
+                    }
+
+                    IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, (BYTE *)user_data, sizeof(*user_data) + asc_size);
+                    heap_free(user_data);
+                    break;
+                }
+                default:
+                    ERR("Unhandled mpegversion %d\n", mpeg_version);
+            }
+        }
+        else if (!(strcmp(audio_format, "x-raw")))
+        {
+            IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_Float);
+
+            gst_caps_set_simple(caps, "format", G_TYPE_STRING, "F32LE", NULL);
+        }
+        else
+            ERR("Unrecognized audio format %s\n", audio_format);
+    }
+    else
+    {
+        goto fail;
+    }
+
+    return media_type;
+    fail:
+    IMFMediaType_Release(media_type);
+    return NULL;
+}
+
+static const char *fourcc_str(DWORD fourcc)
+{
+    if (!fourcc) return NULL;
+    return wine_dbg_sprintf ("%c%c%c%c",
+            (char)(fourcc), (char)(fourcc >> 8),
+            (char)(fourcc >> 16), (char)(fourcc >> 24));
+}
+
+GstCaps *caps_from_media_type(IMFMediaType *type)
+{
+    GUID major_type;
+    GUID subtype;
+    GUID base_masked_subtype;
+    GstCaps *output = NULL;
+
+    if (FAILED(IMFMediaType_GetMajorType(type, &major_type)))
+        return NULL;
+    if (FAILED(IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype)))
+        return NULL;
+    base_masked_subtype = subtype;
+    base_masked_subtype.Data1 = 0;
+
+    if (IsEqualGUID(&major_type, &MFMediaType_Video))
+    {
+        UINT64 frame_rate = 0, frame_size = 0;
+        DWORD *framerate_num = ((DWORD*)&frame_rate) + 1;
+        DWORD *framerate_den = ((DWORD*)&frame_rate);
+        DWORD *width = ((DWORD*)&frame_size) + 1;
+        DWORD *height = ((DWORD*)&frame_size);
+
+        if (IsEqualGUID(&subtype, &MFVideoFormat_H264))
+        {
+            enum eAVEncH264VProfile h264_profile;
+            enum eAVEncH264VLevel h264_level;
+            output = gst_caps_new_empty_simple("video/x-h264");
+            gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "byte-stream", NULL);
+            gst_caps_set_simple(output, "alignment", G_TYPE_STRING, "au", NULL);
+
+            if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_PROFILE, &h264_profile)))
+            {
+                const char *profile = NULL;
+                switch (h264_profile)
+                {
+                    case eAVEncH264VProfile_Main: profile = "main"; break;
+                    case eAVEncH264VProfile_High: profile = "high"; break;
+                    case eAVEncH264VProfile_444: profile = "high-4:4:4"; break;
+                    default: ERR("Unknown profile %u\n", h264_profile);
+                }
+                if (profile)
+                    gst_caps_set_simple(output, "profile", G_TYPE_STRING, profile, NULL);
+            }
+            if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_LEVEL, &h264_level)))
+            {
+                const char *level = NULL;
+                switch (h264_level)
+                {
+                    case eAVEncH264VLevel1: level = "1"; break;
+                    case eAVEncH264VLevel1_1: level = "1.1"; break;
+                    case eAVEncH264VLevel1_2: level = "1.2"; break;
+                    case eAVEncH264VLevel1_3: level = "1.3"; break;
+                    case eAVEncH264VLevel2: level = "2"; break;
+                    case eAVEncH264VLevel2_1: level = "2.1"; break;
+                    case eAVEncH264VLevel2_2: level = "2.2"; break;
+                    case eAVEncH264VLevel3: level = "3"; break;
+                    case eAVEncH264VLevel3_1: level = "3.1"; break;
+                    case eAVEncH264VLevel3_2: level = "3.2"; break;
+                    case eAVEncH264VLevel4: level = "4"; break;
+                    case eAVEncH264VLevel4_1: level = "4.1"; break;
+                    case eAVEncH264VLevel4_2: level = "4.2"; break;
+                    case eAVEncH264VLevel5: level = "5"; break;
+                    case eAVEncH264VLevel5_1: level = "5.1"; break;
+                    case eAVEncH264VLevel5_2: level = "5.2"; break;
+                    default: ERR("Unknown level %u\n", h264_level);
+                }
+                if (level)
+                    gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL);
+            }
+        }
+        else if (IsEqualGUID(&subtype, &MFVideoFormat_WVC1))
+        {
+            BYTE *user_data;
+            DWORD user_data_size;
+            output = gst_caps_new_empty_simple("video/x-wmv");
+            gst_caps_set_simple(output, "format", G_TYPE_STRING, "WVC1", NULL);
+
+            gst_caps_set_simple(output, "wmvversion", G_TYPE_INT, 3, NULL);
+
+            if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, &user_data, &user_data_size)))
+            {
+                GstBuffer *codec_data_buffer = gst_buffer_new_allocate(NULL, user_data_size, NULL);
+                gst_buffer_fill(codec_data_buffer, 0, user_data, user_data_size);
+                gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, codec_data_buffer, NULL);
+                gst_buffer_unref(codec_data_buffer);
+                CoTaskMemFree(user_data);
+            }
+        }
+        else if (IsEqualGUID(&base_masked_subtype, &MFVideoFormat_Base))
+        {
+            output = gst_caps_new_empty_simple("video/x-raw");
+            gst_caps_set_simple(output, "format", G_TYPE_STRING, fourcc_str(subtype.Data1), NULL);
+        }
+        else {
+            ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
+            return NULL;
+        }
+
+        IMFMediaType_GetUINT64(type, &MF_MT_FRAME_RATE, &frame_rate);
+        IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
+
+        if (frame_rate)
+            gst_caps_set_simple(output, "framerate", GST_TYPE_FRACTION, *framerate_num, *framerate_den, NULL);
+        if (frame_size)
+        {
+            gst_caps_set_simple(output, "width", G_TYPE_INT, *width, NULL);
+            gst_caps_set_simple(output, "height", G_TYPE_INT, *height, NULL);
+        }
+        return output;
+    }
+    else if (IsEqualGUID(&major_type, &MFMediaType_Audio))
+    {
+        DWORD rate, channels;
+
+        if (IsEqualGUID(&subtype, &MFAudioFormat_AAC))
+        {
+            DWORD payload_type, indication;
+            struct aac_user_data *user_data;
+            UINT32 user_data_size;
+            output = gst_caps_new_empty_simple("audio/mpeg");
+
+            /* TODO */
+            gst_caps_set_simple(output, "framed", G_TYPE_BOOLEAN, TRUE, NULL);
+            gst_caps_set_simple(output, "mpegversion", G_TYPE_INT, 4, NULL);
+
+            if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, &payload_type)))
+            {
+                switch (payload_type)
+                {
+                    case 0:
+                        gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", NULL);
+                        break;
+                    case 1:
+                        gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "adts", NULL);
+                        break;
+                    default:
+                        gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", NULL);
+                }
+            }
+            else
+                gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", NULL);
+
+            if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, &indication)))
+            {
+                switch (indication)
+                {
+                    case 0x29:
+                    {
+                        gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
+                        gst_caps_set_simple(output, "level", G_TYPE_STRING, "2", NULL);
+                        break;
+                    }
+                    case 0x2A:
+                    {
+                        gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
+                        gst_caps_set_simple(output, "level", G_TYPE_STRING, "4", NULL);
+                        break;
+                    }
+                    case 0x2B:
+                    {
+                        gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
+                        gst_caps_set_simple(output, "level", G_TYPE_STRING, "5", NULL);
+                        break;
+                    }
+                    default:
+                        ERR("Unrecognized profile-level-indication %u\n", indication);
+                }
+            }
+
+            if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, (BYTE **) &user_data, &user_data_size)))
+            {
+                if (user_data_size > sizeof(*user_data))
+                {
+                    GstBuffer *audio_specific_config = gst_buffer_new_allocate(NULL, user_data_size - sizeof(*user_data), NULL);
+                    gst_buffer_fill(audio_specific_config, 0, user_data + 1, user_data_size - sizeof(*user_data));
+
+                    gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, audio_specific_config, NULL);
+                    gst_buffer_unref(audio_specific_config);
+                }
+                CoTaskMemFree(user_data);
+            }
+        }
+        else if (IsEqualGUID(&subtype, &MFAudioFormat_Float))
+        {
+            output = gst_caps_new_empty_simple("audio/x-raw");
+
+            gst_caps_set_simple(output, "format", G_TYPE_STRING, "F32LE", NULL);
+        }
+        else
+        {
+            ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
+            if (output)
+                gst_caps_unref(output);
+            return NULL;
+        }
+        if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate)))
+        {
+            gst_caps_set_simple(output, "rate", G_TYPE_INT, rate, NULL);
+        }
+        if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &channels)))
+        {
+            gst_caps_set_simple(output, "channels", G_TYPE_INT, channels, NULL);
+        }
+
+        return output;
+    }
+
+    ERR("Unrecognized major type %s\n", debugstr_guid(&major_type));
+    return NULL;
+}
diff --git a/include/codecapi.h b/include/codecapi.h
new file mode 100644
index 0000000000..2690b523d7
--- /dev/null
+++ b/include/codecapi.h
@@ -0,0 +1,38 @@
+#ifndef __CODECAPI_H
+#define __CODECAPI_H
+
+enum eAVEncH264VProfile
+{
+    eAVEncH264VProfile_unknown = 0,
+    eAVEncH264VProfile_Simple = 66,
+    eAVEncH264VProfile_Base = 66,
+    eAVEncH264VProfile_Main = 77,
+    eAVEncH264VProfile_High = 100,
+    eAVEncH264VProfile_422 = 122,
+    eAVEncH264VProfile_High10 = 110,
+    eAVEncH264VProfile_444 = 244,
+    eAVEncH264VProfile_Extended = 88,
+};
+
+enum eAVEncH264VLevel
+{
+    eAVEncH264VLevel1 = 10,
+    eAVEncH264VLevel1_b = 11,
+    eAVEncH264VLevel1_1 = 11,
+    eAVEncH264VLevel1_2 = 12,
+    eAVEncH264VLevel1_3 = 13,
+    eAVEncH264VLevel2 = 20,
+    eAVEncH264VLevel2_1 = 21,
+    eAVEncH264VLevel2_2 = 22,
+    eAVEncH264VLevel3 = 30,
+    eAVEncH264VLevel3_1 = 31,
+    eAVEncH264VLevel3_2 = 32,
+    eAVEncH264VLevel4 = 40,
+    eAVEncH264VLevel4_1 = 41,
+    eAVEncH264VLevel4_2 = 42,
+    eAVEncH264VLevel5 = 50,
+    eAVEncH264VLevel5_1 = 51,
+    eAVEncH264VLevel5_2 = 52
+};
+
+#endif
\ No newline at end of file
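To make the intended use of the two helpers added by this patch concrete, here is a hedged sketch of a round trip; the pad variable and the error handling are placeholders for illustration, not part of the patch:

    GstCaps *caps = gst_pad_query_caps(pad, NULL);    /* caps from some pad the caller owns */
    IMFMediaType *type = media_type_from_caps(caps);  /* note: may also modify caps */
    if (type)
    {
        GstCaps *back = caps_from_media_type(type);   /* convert back to caps */
        if (back)
            gst_caps_unref(back);
        IMFMediaType_Release(type);
    }
    gst_caps_unref(caps);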
Signed-off-by: Derek Lesho <dlesho@codeweavers.com>
---
 dlls/winegstreamer/gst_private.h |   2 +
 dlls/winegstreamer/mfplat.c      | 162 +++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+)

diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h
index a6c3fd3784..13ba467a9e 100644
--- a/dlls/winegstreamer/gst_private.h
+++ b/dlls/winegstreamer/gst_private.h
@@ -57,5 +57,7 @@ extern HRESULT mfplat_get_class_object(REFCLSID rclsid, REFIID riid, void **obj)
 
 IMFMediaType* media_type_from_caps(GstCaps *caps);
 GstCaps *caps_from_media_type(IMFMediaType *type);
+IMFSample* mf_sample_from_gst_buffer(GstBuffer *in);
+GstBuffer* gst_buffer_from_mf_sample(IMFSample *in);
 
 #endif /* __GST_PRIVATE_INCLUDED__ */
diff --git a/dlls/winegstreamer/mfplat.c b/dlls/winegstreamer/mfplat.c
index a6f4fbc2ec..e66bc3ffe6 100644
--- a/dlls/winegstreamer/mfplat.c
+++ b/dlls/winegstreamer/mfplat.c
@@ -964,3 +964,165 @@ GstCaps *caps_from_media_type(IMFMediaType *type)
     ERR("Unrecognized major type %s\n", debugstr_guid(&major_type));
     return NULL;
 }
+
+/* IMFSample = GstBuffer
+   IMFBuffer = GstMemory */
+
+/* TODO: Future optimization could be to create a custom
+   IMFMediaBuffer wrapper around GstMemory, and to utilize
+   gst_memory_new_wrapped on IMFMediaBuffer data */
+
+IMFSample* mf_sample_from_gst_buffer(GstBuffer *gst_buffer)
+{
+    IMFSample *out = NULL;
+    LONGLONG duration, time;
+    int buffer_count;
+    HRESULT hr;
+
+    if (FAILED(hr = MFCreateSample(&out)))
+        goto fail;
+
+    duration = GST_BUFFER_DURATION(gst_buffer);
+    time = GST_BUFFER_PTS(gst_buffer);
+
+    if (FAILED(IMFSample_SetSampleDuration(out, duration / 100)))
+        goto fail;
+
+    if (FAILED(IMFSample_SetSampleTime(out, time / 100)))
+        goto fail;
+
+    buffer_count = gst_buffer_n_memory(gst_buffer);
+
+    for (unsigned int i = 0; i < buffer_count; i++)
+    {
+        GstMemory *memory = gst_buffer_get_memory(gst_buffer, i);
+        IMFMediaBuffer *mf_buffer = NULL;
+        GstMapInfo map_info;
+        BYTE *buf_data;
+
+        if (!memory)
+        {
+            hr = ERROR_INTERNAL_ERROR;
+            goto loop_done;
+        }
+
+        if (!(gst_memory_map(memory, &map_info, GST_MAP_READ)))
+        {
+            hr = ERROR_INTERNAL_ERROR;
+            goto loop_done;
+        }
+
+        if (FAILED(hr = MFCreateMemoryBuffer(map_info.maxsize, &mf_buffer)))
+        {
+            gst_memory_unmap(memory, &map_info);
+            goto loop_done;
+        }
+
+        if (FAILED(hr = IMFMediaBuffer_Lock(mf_buffer, &buf_data, NULL, NULL)))
+        {
+            gst_memory_unmap(memory, &map_info);
+            goto loop_done;
+        }
+
+        memcpy(buf_data, map_info.data, map_info.size);
+
+        gst_memory_unmap(memory, &map_info);
+
+        if (FAILED(hr = IMFMediaBuffer_Unlock(mf_buffer)))
+            goto loop_done;
+
+        if (FAILED(hr = IMFMediaBuffer_SetCurrentLength(mf_buffer, map_info.size)))
+            goto loop_done;
+
+        if (FAILED(hr = IMFSample_AddBuffer(out, mf_buffer)))
+            goto loop_done;
+
+        loop_done:
+        if (mf_buffer)
+            IMFMediaBuffer_Release(mf_buffer);
+        if (memory)
+            gst_memory_unref(memory);
+        if (FAILED(hr))
+            goto fail;
+    }
+
+    return out;
+    fail:
+    ERR("Failed to copy IMFSample to GstBuffer, hr = %#x\n", hr);
+    IMFSample_Release(out);
+    return NULL;
+}
+
+GstBuffer* gst_buffer_from_mf_sample(IMFSample *mf_sample)
+{
+    GstBuffer *out = gst_buffer_new();
+    IMFMediaBuffer *mf_buffer = NULL;
+    LONGLONG duration, time;
+    DWORD buffer_count;
+    HRESULT hr;
+
+    if (FAILED(hr = IMFSample_GetSampleDuration(mf_sample, &duration)))
+        goto fail;
+
+    if (FAILED(hr = IMFSample_GetSampleTime(mf_sample, &time)))
+        goto fail;
+
+    GST_BUFFER_DURATION(out) = duration;
+    GST_BUFFER_PTS(out) = time * 100;
+
+    if (FAILED(hr = IMFSample_GetBufferCount(mf_sample, &buffer_count)))
+        goto fail;
+
+    for (unsigned int i = 0; i < buffer_count; i++)
+    {
+        DWORD buffer_max_size, buffer_size;
+        GstMapInfo map_info;
+        GstMemory *memory;
+        BYTE *buf_data;
+
+        if (FAILED(hr = IMFSample_GetBufferByIndex(mf_sample, i, &mf_buffer)))
+            goto fail;
+
+        if (FAILED(hr = IMFMediaBuffer_GetMaxLength(mf_buffer, &buffer_max_size)))
+            goto fail;
+
+        if (FAILED(hr = IMFMediaBuffer_GetCurrentLength(mf_buffer, &buffer_size)))
+            goto fail;
+
+        memory = gst_allocator_alloc(NULL, buffer_size, NULL);
+        gst_memory_resize(memory, 0, buffer_size);
+
+        if (!(gst_memory_map(memory, &map_info, GST_MAP_WRITE)))
+        {
+            hr = ERROR_INTERNAL_ERROR;
+            goto fail;
+        }
+
+        if (FAILED(hr = IMFMediaBuffer_Lock(mf_buffer, &buf_data, NULL, NULL)))
+            goto fail;
+
+        memcpy(map_info.data, buf_data, buffer_size);
+
+        if (FAILED(hr = IMFMediaBuffer_Unlock(mf_buffer)))
+            goto fail;
+
+        if (FAILED(hr = IMFMediaBuffer_SetCurrentLength(mf_buffer, buffer_size)))
+            goto fail;
+
+        gst_memory_unmap(memory, &map_info);
+
+        gst_buffer_append_memory(out, memory);
+
+        IMFMediaBuffer_Release(mf_buffer);
+        mf_buffer = NULL;
+    }
+
+    return out;
+
+    fail:
+    ERR("Failed to copy IMFSample to GstBuffer, hr = %#x\n", hr);
+    if (mf_buffer)
+        IMFMediaBuffer_Release(mf_buffer);
+    gst_buffer_unref(out);
+    return NULL;
+}
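As a sketch of the intended data flow (the appsink, src_pad and app_sample here are assumptions for illustration, not part of the patch):

    /* Hand a decoded GStreamer buffer to Media Foundation as an IMFSample... */
    GstSample *sample = gst_app_sink_pull_sample(appsink);
    GstBuffer *gst_buf = gst_sample_get_buffer(sample);
    IMFSample *mf_sample = mf_sample_from_gst_buffer(gst_buf);
    gst_sample_unref(sample);

    /* ...or push an application-provided IMFSample into the pipeline. */
    GstBuffer *in_buf = gst_buffer_from_mf_sample(app_sample);
    if (in_buf)
        gst_pad_push(src_pad, in_buf);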
Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=68031
Your paranoid android.
=== debiant (build log) ===
../../../wine/dlls/winegstreamer/mfplat.c:37:5: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:43:71: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:48:54: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:65:44: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:75:45: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:94:55: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:104:54: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:113:52: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:119:58: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:119:89: error: unknown type name ‘MFT_INPUT_STREAM_INFO’
../../../wine/dlls/winegstreamer/mfplat.c:124:59: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:124:90: error: unknown type name ‘MFT_OUTPUT_STREAM_INFO’
../../../wine/dlls/winegstreamer/mfplat.c:129:53: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:141:64: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:147:65: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:160:57: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:167:55: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:174:61: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:182:62: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:190:52: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:197:53: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:204:59: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:211:60: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:218:54: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:225:55: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:232:55: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:239:52: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:246:54: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:246:75: error: unknown type name ‘MFT_MESSAGE_TYPE’; did you mean ‘GST_MESSAGE_TYPE’?
../../../wine/dlls/winegstreamer/mfplat.c:253:52: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:260:53: error: unknown type name ‘IMFTransform’
../../../wine/dlls/winegstreamer/mfplat.c:261:9: error: unknown type name ‘MFT_OUTPUT_DATA_BUFFER’; did you mean ‘DMO_OUTPUT_DATA_BUFFER’?
../../../wine/dlls/winegstreamer/mfplat.c:268:14: error: unknown type name ‘IMFTransformVtbl’
../../../wine/dlls/winegstreamer/mfplat.c:270:5: error: ‘video_processor_QueryInterface’ undeclared here (not in a function); did you mean ‘IMemAllocator_QueryInterface’?
../../../wine/dlls/winegstreamer/mfplat.c:271:5: error: ‘video_processor_AddRef’ undeclared here (not in a function); did you mean ‘video_processor_vtbl’?
../../../wine/dlls/winegstreamer/mfplat.c:272:5: error: ‘video_processor_Release’ undeclared here (not in a function); did you mean ‘video_processor_vtbl’?
../../../wine/dlls/winegstreamer/mfplat.c:273:5: error: ‘video_processor_GetStreamLimits’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:274:5: error: ‘video_processor_GetStreamCount’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:275:5: error: ‘video_processor_GetStreamIDs’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:276:5: error: ‘video_processor_GetInputStreamInfo’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:277:5: error: ‘video_processor_GetOutputStreamInfo’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:278:5: error: ‘video_processor_GetAttributes’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:279:5: error: ‘video_processor_GetInputStreamAttributes’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:280:5: error: ‘video_processor_GetOutputStreamAttributes’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:281:5: error: ‘video_processor_DeleteInputStream’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:282:5: error: ‘video_processor_AddInputStreams’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:283:5: error: ‘video_processor_GetInputAvailableType’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:284:5: error: ‘video_processor_GetOutputAvailableType’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:285:5: error: ‘video_processor_SetInputType’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:286:5: error: ‘video_processor_SetOutputType’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:287:5: error: ‘video_processor_GetInputCurrentType’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:288:5: error: ‘video_processor_GetOutputCurrentType’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:289:5: error: ‘video_processor_GetInputStatus’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:290:5: error: ‘video_processor_GetOutputStatus’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:291:5: error: ‘video_processor_SetOutputBounds’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:292:5: error: ‘video_processor_ProcessEvent’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:293:5: error: ‘video_processor_ProcessMessage’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:294:5: error: ‘video_processor_ProcessInput’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:295:5: error: ‘video_processor_ProcessOutput’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:388:31: error: request for member ‘lpVtbl’ in something not a structure or union
../../../wine/dlls/winegstreamer/mfplat.c:413:8: error: ‘CLSID_VideoProcessorMFT’ undeclared here (not in a function)
../../../wine/dlls/winegstreamer/mfplat.c:497:57: error: ‘MF_MT_MPEG2_PROFILE’ undeclared (first use in this function)
../../../wine/dlls/winegstreamer/mfplat.c:508:57: error: ‘MF_MT_MPEG2_LEVEL’ undeclared (first use in this function)
../../../wine/dlls/winegstreamer/mfplat.c:787:57: error: ‘MF_MT_MPEG2_PROFILE’ undeclared (first use in this function)
../../../wine/dlls/winegstreamer/mfplat.c:800:57: error: ‘MF_MT_MPEG2_LEVEL’ undeclared (first use in this function)

Task: The win32 Wine build failed
=== debiant (build log) ===

(identical errors to the win32 build log above)

Task: The wow64 Wine build failed
On 3/24/20 2:39 PM, Derek Lesho wrote:
> Signed-off-by: Derek Lesho <dlesho@codeweavers.com>
> [...]
> +/* IMFSample = GstBuffer
> +   IMFBuffer = GstMemory */
IMFBuffer isn't an interface.
I'm not sure that this comment is especially useful anyway, though.
> +/* TODO: Future optimization could be to create a custom
> +   IMFMediaBuffer wrapper around GstMemory, and to utilize
> +   gst_memory_new_wrapped on IMFMediaBuffer data */
This seems like a better idea than what's done below; any particular reason why not?
> +IMFSample* mf_sample_from_gst_buffer(GstBuffer *gst_buffer)
> +{
> [...]
> +        if (!memory)
> +        {
> +            hr = ERROR_INTERNAL_ERROR;
That's not an HRESULT.
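For reference, either a plain COM failure code or an explicitly converted Win32 code would do here; which to use is a style choice, not something this thread settles:

    hr = E_FAIL;                                    /* generic COM failure */
    hr = HRESULT_FROM_WIN32(ERROR_INTERNAL_ERROR);  /* or: wrap the Win32 code */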
> [...]
>
> +GstBuffer* gst_buffer_from_mf_sample(IMFSample *mf_sample)
> +{
> [...]
> +        memory = gst_allocator_alloc(NULL, buffer_size, NULL);
> +        gst_memory_resize(memory, 0, buffer_size);
Why is this call to gst_memory_resize() here?
On 3/24/20 3:37 PM, Zebediah Figura wrote:
> On 3/24/20 2:39 PM, Derek Lesho wrote:
>> [...]
>> +/* IMFSample = GstBuffer
>> +   IMFBuffer = GstMemory */
>
> IMFBuffer isn't an interface.
>
> I'm not sure that this comment is especially useful anyway, though.
That is a typo, but in general I think it helps avoid confusion between gstreamer buffers and media foundation buffers.
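With the typo fixed, the comment would presumably read:

    /* IMFSample      = GstBuffer
       IMFMediaBuffer = GstMemory */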
>> +/* TODO: Future optimization could be to create a custom
>> +   IMFMediaBuffer wrapper around GstMemory, and to utilize
>> +   gst_memory_new_wrapped on IMFMediaBuffer data */
>
> This seems like a better idea than what's done below; any particular reason why not?
Mainly because this way is simpler and works fine for now. Also, Microsoft's decoders which I've looked at have the user of the transform allocate the output buffers, and this optimization wouldn't work if we wanted to match that.
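For reference, the zero-copy idea under discussion would look roughly like the sketch below. This only illustrates gst_memory_new_wrapped(); the helper name and the lock-for-lifetime strategy are assumptions, not part of either patch:

    /* Sketch: wrap the locked bytes of an IMFMediaBuffer in a GstMemory
       without copying. The buffer stays locked and referenced until
       GStreamer frees the wrapping GstMemory. */
    static void release_mf_buffer(gpointer data)
    {
        IMFMediaBuffer *mf_buffer = data;
        IMFMediaBuffer_Unlock(mf_buffer);
        IMFMediaBuffer_Release(mf_buffer);
    }

    BYTE *buf_data;
    DWORD max_size, size;
    if (SUCCEEDED(IMFMediaBuffer_Lock(mf_buffer, &buf_data, &max_size, &size)))
    {
        IMFMediaBuffer_AddRef(mf_buffer);
        memory = gst_memory_new_wrapped(0, buf_data, max_size, 0, size,
                                        mf_buffer, release_mf_buffer);
    }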
>> [...]
>> +        if (!memory)
>> +        {
>> +            hr = ERROR_INTERNAL_ERROR;
>
> That's not an HRESULT.
oof, yeah I'll fix that.
>> [...]
>> +        memory = gst_allocator_alloc(NULL, buffer_size, NULL);
>> +        gst_memory_resize(memory, 0, buffer_size);
>
> Why is this call to gst_memory_resize() here?
I think I planned on using buffer_max_size for the allocation, but yeah that shouldn't matter and I'll remove it.
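So the resize only makes sense in the variant that allocates at the maximum length; roughly:

    /* What the resize was apparently written for: allocate at the MF
       buffer's maximum length, then shrink to the valid region. */
    memory = gst_allocator_alloc(NULL, buffer_max_size, NULL);
    gst_memory_resize(memory, 0, buffer_size);

    /* In the variant the patch actually uses, it is a no-op and can go: */
    memory = gst_allocator_alloc(NULL, buffer_size, NULL);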
On 3/24/20 3:45 PM, Derek Lesho wrote:
> On 3/24/20 3:37 PM, Zebediah Figura wrote:
>> On 3/24/20 2:39 PM, Derek Lesho wrote:
>>> +/* TODO: Future optimization could be to create a custom
>>> +   IMFMediaBuffer wrapper around GstMemory, and to utilize
>>> +   gst_memory_new_wrapped on IMFMediaBuffer data */
>>
>> This seems like a better idea than what's done below; any particular reason why not?
>
> Mainly because this way is simpler and works fine for now. Also, Microsoft's decoders which I've looked at have the user of the transform allocate the output buffers, and this optimization wouldn't work if we wanted to match that.
If the optimization wouldn't work, then why mention it?
(Even better, you could mention why it wouldn't work, so that nobody is tempted to try it.)
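Something along these lines would capture the reasoning from this thread (suggested wording only):

    /* TODO: a zero-copy path could wrap GstMemory in a custom IMFMediaBuffer,
       or wrap IMFMediaBuffer data with gst_memory_new_wrapped. Note that this
       doesn't help for transforms whose callers allocate the output samples
       themselves, which is what Microsoft's native decoders expect. */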
Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=68030
Your paranoid android.
=== debiant (build log) ===

(identical errors to the build log in the previous testbot report)

Task: The win32 Wine build failed
=== debiant (build log) ===
Task: The wow64 Wine build failed
General comments:
It's not great to introduce code that's not used anywhere; it's essentially dead until then.
This could, I think, be split up into much smaller pieces in any case: you're introducing two different functions here, and each function introduces support for several different formats.
On 3/24/20 2:39 PM, Derek Lesho wrote:
Signed-off-by: Derek Lesho dlesho@codeweavers.com
dlls/winegstreamer/gst_private.h | 4 + dlls/winegstreamer/mfplat.c | 533 ++++++++++++++++++++++++++++++- include/codecapi.h | 38 +++ 3 files changed, 574 insertions(+), 1 deletion(-) create mode 100644 include/codecapi.h
diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h index e6fb841fc8..a6c3fd3784 100644 --- a/dlls/winegstreamer/gst_private.h +++ b/dlls/winegstreamer/gst_private.h @@ -36,6 +36,7 @@ #include "winuser.h" #include "dshow.h" #include "strmif.h" +#include "mfobjects.h" #include "wine/heap.h" #include "wine/strmbase.h"
@@ -54,4 +55,7 @@ void start_dispatch_thread(void) DECLSPEC_HIDDEN;
extern HRESULT mfplat_get_class_object(REFCLSID rclsid, REFIID riid, void **obj) DECLSPEC_HIDDEN;
+IMFMediaType* media_type_from_caps(GstCaps *caps); +GstCaps *caps_from_media_type(IMFMediaType *type);
Using the generic name "media_type", in a module that serves multiple media APIs, is not great.
Also, why is this in the public header?
Also, style nitpick: please try to be consistent about your asterisk placement (ideally using "type *var" style.)
#endif /* __GST_PRIVATE_INCLUDED__ */
...
@@ -433,3 +438,529 @@ HRESULT mfplat_get_class_object(REFCLSID rclsid, REFIID riid, void **obj)
return CLASS_E_CLASSNOTAVAILABLE;
}
+struct aac_user_data +{
- WORD payload_type;
- WORD profile_level_indication;
- WORD struct_type;
- WORD reserved;
- /*BYTE audio_specific_config;*/
What's this field doing here?
+};
+/* IMPORTANT: caps will be modified to represent the exact type needed for the format */
Why?
+IMFMediaType* media_type_from_caps(GstCaps *caps) +{
- IMFMediaType *media_type;
- GstStructure *info;
- const char *media_type_name;
- gchar *human_readable;
- if (FAILED(MFCreateMediaType(&media_type)))
- {
return NULL;
- }
- info = gst_caps_get_structure(caps, 0);
- media_type_name = gst_structure_get_name(info);
- human_readable = gst_caps_to_string(caps);
- TRACE("caps = %s\n", human_readable);
- g_free(human_readable);
Probably would be best to guard this with TRACE_ON, so that we don't bother allocating anything otherwise.
Also, you'll want to use debugstr_a(), especially since caps can overrun the static buffer in ntdll.
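Something along these lines, say (a sketch, assuming the file's usual Wine debug channel):

if (TRACE_ON(mfplat))
{
    gchar *caps_str = gst_caps_to_string(caps);
    TRACE("caps = %s\n", debugstr_a(caps_str));
    g_free(caps_str);
}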
- if (!(strncmp(media_type_name, "video", 5)))
Style nitpick, superfluous parentheses.
I think Nikolay already mentioned this, but it's probably not a bad idea to just match against the whole "video/x-h264" etc. sequence.
- {
const char *video_format = media_type_name + 6;
gint width, height, framerate_num, framerate_den;
IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, &MFMediaType_Video);
if (gst_structure_get_int(info, "width", &width) && gst_structure_get_int(info, "height", &height))
{
IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_SIZE, ((UINT64)width << 32) | height);
}
if (gst_structure_get_fraction(info, "framerate", &framerate_num, &framerate_den))
{
IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_RATE, ((UINT64)framerate_num << 32) | framerate_den);
}
if (!(strcmp(video_format, "x-h264")))
{
const char *profile, *level;
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_H264);
IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
if ((profile = gst_structure_get_string(info, "profile")))
{
if (!(strcmp(profile, "main")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_Main);
else if (!(strcmp(profile, "high")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_High);
else if (!(strcmp(profile, "high-4:4:4")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_444);
else
ERR("Unrecognized profile %s\n", profile);
This ERR (and many below) should probably be a FIXME instead, methinks.
}
if ((level = gst_structure_get_string(info, "level")))
{
if (!(strcmp(level, "1")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel1);
else if (!(strcmp(level, "1.1")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel1_1);
else if (!(strcmp(level, "1.2")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel1_2);
else if (!(strcmp(level, "1.3")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel1_3);
else if (!(strcmp(level, "2")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel2);
else if (!(strcmp(level, "2.1")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel2_1);
else if (!(strcmp(level, "2.2")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel2_2);
else if (!(strcmp(level, "3")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel3);
else if (!(strcmp(level, "3.1")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel3_1);
else if (!(strcmp(level, "3.2")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel3_2);
else if (!(strcmp(level, "4")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel4);
else if (!(strcmp(level, "4.1")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel4_1);
else if (!(strcmp(level, "4.2")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel4_2);
else if (!(strcmp(level, "5")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel5);
else if (!(strcmp(level, "5.1")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel5_1);
else if (!(strcmp(level, "5.2")))
IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, eAVEncH264VLevel5_2);
else
ERR("Unrecognized level %s\n", level);
}
Could we maybe make this a table instead?
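Roughly like this, say (a sketch; ARRAY_SIZE is the usual Wine helper, and the enum names are the ones this patch adds in codecapi.h):

static const struct
{
    const char *name;
    enum eAVEncH264VLevel level;
}
h264_levels[] =
{
    {"1",   eAVEncH264VLevel1},   {"1.1", eAVEncH264VLevel1_1},
    {"1.2", eAVEncH264VLevel1_2}, {"1.3", eAVEncH264VLevel1_3},
    {"2",   eAVEncH264VLevel2},   {"2.1", eAVEncH264VLevel2_1},
    {"2.2", eAVEncH264VLevel2_2}, {"3",   eAVEncH264VLevel3},
    {"3.1", eAVEncH264VLevel3_1}, {"3.2", eAVEncH264VLevel3_2},
    {"4",   eAVEncH264VLevel4},   {"4.1", eAVEncH264VLevel4_1},
    {"4.2", eAVEncH264VLevel4_2}, {"5",   eAVEncH264VLevel5},
    {"5.1", eAVEncH264VLevel5_1}, {"5.2", eAVEncH264VLevel5_2},
};
unsigned int i;

/* Look the level string up instead of chaining strcmp()s. */
for (i = 0; i < ARRAY_SIZE(h264_levels); ++i)
{
    if (!strcmp(level, h264_levels[i].name))
    {
        IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, h264_levels[i].level);
        break;
    }
}
if (i == ARRAY_SIZE(h264_levels))
    FIXME("Unrecognized level %s\n", level);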
gst_caps_set_simple(caps, "stream-format", G_TYPE_STRING, "byte-stream", NULL);
gst_caps_set_simple(caps, "alignment", G_TYPE_STRING, "au", NULL);
for (unsigned int i = 0; i < gst_caps_get_size(caps); i++)
{
GstStructure *structure = gst_caps_get_structure (caps, i);
gst_structure_remove_field(structure, "codec_data");
}
}
else if (!(strcmp(video_format, "x-wmv")))
{
gint wmv_version;
const char *format;
const GValue *codec_data;
if (gst_structure_get_int(info, "wmvversion", &wmv_version))
{
switch (wmv_version)
{
case 1:
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV1);
break;
case 2:
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV2);
break;
case 3:
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV3);
break;
default:
ERR("Unrecognized wmvversion %d\n", wmv_version);
}
}
if ((format = gst_structure_get_string(info, "format")))
{
if (!(strcmp(format, "WVC1")))
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WVC1);
What if it's not? I think that deserves at least a FIXME.
(Style nitpick, extra parentheses.)
}
if ((codec_data = gst_structure_get_value(info, "codec_data")))
{
GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
if (codec_data_buffer)
{
gsize codec_data_size = gst_buffer_get_size(codec_data_buffer);
gpointer codec_data_raw = heap_alloc(codec_data_size);
gst_buffer_extract(codec_data_buffer, 0, codec_data_raw, codec_data_size);
IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, codec_data_raw, codec_data_size);
}
}
}
else if (!(strcmp(video_format, "mpeg")))
{
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_M4S2);
IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
There are other video/mpeg formats.
}
else if (!(strcmp(video_format, "x-raw")))
{
const char *fourcc = gst_structure_get_string(info, "stream-format");
IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, FALSE);
if (fourcc && (strlen(fourcc) == 4))
{
GUID fourcc_subtype = MFVideoFormat_Base;
fourcc_subtype.Data1 = MAKEFOURCC(
toupper(fourcc[0]), toupper(fourcc[1]), toupper(fourcc[2]), toupper(fourcc[3]));
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &fourcc_subtype);
}
else
ERR("uncompressed video has no stream-format\n");
I've never seen a FOURCC stored in the "stream-format" tag; where are you getting this from?
}
else
ERR("Unrecognized video format %s\n", video_format);
- }
- else if (!(strncmp(media_type_name, "audio", 5)))
- {
const char *audio_format = media_type_name + 6;
IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, &MFMediaType_Audio);
if (!(strcmp(audio_format, "mpeg")))
{
int mpeg_version = -1;
IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
if (!(gst_structure_get_int(info, "mpegversion", &mpeg_version)))
ERR("Failed to get mpegversion\n");
switch (mpeg_version)
{
case 1:
{
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG);
break;
}
What about MFAudioFormat_MP3?
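For mpegversion=1 caps GStreamer also carries a "layer" field, so the distinction could look something like this (a sketch; whether native maps layer 3 to MFAudioFormat_MP3 here is an assumption I haven't tested):

gint layer;
/* audio/mpeg, mpegversion=1 covers MP1/MP2/MP3; "layer" tells them apart. */
if (gst_structure_get_int(info, "layer", &layer) && layer == 3)
    IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MP3);
else
    IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG);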
case 2:
case 4:
{
const char *format, *profile, *level;
DWORD profile_level_indication = 0;
const GValue *codec_data;
DWORD asc_size = 0;
struct aac_user_data *user_data = NULL;
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_AAC);
codec_data = gst_structure_get_value(info, "codec_data");
if (codec_data)
{
GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
if (codec_data_buffer)
{
if ((asc_size = gst_buffer_get_size(codec_data_buffer)) >= 2)
{
user_data = heap_alloc_zero(sizeof(*user_data)+asc_size);
gst_buffer_extract(codec_data_buffer, 0, (gpointer)(user_data + 1), asc_size);
}
else
ERR("Unexpected buffer size\n");
}
else
ERR("codec_data not a buffer\n");
}
else
ERR("codec_data not found\n");
if (!user_data)
user_data = heap_alloc_zero(sizeof(*user_data));
{
int rate;
if (gst_structure_get_int(info, "rate", &rate))
IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, rate);
}
{
int channels;
if (gst_structure_get_int(info, "channels", &channels))
IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS, channels);
}
Did you mean to add these blocks?
if ((format = gst_structure_get_string(info, "stream-format")))
{
DWORD payload_type = -1;
if (!(strcmp(format, "raw")))
payload_type = 0;
else if (!(strcmp(format, "adts")))
payload_type = 1;
else
ERR("Unrecognized stream-format\n");
if (payload_type != -1)
{
IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_PAYLOAD_TYPE, payload_type);
user_data->payload_type = payload_type;
}
}
else
{
ERR("Stream format not present\n");
}
profile = gst_structure_get_string(info, "profile");
level = gst_structure_get_string(info, "level");
/* Data from https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder#output-types */
I'm not sure I'd link to Microsoft documentation; it's not very stable.
if (profile && level)
{
if (!(strcmp(profile, "lc")) && !(strcmp(level, "2")))
profile_level_indication = 0x29;
else if (!(strcmp(profile, "lc")) && !(strcmp(level, "4")))
profile_level_indication = 0x2A;
else if (!(strcmp(profile, "lc")) && !(strcmp(level, "5")))
profile_level_indication = 0x2B;
else
ERR("Unhandled profile/level combo\n");
}
else
ERR("Profile or level not present\n");
if (profile_level_indication)
{
IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, profile_level_indication);
user_data->profile_level_indication = profile_level_indication;
}
IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, (BYTE *)user_data, sizeof(user_data) + asc_size);
heap_free(user_data);
break;
}
default:
ERR("Unhandled mpegversion %d\n", mpeg_version);
}
}
else if (!(strcmp(audio_format, "x-raw")))
{
IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_Float);
gst_caps_set_simple(caps, "format", G_TYPE_STRING, "F32LE", NULL);
There are other audio formats.
}
else
ERR("Unrecognized audio format %s\n", audio_format);
- }
- else
- {
goto fail;
I'm generally of the opinion that one line of cleanup doesn't merit a "goto".
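I.e. just release inline, something like:

else
{
    IMFMediaType_Release(media_type);
    return NULL;
}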
- }
- return media_type;
- fail:
- IMFMediaType_Release(media_type);
- return NULL;
+}
+static const char *fourcc_str(DWORD fourcc) +{
- if (!fourcc) return NULL;
- return wine_dbg_sprintf ("%c%c%c%c",
(char)(fourcc), (char)(fourcc >> 8),
(char)(fourcc >> 16), (char)(fourcc >> 24));
+}
I don't think you want to use Wine's debugging utilities for non-debug code.
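A caller-provided buffer sidesteps the debug machinery entirely, e.g.:

/* Fill a caller-supplied buffer instead of wine_dbg_sprintf()'s
 * rotating debug buffers; needs room for four characters plus NUL. */
static const char *fourcc_str(DWORD fourcc, char buffer[5])
{
    buffer[0] = (char)fourcc;
    buffer[1] = (char)(fourcc >> 8);
    buffer[2] = (char)(fourcc >> 16);
    buffer[3] = (char)(fourcc >> 24);
    buffer[4] = 0;
    return buffer;
}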
+GstCaps *caps_from_media_type(IMFMediaType *type) +{
- GUID major_type;
- GUID subtype;
- GUID base_masked_subtype;
- GstCaps *output = NULL;
- if (FAILED(IMFMediaType_GetMajorType(type, &major_type)))
return NULL;
- if (FAILED(IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype)))
return NULL;
- base_masked_subtype = subtype;
- base_masked_subtype.Data1 = 0;
- if (IsEqualGUID(&major_type, &MFMediaType_Video))
- {
UINT64 frame_rate = 0, frame_size = 0;
DWORD *framerate_num = ((DWORD*)&frame_rate) + 1;
DWORD *framerate_den = ((DWORD*)&frame_rate);
DWORD *width = ((DWORD*)&frame_size) + 1;
DWORD *height = ((DWORD*)&frame_size);
It seems simpler to me to do e.g.
DWORD width = frame_size >> 32; DWORD height = frame_size;
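That is, fetch the packed attribute and unpack it where it's used; per the packing in media_type_from_caps, the width sits in the high 32 bits. Roughly:

UINT64 frame_size = 0;
IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
if (frame_size)
{
    /* width is packed into the upper dword, height into the lower */
    gst_caps_set_simple(output, "width", G_TYPE_INT, (DWORD)(frame_size >> 32), NULL);
    gst_caps_set_simple(output, "height", G_TYPE_INT, (DWORD)frame_size, NULL);
}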
if (IsEqualGUID(&subtype, &MFVideoFormat_H264))
{
enum eAVEncH264VProfile h264_profile;
enum eAVEncH264VLevel h264_level;
output = gst_caps_new_empty_simple("video/x-h264");
gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "byte-stream", NULL);
gst_caps_set_simple(output, "alignment", G_TYPE_STRING, "au", NULL);
if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_PROFILE, &h264_profile)))
{
const char *profile = NULL;
switch (h264_profile)
{
case eAVEncH264VProfile_Main: profile = "main"; break;
case eAVEncH264VProfile_High: profile = "high"; break;
case eAVEncH264VProfile_444: profile = "high-4:4:4"; break;
default: ERR("Unknown profile %u\n", h264_profile);
}
if (profile)
gst_caps_set_simple(output, "profile", G_TYPE_STRING, profile, NULL);
}
if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_LEVEL, &h264_level)))
{
const char *level = NULL;
switch (h264_level)
{
case eAVEncH264VLevel1: level = "1"; break;
case eAVEncH264VLevel1_1: level = "1.1"; break;
case eAVEncH264VLevel1_2: level = "1.2"; break;
case eAVEncH264VLevel1_3: level = "1.3"; break;
case eAVEncH264VLevel2: level = "2"; break;
case eAVEncH264VLevel2_1: level = "2.1"; break;
case eAVEncH264VLevel2_2: level = "2.2"; break;
case eAVEncH264VLevel3: level = "3"; break;
case eAVEncH264VLevel3_1: level = "3.1"; break;
case eAVEncH264VLevel3_2: level = "3.2"; break;
case eAVEncH264VLevel4: level = "4"; break;
case eAVEncH264VLevel4_1: level = "4.1"; break;
case eAVEncH264VLevel4_2: level = "4.2"; break;
case eAVEncH264VLevel5: level = "5"; break;
case eAVEncH264VLevel5_1: level = "5.1"; break;
case eAVEncH264VLevel5_2: level = "5.2"; break;
default: ERR("Unknown level %u\n", h264_level);
}
if (level)
gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL);
}
}
else if (IsEqualGUID(&subtype, &MFVideoFormat_WVC1))
{
BYTE *user_data;
DWORD user_data_size;
output = gst_caps_new_empty_simple("video/x-wmv");
gst_caps_set_simple(output, "format", G_TYPE_STRING, "WVC1", NULL);
gst_caps_set_simple(output, "wmvversion", G_TYPE_INT, 3, NULL);
if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, &user_data, &user_data_size)))
{
GstBuffer *codec_data_buffer = gst_buffer_new_allocate(NULL, user_data_size, NULL);
gst_buffer_fill(codec_data_buffer, 0, user_data, user_data_size);
gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, codec_data_buffer, NULL);
gst_buffer_unref(codec_data_buffer);
CoTaskMemFree(user_data);
}
}
else if (IsEqualGUID(&base_masked_subtype, &MFVideoFormat_Base))
{
output = gst_caps_new_empty_simple("video/x-raw");
gst_caps_set_simple(output, "format", G_TYPE_STRING, fourcc_str(subtype.Data1), NULL);
What about RGB formats?
}
else {
ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
return NULL;
}
IMFMediaType_GetUINT64(type, &MF_MT_FRAME_RATE, &frame_rate);
IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
if (frame_rate)
gst_caps_set_simple(output, "framerate", GST_TYPE_FRACTION, *framerate_num, *framerate_den, NULL);
if (frame_size)
{
gst_caps_set_simple(output, "width", G_TYPE_INT, *width, NULL);
gst_caps_set_simple(output, "height", G_TYPE_INT, *height, NULL);
}
return output;
- }
- else if (IsEqualGUID(&major_type, &MFMediaType_Audio))
- {
DWORD rate, channels;
if (IsEqualGUID(&subtype, &MFAudioFormat_AAC))
{
DWORD payload_type, indication;
struct aac_user_data *user_data;
UINT32 user_data_size;
output = gst_caps_new_empty_simple("audio/mpeg");
/* TODO */
gst_caps_set_simple(output, "framed", G_TYPE_BOOLEAN, TRUE, NULL);
gst_caps_set_simple(output, "mpegversion", G_TYPE_INT, 4, NULL);
What's TODO here?
if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, &payload_type)))
{
switch (payload_type)
{
case 0:
gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", NULL);
break;
case 1:
gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "adts", NULL);
break;
default:
gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", NULL);
Seems to me that 2 and 3 should be mapped to "adif" and "loas", respectively.
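E.g., indexing a small table (a sketch):

static const char *stream_formats[] = {"raw", "adts", "adif", "loas"};
if (payload_type < ARRAY_SIZE(stream_formats))
    gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, stream_formats[payload_type], NULL);
else
    FIXME("Unrecognized payload type %u\n", payload_type);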
}
}
else
gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", NULL);
if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, &indication)))
{
switch (indication)
{
case 0x29:
{
gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
gst_caps_set_simple(output, "level", G_TYPE_STRING, "2", NULL);
break;
}
case 0x2A:
{
gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
gst_caps_set_simple(output, "level", G_TYPE_STRING, "4", NULL);
break;
}
case 0x2B:
{
gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
gst_caps_set_simple(output, "level", G_TYPE_STRING, "5", NULL);
break;
}
default:
ERR("Unrecognized profile-level-indication %u\n", indication);
}
I think you could significantly deduplicate this switch.
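For instance, only the level string actually varies (a sketch):

const char *aac_level = NULL;
switch (indication)
{
    case 0x29: aac_level = "2"; break;
    case 0x2A: aac_level = "4"; break;
    case 0x2B: aac_level = "5"; break;
    default: FIXME("Unrecognized profile-level-indication %u\n", indication);
}
if (aac_level)
{
    /* all three known indications are AAC-LC; only the level differs */
    gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
    gst_caps_set_simple(output, "level", G_TYPE_STRING, aac_level, NULL);
}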
}
if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, (BYTE **) &user_data, &user_data_size)))
{
if (user_data_size > sizeof(sizeof(*user_data)))
{
GstBuffer *audio_specific_config = gst_buffer_new_allocate(NULL, user_data_size - sizeof(*user_data), NULL);
gst_buffer_fill(audio_specific_config, 0, user_data + 1, user_data_size - sizeof(*user_data));
gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, audio_specific_config, NULL);
gst_buffer_unref(audio_specific_config);
}
CoTaskMemFree(user_data);
}
}
else if (IsEqualGUID(&subtype, &MFAudioFormat_Float))
{
output = gst_caps_new_empty_simple("audio/x-raw");
gst_caps_set_simple(output, "format", G_TYPE_STRING, "F32LE", NULL);
}
else
{
ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
if (output)
gst_caps_unref(output);
return NULL;
}
if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate)))
{
gst_caps_set_simple(output, "rate", G_TYPE_INT, rate, NULL);
}
if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &channels)))
{
gst_caps_set_simple(output, "channels", G_TYPE_INT, channels, NULL);
}
return output;
- }
- ERR("Unrecognized major type %s\n", debugstr_guid(&major_type));
- return NULL;
+} diff --git a/include/codecapi.h b/include/codecapi.h new file mode 100644 index 0000000000..2690b523d7 --- /dev/null +++ b/include/codecapi.h @@ -0,0 +1,38 @@ +#ifndef __CODECAPI_H +#define __CODECAPI_H
+enum eAVEncH264VProfile +{
- eAVEncH264VProfile_unknown = 0,
- eAVEncH264VProfile_Simple = 66,
- eAVEncH264VProfile_Base = 66,
- eAVEncH264VProfile_Main = 77,
- eAVEncH264VProfile_High = 100,
- eAVEncH264VProfile_422 = 122,
- eAVEncH264VProfile_High10 = 110,
- eAVEncH264VProfile_444 = 244,
- eAVEncH264VProfile_Extended = 88,
+};
+enum eAVEncH264VLevel +{
- eAVEncH264VLevel1 = 10,
- eAVEncH264VLevel1_b = 11,
- eAVEncH264VLevel1_1 = 11,
- eAVEncH264VLevel1_2 = 12,
- eAVEncH264VLevel1_3 = 13,
- eAVEncH264VLevel2 = 20,
- eAVEncH264VLevel2_1 = 21,
- eAVEncH264VLevel2_2 = 22,
- eAVEncH264VLevel3 = 30,
- eAVEncH264VLevel3_1 = 31,
- eAVEncH264VLevel3_2 = 32,
- eAVEncH264VLevel4 = 40,
- eAVEncH264VLevel4_1 = 41,
- eAVEncH264VLevel4_2 = 42,
- eAVEncH264VLevel5 = 50,
- eAVEncH264VLevel5_1 = 51,
- eAVEncH264VLevel5_2 = 52
+};
+#endif \ No newline at end of file
On 3/24/20 3:22 PM, Zebediah Figura wrote:
General comments:
It's not great to introduce code that's not used anywhere; it's essentially dead until then.
This could, I think, be split up into much smaller pieces in any case: you're introducing two different functions here, and each function introduces support for several different formats.
On 3/24/20 2:39 PM, Derek Lesho wrote:
Signed-off-by: Derek Lesho dlesho@codeweavers.com
dlls/winegstreamer/gst_private.h | 4 + dlls/winegstreamer/mfplat.c | 533 ++++++++++++++++++++++++++++++- include/codecapi.h | 38 +++ 3 files changed, 574 insertions(+), 1 deletion(-) create mode 100644 include/codecapi.h
diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h index e6fb841fc8..a6c3fd3784 100644 --- a/dlls/winegstreamer/gst_private.h +++ b/dlls/winegstreamer/gst_private.h @@ -36,6 +36,7 @@ #include "winuser.h" #include "dshow.h" #include "strmif.h" +#include "mfobjects.h" #include "wine/heap.h" #include "wine/strmbase.h" @@ -54,4 +55,7 @@ void start_dispatch_thread(void) DECLSPEC_HIDDEN; extern HRESULT mfplat_get_class_object(REFCLSID rclsid, REFIID riid, void **obj) DECLSPEC_HIDDEN; +IMFMediaType* media_type_from_caps(GstCaps *caps); +GstCaps *caps_from_media_type(IMFMediaType *type);
Using the generic name "media_type", in a module that serves multiple media APIs, is not great.
Would you prefer mf_media_type?
Also, why is this in the public header?
Would it be better to split this into a mfplat_private.h header?
Also, style nitpick: please try to be consistent about your asterisk placement (ideally using "type *var" style.)
Ack.
#endif /* __GST_PRIVATE_INCLUDED__ */
...
@@ -433,3 +438,529 @@ HRESULT mfplat_get_class_object(REFCLSID rclsid, REFIID riid, void **obj) return CLASS_E_CLASSNOTAVAILABLE; }
+struct aac_user_data +{
- WORD payload_type;
- WORD profile_level_indication;
- WORD struct_type;
- WORD reserved;
- /*BYTE audio_specific_config;*/
What's this field doing here?
We store the audio_specific_config after these fields, and I wanted to express that here; it's not important though.
+};
+/* IMPORTANT: caps will be modified to represent the exact type needed for the format */
Why?
Because in the case of a demuxer, the caps of the stream we receive might not map 1:1 onto their Media Foundation representation. When adjustments are needed, the media source feeds the buffers through a parser to correct this.
See: https://github.com/Guy1524/wine/commit/7ab88be3882ab95f3fc17dab374184e06f018...
+IMFMediaType* media_type_from_caps(GstCaps *caps) +{
- IMFMediaType *media_type;
- GstStructure *info;
- const char *media_type_name;
- gchar *human_readable;
- if (FAILED(MFCreateMediaType(&media_type)))
- {
- return NULL;
- }
- info = gst_caps_get_structure(caps, 0);
- media_type_name = gst_structure_get_name(info);
- human_readable = gst_caps_to_string(caps);
- TRACE("caps = %s\n", human_readable);
- g_free(human_readable);
Probably would be best to guard this with TRACE_ON, so that we don't bother allocating anything otherwise.
Also, you'll want to use debugstr_a(), especially since caps can overrun the static buffer in ntdll.
Ack.
- if (!(strncmp(media_type_name, "video", 5)))
Style nitpick, superfluous parentheses.
I think Nikolay already mentioned this, but it's probably not a bad idea to just match against the whole "video/x-h264" etc. sequence.
Ack.
- {
- const char *video_format = media_type_name + 6;
- gint width, height, framerate_num, framerate_den;
- IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE,
&MFMediaType_Video);
- if (gst_structure_get_int(info, "width", &width) &&
gst_structure_get_int(info, "height", &height))
- {
- IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_SIZE,
((UINT64)width << 32) | height);
- }
- if (gst_structure_get_fraction(info, "framerate", &framerate_num,
&framerate_den))
- {
- IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_RATE,
((UINT64)framerate_num << 32) | framerate_den);
- }
- if (!(strcmp(video_format, "x-h264")))
- {
- const char *profile, *level;
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_H264);
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
- if ((profile = gst_structure_get_string(info, "profile")))
- {
- if (!(strcmp(profile, "main")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE,
eAVEncH264VProfile_Main);
- else if (!(strcmp(profile, "high")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE,
eAVEncH264VProfile_High);
- else if (!(strcmp(profile, "high-4:4:4")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE,
eAVEncH264VProfile_444);
- else
- ERR("Unrecognized profile %s\n", profile);
This ERR (and many below) should probably be a FIXME instead, methinks.
Ack.
- }
- if ((level = gst_structure_get_string(info, "level")))
- {
- if (!(strcmp(level, "1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1);
- else if (!(strcmp(level, "1.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1_1);
- else if (!(strcmp(level, "1.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1_2);
- else if (!(strcmp(level, "1.3")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1_3);
- else if (!(strcmp(level, "2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel2);
- else if (!(strcmp(level, "2.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel2_1);
- else if (!(strcmp(level, "2.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel2_2);
- else if (!(strcmp(level, "3")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel3);
- else if (!(strcmp(level, "3.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel3_1);
- else if (!(strcmp(level, "3.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel3_2);
- else if (!(strcmp(level, "4")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel4);
- else if (!(strcmp(level, "4.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel4_1);
- else if (!(strcmp(level, "4.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel4_2);
- else if (!(strcmp(level, "5")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel5);
- else if (!(strcmp(level, "5.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel5_1);
- else if (!(strcmp(level, "5.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel5_2);
- else
- ERR("Unrecognized level %s\n", level);
- }
Could we maybe make this a table instead?
Sure.
- gst_caps_set_simple(caps, "stream-format", G_TYPE_STRING,
"byte-stream", NULL);
- gst_caps_set_simple(caps, "alignment", G_TYPE_STRING, "au", NULL);
- for (unsigned int i = 0; i < gst_caps_get_size(caps); i++)
- {
- GstStructure *structure = gst_caps_get_structure (caps, i);
- gst_structure_remove_field(structure, "codec_data");
- }
- }
- else if (!(strcmp(video_format, "x-wmv")))
- {
- gint wmv_version;
- const char *format;
- const GValue *codec_data;
- if (gst_structure_get_int(info, "wmvversion", &wmv_version))
- {
- switch (wmv_version)
- {
- case 1:
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV1);
- break;
- case 2:
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV2);
- break;
- case 3:
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV3);
- break;
- default:
- ERR("Unrecognized wmvversion %d\n", wmv_version);
- }
- }
- if ((format = gst_structure_get_string(info, "format")))
- {
- if (!(strcmp(format, "WVC1")))
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WVC1);
What if it's not? I think that deserves at least a FIXME.
(Style nitpick, extra parentheses.)
Ack.
- }
- if ((codec_data = gst_structure_get_value(info, "codec_data")))
- {
- GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
- if (codec_data_buffer)
- {
- gsize codec_data_size = gst_buffer_get_size(codec_data_buffer);
- gpointer codec_data_raw = heap_alloc(codec_data_size);
- gst_buffer_extract(codec_data_buffer, 0, codec_data_raw,
codec_data_size);
- IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, codec_data_raw,
codec_data_size);
- }
- }
- }
- else if (!(strcmp(video_format, "mpeg")))
- {
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_M4S2);
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
There are other video/mpeg formats.
TBH, the only reason I've included this is for the tests to work; I'll look into how to differentiate the mpeg types tomorrow.
- }
- else if (!(strcmp(video_format, "x-raw")))
- {
- const char *fourcc = gst_structure_get_string(info, "stream-format");
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, FALSE);
- if (fourcc && (strlen(fourcc) == 4))
- {
- GUID fourcc_subtype = MFVideoFormat_Base;
- fourcc_subtype.Data1 = MAKEFOURCC(
- toupper(fourcc[0]), toupper(fourcc[1]), toupper(fourcc[2]),
toupper(fourcc[3]));
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &fourcc_subtype);
- }
- else
- ERR("uncompressed video has no stream-format\n");
I've never seen a FOURCC stored in the "stream-format" tag; where are you getting this from?
You're right, I think I'm supposed to use "format" here, but this is dead code right now, which is why I didn't see any problems.
- }
- else
- ERR("Unrecognized video format %s\n", video_format);
- }
- else if (!(strncmp(media_type_name, "audio", 5)))
- {
- const char *audio_format = media_type_name + 6;
- IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE,
&MFMediaType_Audio);
- if (!(strcmp(audio_format, "mpeg")))
- {
- int mpeg_version = -1;
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
- if (!(gst_structure_get_int(info, "mpegversion", &mpeg_version)))
- ERR("Failed to get mpegversion\n");
- switch (mpeg_version)
- {
- case 1:
- {
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG);
- break;
- }
What about MFAudioFormat_MP3?
I'm actually not sure what to use here; I should probably remove it for now.
- case 2:
- case 4:
- {
- const char *format, *profile, *level;
- DWORD profile_level_indication = 0;
- const GValue *codec_data;
- DWORD asc_size = 0;
- struct aac_user_data *user_data = NULL;
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_AAC);
- codec_data = gst_structure_get_value(info, "codec_data");
- if (codec_data)
- {
- GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
- if (codec_data_buffer)
- {
- if ((asc_size = gst_buffer_get_size(codec_data_buffer)) >= 2)
- {
- user_data = heap_alloc_zero(sizeof(*user_data)+asc_size);
- gst_buffer_extract(codec_data_buffer, 0, (gpointer)(user_data + 1),
asc_size);
- }
- else
- ERR("Unexpected buffer size\n");
- }
- else
- ERR("codec_data not a buffer\n");
- }
- else
- ERR("codec_data not found\n");
- if (!user_data)
- user_data = heap_alloc_zero(sizeof(*user_data));
- {
- int rate;
- if (gst_structure_get_int(info, "rate", &rate))
- IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND,
rate);
- }
- {
- int channels;
- if (gst_structure_get_int(info, "channels", &channels))
- IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS,
channels);
- }
Did you mean to add these blocks?
Yeah, it's so I can declare the variables closer to where they are used.
- if ((format = gst_structure_get_string(info, "stream-format")))
- {
- DWORD payload_type = -1;
- if (!(strcmp(format, "raw")))
- payload_type = 0;
- else if (!(strcmp(format, "adts")))
- payload_type = 1;
- else
- ERR("Unrecognized stream-format\n");
- if (payload_type != -1)
- {
- IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_PAYLOAD_TYPE,
payload_type);
- user_data->payload_type = payload_type;
- }
- }
- else
- {
- ERR("Stream format not present\n");
- }
- profile = gst_structure_get_string(info, "profile");
- level = gst_structure_get_string(info, "level");
- /* Data from
https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder#output-t... */
I'm not sure I'd link to Microsoft documentation; it's not very stable.
Would a link to an archive.is backup of it be better?
- if (profile && level)
- {
- if (!(strcmp(profile, "lc")) && !(strcmp(level, "2")))
- profile_level_indication = 0x29;
- else if (!(strcmp(profile, "lc")) && !(strcmp(level, "4")))
- profile_level_indication = 0x2A;
- else if (!(strcmp(profile, "lc")) && !(strcmp(level, "5")))
- profile_level_indication = 0x2B;
- else
- ERR("Unhandled profile/level combo\n");
- }
- else
- ERR("Profile or level not present\n");
- if (profile_level_indication)
- {
- IMFMediaType_SetUINT32(media_type,
&MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, profile_level_indication);
- user_data->profile_level_indication = profile_level_indication;
- }
- IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, (BYTE
*)user_data, sizeof(user_data) + asc_size);
- heap_free(user_data);
- break;
- }
- default:
- ERR("Unhandled mpegversion %d\n", mpeg_version);
- }
- }
- else if (!(strcmp(audio_format, "x-raw")))
- {
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_Float);
- gst_caps_set_simple(caps, "format", G_TYPE_STRING, "F32LE", NULL);
There are other audio formats.
Ah, you mean PCM? I'll add a case for that tomorrow.
- }
- else
- ERR("Unrecognized audio format %s\n", audio_format);
- }
- else
- {
- goto fail;
I'm generally of the opinion that one line of cleanup doesn't merit a "goto".
Okay, I'll change that then.
- }
- return media_type;
- fail:
- IMFMediaType_Release(media_type);
- return NULL;
+}
+static const char *fourcc_str(DWORD fourcc) +{
- if (!fourcc) return NULL;
- return wine_dbg_sprintf ("%c%c%c%c",
- (char)(fourcc), (char)(fourcc >> 8),
- (char)(fourcc >> 16), (char)(fourcc >> 24));
+}
I don't think you want to use Wine's debugging utilities for non-debug code.
Ack.
+GstCaps *caps_from_media_type(IMFMediaType *type) +{
- GUID major_type;
- GUID subtype;
- GUID base_masked_subtype;
- GstCaps *output = NULL;
- if (FAILED(IMFMediaType_GetMajorType(type, &major_type)))
- return NULL;
- if (FAILED(IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype)))
- return NULL;
- base_masked_subtype = subtype;
- base_masked_subtype.Data1 = 0;
- if (IsEqualGUID(&major_type, &MFMediaType_Video))
- {
- UINT64 frame_rate = 0, frame_size = 0;
- DWORD *framerate_num = ((DWORD*)&frame_rate) + 1;
- DWORD *framerate_den = ((DWORD*)&frame_rate);
- DWORD *width = ((DWORD*)&frame_size) + 1;
- DWORD *height = ((DWORD*)&frame_size);
It seems simpler to me to do e.g.
DWORD width = frame_size >> 32; DWORD height = frame_size;
I'm not getting the width and height here; I'm declaring pointers to them, which are set later on.
- if (IsEqualGUID(&subtype, &MFVideoFormat_H264))
- {
- enum eAVEncH264VProfile h264_profile;
- enum eAVEncH264VLevel h264_level;
- output = gst_caps_new_empty_simple("video/x-h264");
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING,
"byte-stream", NULL);
- gst_caps_set_simple(output, "alignment", G_TYPE_STRING, "au", NULL);
- if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_PROFILE,
&h264_profile)))
- {
- const char *profile = NULL;
- switch (h264_profile)
- {
- case eAVEncH264VProfile_Main: profile = "main"; break;
- case eAVEncH264VProfile_High: profile = "high"; break;
- case eAVEncH264VProfile_444: profile = "high-4:4:4"; break;
- default: ERR("Unknown profile %u\n", h264_profile);
- }
- if (profile)
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, profile, NULL);
- }
- if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_LEVEL,
&h264_level)))
- {
- const char *level = NULL;
- switch (h264_level)
- {
- case eAVEncH264VLevel1: level = "1"; break;
- case eAVEncH264VLevel1_1: level = "1.1"; break;
- case eAVEncH264VLevel1_2: level = "1.2"; break;
- case eAVEncH264VLevel1_3: level = "1.3"; break;
- case eAVEncH264VLevel2: level = "2"; break;
- case eAVEncH264VLevel2_1: level = "2.1"; break;
- case eAVEncH264VLevel2_2: level = "2.2"; break;
- case eAVEncH264VLevel3: level = "3"; break;
- case eAVEncH264VLevel3_1: level = "3.1"; break;
- case eAVEncH264VLevel3_2: level = "3.2"; break;
- case eAVEncH264VLevel4: level = "4"; break;
- case eAVEncH264VLevel4_1: level = "4.1"; break;
- case eAVEncH264VLevel4_2: level = "4.2"; break;
- case eAVEncH264VLevel5: level = "5"; break;
- case eAVEncH264VLevel5_1: level = "5.1"; break;
- case eAVEncH264VLevel5_2: level = "5.2"; break;
- default: ERR("Unknown level %u\n", h264_level);
- }
- if (level)
- gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL);
- }
- }
- else if (IsEqualGUID(&subtype, &MFVideoFormat_WVC1))
- {
- BYTE *user_data;
- DWORD user_data_size;
- output = gst_caps_new_empty_simple("video/x-wmv");
- gst_caps_set_simple(output, "format", G_TYPE_STRING, "WVC1", NULL);
- gst_caps_set_simple(output, "wmvversion", G_TYPE_INT, 3, NULL);
- if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA,
&user_data, &user_data_size)))
- {
- GstBuffer *codec_data_buffer = gst_buffer_new_allocate(NULL,
user_data_size, NULL);
- gst_buffer_fill(codec_data_buffer, 0, user_data, user_data_size);
- gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER,
codec_data_buffer, NULL);
- gst_buffer_unref(codec_data_buffer);
- CoTaskMemFree(user_data);
- }
- }
- else if (IsEqualGUID(&base_masked_subtype, &MFVideoFormat_Base))
- {
- output = gst_caps_new_empty_simple("video/x-raw");
- gst_caps_set_simple(output, "format", G_TYPE_STRING,
fourcc_str(subtype.Data1), NULL);
What about RGB formats?
Ah, I didn't think about those; it looks like we'll have to use a table of known conversions instead.
- }
- else {
- ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
- return NULL;
- }
- IMFMediaType_GetUINT64(type, &MF_MT_FRAME_RATE, &frame_rate);
- IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
- if (frame_rate)
- gst_caps_set_simple(output, "framerate", GST_TYPE_FRACTION,
*framerate_num, *framerate_den, NULL);
- if (frame_size)
- {
- gst_caps_set_simple(output, "width", G_TYPE_INT, *width, NULL);
- gst_caps_set_simple(output, "height", G_TYPE_INT, *height, NULL);
- }
- return output;
- }
- else if (IsEqualGUID(&major_type, &MFMediaType_Audio))
- {
- DWORD rate, channels;
- if (IsEqualGUID(&subtype, &MFAudioFormat_AAC))
- {
- DWORD payload_type, indication;
- struct aac_user_data *user_data;
- UINT32 user_data_size;
- output = gst_caps_new_empty_simple("audio/mpeg");
- /* TODO */
- gst_caps_set_simple(output, "framed", G_TYPE_BOOLEAN, TRUE, NULL);
- gst_caps_set_simple(output, "mpegversion", G_TYPE_INT, 4, NULL);
What's TODO here?
MFAudioFormat_AAC could also mean mpegversion=2, and I don't know what the "framed" attribute is for.
- if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE,
&payload_type)))
- {
- switch (payload_type)
- {
- case 0:
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw",
NULL);
- break;
- case 1:
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "adts",
NULL);
- break;
- default:
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw",
NULL);
Seems to me that 2 and 3 should be mapped to "adif" and "loas", respectively.
Ack.
- }
- }
- else
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw",
NULL);
- if (SUCCEEDED(IMFMediaType_GetUINT32(type,
&MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, &indication)))
- {
- switch (indication)
- {
- case 0x29:
- {
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
- gst_caps_set_simple(output, "level", G_TYPE_STRING, "2", NULL);
- break;
- }
- case 0x2A:
- {
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
- gst_caps_set_simple(output, "level", G_TYPE_STRING, "4", NULL);
- break;
- }
- case 0x2B:
- {
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
- gst_caps_set_simple(output, "level", G_TYPE_STRING, "5", NULL);
- break;
- }
- default:
- ERR("Unrecognized profile-level-indication %u\n", indication);
- }
I think you could significantly deduplicate this switch.
Ack.
- }
- if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA,
(BYTE **) &user_data, &user_data_size)))
- {
- if (user_data_size > sizeof(sizeof(*user_data)))
- {
- GstBuffer *audio_specific_config = gst_buffer_new_allocate(NULL,
user_data_size - sizeof(*user_data), NULL);
- gst_buffer_fill(audio_specific_config, 0, user_data + 1,
user_data_size - sizeof(*user_data));
- gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER,
audio_specific_config, NULL);
- gst_buffer_unref(audio_specific_config);
- }
- CoTaskMemFree(user_data);
- }
- }
- else if (IsEqualGUID(&subtype, &MFAudioFormat_Float))
- {
- output = gst_caps_new_empty_simple("audio/x-raw");
- gst_caps_set_simple(output, "format", G_TYPE_STRING, "F32LE", NULL);
- }
- else
- {
- ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
- if (output)
- gst_caps_unref(output);
- return NULL;
- }
- if (SUCCEEDED(IMFMediaType_GetUINT32(type,
&MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate)))
- {
- gst_caps_set_simple(output, "rate", G_TYPE_INT, rate, NULL);
- }
- if (SUCCEEDED(IMFMediaType_GetUINT32(type,
&MF_MT_AUDIO_NUM_CHANNELS, &channels)))
- {
- gst_caps_set_simple(output, "channels", G_TYPE_INT, channels, NULL);
- }
- return output;
- }
- ERR("Unrecognized major type %s\n", debugstr_guid(&major_type));
- return NULL;
+} diff --git a/include/codecapi.h b/include/codecapi.h new file mode 100644 index 0000000000..2690b523d7 --- /dev/null +++ b/include/codecapi.h @@ -0,0 +1,38 @@ +#ifndef __CODECAPI_H +#define __CODECAPI_H
+enum eAVEncH264VProfile +{
- eAVEncH264VProfile_unknown = 0,
- eAVEncH264VProfile_Simple = 66,
- eAVEncH264VProfile_Base = 66,
- eAVEncH264VProfile_Main = 77,
- eAVEncH264VProfile_High = 100,
- eAVEncH264VProfile_422 = 122,
- eAVEncH264VProfile_High10 = 110,
- eAVEncH264VProfile_444 = 244,
- eAVEncH264VProfile_Extended = 88,
+};
+enum eAVEncH264VLevel +{
- eAVEncH264VLevel1 = 10,
- eAVEncH264VLevel1_b = 11,
- eAVEncH264VLevel1_1 = 11,
- eAVEncH264VLevel1_2 = 12,
- eAVEncH264VLevel1_3 = 13,
- eAVEncH264VLevel2 = 20,
- eAVEncH264VLevel2_1 = 21,
- eAVEncH264VLevel2_2 = 22,
- eAVEncH264VLevel3 = 30,
- eAVEncH264VLevel3_1 = 31,
- eAVEncH264VLevel3_2 = 32,
- eAVEncH264VLevel4 = 40,
- eAVEncH264VLevel4_1 = 41,
- eAVEncH264VLevel4_2 = 42,
- eAVEncH264VLevel5 = 50,
- eAVEncH264VLevel5_1 = 51,
- eAVEncH264VLevel5_2 = 52
+};
+#endif \ No newline at end of file
On 3/25/20 11:57 PM, Derek Lesho wrote:
On 3/24/20 3:22 PM, Zebediah Figura wrote:
General comments:
It's not great to introduce code that's not used anywhere; it's essentially dead until then.
This could, I think, be split up into much smaller pieces in any case: you're introducing two different functions here, and each function introduces support for several different formats.
On 3/24/20 2:39 PM, Derek Lesho wrote:
Signed-off-by: Derek Lesho dlesho@codeweavers.com
dlls/winegstreamer/gst_private.h | 4 + dlls/winegstreamer/mfplat.c | 533 ++++++++++++++++++++++++++++++- include/codecapi.h | 38 +++ 3 files changed, 574 insertions(+), 1 deletion(-) create mode 100644 include/codecapi.h
diff --git a/dlls/winegstreamer/gst_private.h b/dlls/winegstreamer/gst_private.h index e6fb841fc8..a6c3fd3784 100644 --- a/dlls/winegstreamer/gst_private.h +++ b/dlls/winegstreamer/gst_private.h @@ -36,6 +36,7 @@ #include "winuser.h" #include "dshow.h" #include "strmif.h" +#include "mfobjects.h" #include "wine/heap.h" #include "wine/strmbase.h" @@ -54,4 +55,7 @@ void start_dispatch_thread(void) DECLSPEC_HIDDEN; extern HRESULT mfplat_get_class_object(REFCLSID rclsid, REFIID riid, void **obj) DECLSPEC_HIDDEN; +IMFMediaType* media_type_from_caps(GstCaps *caps); +GstCaps *caps_from_media_type(IMFMediaType *type);
Using the generic name "media_type", in a module that serves multiple media APIs, is not great.
Would you prefer mf_media_type?
That's probably better, yes.
Also, why is this in the public header?
Would it be better to split this into a mfplat_private.h header?
I mean, why do you need to use it from anything other than mfplat.c?
This is another reason why it doesn't make a lot of sense to submit dead code.
Also, style nitpick: please try to be consistent about your asterisk placement (ideally using "type *var" style.)
Ack.
#endif /* __GST_PRIVATE_INCLUDED__ */
...
@@ -433,3 +438,529 @@ HRESULT mfplat_get_class_object(REFCLSID rclsid, REFIID riid, void **obj) return CLASS_E_CLASSNOTAVAILABLE; }
+struct aac_user_data +{
- WORD payload_type;
- WORD profile_level_indication;
- WORD struct_type;
- WORD reserved;
- /*BYTE audio_specific_config;*/
What's this field doing here?
We store the audio_specific_config after these fields, and I wanted to express that here; it's not important though.
It's not necessarily a problem to specify that arbitrary data comes after the struct, but that comment is not particularly clear.
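For instance, a comment along these lines would say the same thing more directly (whether native lays this out exactly as HEAACWAVEINFO does is an assumption on my part):

struct aac_user_data
{
    WORD payload_type;
    WORD profile_level_indication;
    WORD struct_type;
    WORD reserved;
    /* Immediately followed by the raw AudioSpecificConfig() bytes,
     * as in HEAACWAVEINFO/HEAACWAVEFORMAT. */
};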
+};
+/* IMPORTANT: caps will be modified to represent the exact type needed for the format */
Why?
Because in the case of a demuxer, the caps of the stream we receive might not map 1:1 onto their Media Foundation representation. When adjustments are needed, the media source feeds the buffers through a parser to correct this.
See: https://github.com/Guy1524/wine/commit/7ab88be3882ab95f3fc17dab374184e06f018...
This seems like a very confusing way to do that. At least I'd relegate it to a separate function. I wouldn't expect a conversion function to modify its argument, and it moreover makes it essentially unusable anywhere else.
That said, these modifications are specific to the format, and along those lines it may make more sense to append specific elements rather than to make specific changes to the caps and try to find an element that can accommodate those. This will also help if you ever need to append multiple such elements. Thus you can e.g. append an audioconvert element unconditionally, and if no conversion is necessary it'll just pass through.
Looking at the modifications you do make—
* you force h264 into annex B format, which is the job of h264parse;
* you force all raw audio into 32-bit float. Does native mfplat really never output integer PCM?
+IMFMediaType* media_type_from_caps(GstCaps *caps)
+{
- IMFMediaType *media_type;
- GstStructure *info;
- const char *media_type_name;
- gchar *human_readable;
- if (FAILED(MFCreateMediaType(&media_type)))
- {
- return NULL;
- }
- info = gst_caps_get_structure(caps, 0);
- media_type_name = gst_structure_get_name(info);
- human_readable = gst_caps_to_string(caps);
- TRACE("caps = %s\n", human_readable);
- g_free(human_readable);
Probably would be best to guard this with TRACE_ON, so that we don't bother allocating anything otherwise.
Also, you'll want to use debugstr_a(), especially since caps can overrun the static buffer in ntdll.
Ack.
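I.e. something like this, assuming the file's debug channel is named mfplat:

    if (TRACE_ON(mfplat))
    {
        gchar *str = gst_caps_to_string(caps);
        TRACE("caps = %s\n", debugstr_a(str));
        g_free(str);
    }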
- if (!(strncmp(media_type_name, "video", 5)))
Style nitpick, superfluous parentheses.
I think Nikolay already mentioned this, but it's probably not a bad idea to just match against the whole "video/x-h264" etc. sequence.
Ack.
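So roughly (sketch only):

    const char *name = gst_structure_get_name(info);

    if (!strcmp(name, "video/x-h264"))
    {
        /* H.264 attributes as above */
    }
    else if (!strcmp(name, "video/x-wmv"))
    {
        /* WMV attributes as above */
    }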
- {
- const char *video_format = media_type_name + 6;
- gint width, height, framerate_num, framerate_den;
- IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE,
&MFMediaType_Video);
- if (gst_structure_get_int(info, "width", &width) &&
gst_structure_get_int(info, "height", &height))
- {
- IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_SIZE,
((UINT64)width << 32) | height);
- }
- if (gst_structure_get_fraction(info, "framerate", &framerate_num,
&framerate_den))
- {
- IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_RATE,
((UINT64)framerate_num << 32) | framerate_den);
- }
- if (!(strcmp(video_format, "x-h264")))
- {
- const char *profile, *level;
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_H264);
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
- if ((profile = gst_structure_get_string(info, "profile")))
- {
- if (!(strcmp(profile, "main")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE,
eAVEncH264VProfile_Main);
- else if (!(strcmp(profile, "high")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE,
eAVEncH264VProfile_High);
- else if (!(strcmp(profile, "high-4:4:4")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE,
eAVEncH264VProfile_444);
- else
- ERR("Unrecognized profile %s\n", profile);
This ERR (and many below) should probably be a FIXME instead, methinks.
Ack.
- }
- if ((level = gst_structure_get_string(info, "level")))
- {
- if (!(strcmp(level, "1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1);
- else if (!(strcmp(level, "1.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1_1);
- else if (!(strcmp(level, "1.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1_2);
- else if (!(strcmp(level, "1.3")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1_3);
- else if (!(strcmp(level, "2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel2);
- else if (!(strcmp(level, "2.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel2_1);
- else if (!(strcmp(level, "2.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel2_2);
- else if (!(strcmp(level, "3")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel3);
- else if (!(strcmp(level, "3.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel3_1);
- else if (!(strcmp(level, "3.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel3_2);
- else if (!(strcmp(level, "4")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel4);
- else if (!(strcmp(level, "4.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel4_1);
- else if (!(strcmp(level, "4.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel4_2);
- else if (!(strcmp(level, "5")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel5);
- else if (!(strcmp(level, "5.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel5_1);
- else if (!(strcmp(level, "5.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel5_2);
- else
- ERR("Unrecognized level %s\n", level);
- }
Could we maybe make this a table instead?
Sure.
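Something along these lines, perhaps (untested sketch; values from the codecapi.h additions below):

    static const struct
    {
        const char *name;
        enum eAVEncH264VLevel level;
    }
    h264_levels[] =
    {
        {"1",   eAVEncH264VLevel1},   {"1.1", eAVEncH264VLevel1_1},
        {"1.2", eAVEncH264VLevel1_2}, {"1.3", eAVEncH264VLevel1_3},
        {"2",   eAVEncH264VLevel2},   {"2.1", eAVEncH264VLevel2_1},
        {"2.2", eAVEncH264VLevel2_2}, {"3",   eAVEncH264VLevel3},
        {"3.1", eAVEncH264VLevel3_1}, {"3.2", eAVEncH264VLevel3_2},
        {"4",   eAVEncH264VLevel4},   {"4.1", eAVEncH264VLevel4_1},
        {"4.2", eAVEncH264VLevel4_2}, {"5",   eAVEncH264VLevel5},
        {"5.1", eAVEncH264VLevel5_1}, {"5.2", eAVEncH264VLevel5_2},
    };
    unsigned int i;

    for (i = 0; i < ARRAY_SIZE(h264_levels); i++)
    {
        if (!strcmp(level, h264_levels[i].name))
        {
            IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, h264_levels[i].level);
            break;
        }
    }
    if (i == ARRAY_SIZE(h264_levels))
        FIXME("Unrecognized level %s\n", level);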
- gst_caps_set_simple(caps, "stream-format", G_TYPE_STRING,
"byte-stream", NULL);
- gst_caps_set_simple(caps, "alignment", G_TYPE_STRING, "au", NULL);
- for (unsigned int i = 0; i < gst_caps_get_size(caps); i++)
- {
- GstStructure *structure = gst_caps_get_structure (caps, i);
- gst_structure_remove_field(structure, "codec_data");
- }
- }
- else if (!(strcmp(video_format, "x-wmv")))
- {
- gint wmv_version;
- const char *format;
- const GValue *codec_data;
- if (gst_structure_get_int(info, "wmvversion", &wmv_version))
- {
- switch (wmv_version)
- {
- case 1:
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV1);
- break;
- case 2:
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV2);
- break;
- case 3:
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV3);
- break;
- default:
- ERR("Unrecognized wmvversion %d\n", wmv_version);
- }
- }
- if ((format = gst_structure_get_string(info, "format")))
- {
- if (!(strcmp(format, "WVC1")))
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WVC1);
What if it's not? I think that deserves at least a FIXME.
(Style nitpick, extra parentheses.)
Ack.
- }
- if ((codec_data = gst_structure_get_value(info, "codec_data")))
- {
- GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
- if (codec_data_buffer)
- {
- gsize codec_data_size = gst_buffer_get_size(codec_data_buffer);
- gpointer codec_data_raw = heap_alloc(codec_data_size);
- gst_buffer_extract(codec_data_buffer, 0, codec_data_raw,
codec_data_size);
- IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, codec_data_raw,
codec_data_size);
- }
- }
- }
- else if (!(strcmp(video_format, "mpeg")))
- {
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_M4S2);
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
There are other video/mpeg formats.
TBH, the only reason I've included this is for the tests to work, I'll look into how to differentiate the mpeg types tomorrow.
- }
- else if (!(strcmp(video_format, "x-raw")))
- {
- const char *fourcc = gst_structure_get_string(info, "stream-format");
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, FALSE);
- if (fourcc && (strlen(fourcc) == 4))
- {
- GUID fourcc_subtype = MFVideoFormat_Base;
- fourcc_subtype.Data1 = MAKEFOURCC(
- toupper(fourcc[0]), toupper(fourcc[1]), toupper(fourcc[2]),
toupper(fourcc[3]));
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &fourcc_subtype);
- }
- else
- ERR("uncompressed video has no stream-format\n");
I've never seen a FOURCC stored in the "stream-format" tag; where are you getting this from?
You're right, I think I'm supposed to use "format" here, but this is dead code right now, so that's why I didn't see any problems.
- }
- else
- ERR("Unrecognized video format %s\n", video_format);
- }
- else if (!(strncmp(media_type_name, "audio", 5)))
- {
- const char *audio_format = media_type_name + 6;
- IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE,
&MFMediaType_Audio);
- if (!(strcmp(audio_format, "mpeg")))
- {
- int mpeg_version = -1;
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
- if (!(gst_structure_get_int(info, "mpegversion", &mpeg_version)))
- ERR("Failed to get mpegversion\n");
- switch (mpeg_version)
- {
- case 1:
- {
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG);
- break;
- }
What about MFAudioFormat_MP3?
I'm actually not sure what to use here, I should probably remove it for now.
- case 2:
- case 4:
- {
- const char *format, *profile, *level;
- DWORD profile_level_indication = 0;
- const GValue *codec_data;
- DWORD asc_size = 0;
- struct aac_user_data *user_data = NULL;
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_AAC);
- codec_data = gst_structure_get_value(info, "codec_data");
- if (codec_data)
- {
- GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
- if (codec_data_buffer)
- {
- if ((asc_size = gst_buffer_get_size(codec_data_buffer)) >= 2)
- {
- user_data = heap_alloc_zero(sizeof(*user_data)+asc_size);
- gst_buffer_extract(codec_data_buffer, 0, (gpointer)(user_data + 1),
asc_size);
- }
- else
- ERR("Unexpected buffer size\n");
- }
- else
- ERR("codec_data not a buffer\n");
- }
- else
- ERR("codec_data not found\n");
- if (!user_data)
- user_data = heap_alloc_zero(sizeof(*user_data));
- {
- int rate;
- if (gst_structure_get_int(info, "rate", &rate))
- IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND,
rate);
- }
- {
- int channels;
- if (gst_structure_get_int(info, "channels", &channels))
- IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS,
channels);
- }
Did you mean to add these blocks?
Yeah, it's so I can declare the variables closer to where they are used.
I'll admit I don't get the obsession with C99 variable declarations, but this just seems janky.
- if ((format = gst_structure_get_string(info, "stream-format")))
- {
- DWORD payload_type = -1;
- if (!(strcmp(format, "raw")))
- payload_type = 0;
- else if (!(strcmp(format, "adts")))
- payload_type = 1;
- else
- ERR("Unrecognized stream-format\n");
- if (payload_type != -1)
- {
- IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_PAYLOAD_TYPE,
payload_type);
- user_data->payload_type = payload_type;
- }
- }
- else
- {
- ERR("Stream format not present\n");
- }
- profile = gst_structure_get_string(info, "profile");
- level = gst_structure_get_string(info, "level");
- /* Data from
https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder#output-t... */
I'm not sure I'd link to Microsoft documentation; it's not very stable.
Would a link to an archive.is backup of it be better?
Probably.
- if (profile && level)
- {
- if (!(strcmp(profile, "lc")) && !(strcmp(level, "2")))
- profile_level_indication = 0x29;
- else if (!(strcmp(profile, "lc")) && !(strcmp(level, "4")))
- profile_level_indication = 0x2A;
- else if (!(strcmp(profile, "lc")) && !(strcmp(level, "5")))
- profile_level_indication = 0x2B;
- else
- ERR("Unhandled profile/level combo\n");
- }
- else
- ERR("Profile or level not present\n");
- if (profile_level_indication)
- {
- IMFMediaType_SetUINT32(media_type,
&MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, profile_level_indication);
- user_data->profile_level_indication = profile_level_indication;
- }
- IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, (BYTE
*)user_data, sizeof(user_data) + asc_size);
- heap_free(user_data);
- break;
- }
- default:
- ERR("Unhandled mpegversion %d\n", mpeg_version);
- }
- }
- else if (!(strcmp(audio_format, "x-raw")))
- {
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_Float);
- gst_caps_set_simple(caps, "format", G_TYPE_STRING, "F32LE", NULL);
There are other audio formats.
Ah, you mean PCM? I'll add a case for that tomorrow.
f32le is PCM, but I mean integer PCM and other depths than 32-bit.
Presumably there should also be channel and sample rate data here.
- }
- else
- ERR("Unrecognized audio format %s\n", audio_format);
- }
- else
- {
- goto fail;
I'm generally of the opinion that one line of cleanup doesn't merit a "goto".
Okay I'll change that then.
- }
- return media_type;
- fail:
- IMFMediaType_Release(media_type);
- return NULL;
+}
+static const char *fourcc_str(DWORD fourcc)
+{
- if (!fourcc) return NULL;
- return wine_dbg_sprintf ("%c%c%c%c",
- (char)(fourcc), (char)(fourcc >> 8),
- (char)(fourcc >> 16), (char)(fourcc >> 24));
+}
I don't think you want to use Wine's debugging utilities for non-debug code.
Ack.
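E.g. a plain helper that fills a caller-provided buffer instead (sketch):

    static void fourcc_to_string(char buf[5], DWORD fourcc)
    {
        buf[0] = (char)(fourcc);
        buf[1] = (char)(fourcc >> 8);
        buf[2] = (char)(fourcc >> 16);
        buf[3] = (char)(fourcc >> 24);
        buf[4] = 0;
    }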
+GstCaps *caps_from_media_type(IMFMediaType *type)
+{
- GUID major_type;
- GUID subtype;
- GUID base_masked_subtype;
- GstCaps *output = NULL;
- if (FAILED(IMFMediaType_GetMajorType(type, &major_type)))
- return NULL;
- if (FAILED(IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype)))
- return NULL;
- base_masked_subtype = subtype;
- base_masked_subtype.Data1 = 0;
- if (IsEqualGUID(&major_type, &MFMediaType_Video))
- {
- UINT64 frame_rate = 0, frame_size = 0;
- DWORD *framerate_num = ((DWORD*)&frame_rate) + 1;
- DWORD *framerate_den = ((DWORD*)&frame_rate);
- DWORD *width = ((DWORD*)&frame_size) + 1;
- DWORD *height = ((DWORD*)&frame_size);
It seems simpler to me to do e.g.
DWORD width = frame_size;
DWORD height = frame_size >> 32;
I'm not getting the width and height here, I'm declaring pointers to them which are set later on.
Right, I mean actually set the variables after retrieving frame_size; in full something like
DWORD width, height;
/* ... */
IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
width = frame_size;
height = frame_size >> 32;
- if (IsEqualGUID(&subtype, &MFVideoFormat_H264))
- {
- enum eAVEncH264VProfile h264_profile;
- enum eAVEncH264VLevel h264_level;
- output = gst_caps_new_empty_simple("video/x-h264");
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING,
"byte-stream", NULL);
- gst_caps_set_simple(output, "alignment", G_TYPE_STRING, "au", NULL);
- if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_PROFILE,
&h264_profile)))
- {
- const char *profile = NULL;
- switch (h264_profile)
- {
- case eAVEncH264VProfile_Main: profile = "main"; break;
- case eAVEncH264VProfile_High: profile = "high"; break;
- case eAVEncH264VProfile_444: profile = "high-4:4:4"; break;
- default: ERR("Unknown profile %u\n", h264_profile);
- }
- if (profile)
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, profile, NULL);
- }
- if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_LEVEL,
&h264_level)))
- {
- const char *level = NULL;
- switch (h264_level)
- {
- case eAVEncH264VLevel1: level = "1"; break;
- case eAVEncH264VLevel1_1: level = "1.1"; break;
- case eAVEncH264VLevel1_2: level = "1.2"; break;
- case eAVEncH264VLevel1_3: level = "1.3"; break;
- case eAVEncH264VLevel2: level = "2"; break;
- case eAVEncH264VLevel2_1: level = "2.1"; break;
- case eAVEncH264VLevel2_2: level = "2.2"; break;
- case eAVEncH264VLevel3: level = "3"; break;
- case eAVEncH264VLevel3_1: level = "3.1"; break;
- case eAVEncH264VLevel3_2: level = "3.2"; break;
- case eAVEncH264VLevel4: level = "4"; break;
- case eAVEncH264VLevel4_1: level = "4.1"; break;
- case eAVEncH264VLevel4_2: level = "4.2"; break;
- case eAVEncH264VLevel5: level = "5"; break;
- case eAVEncH264VLevel5_1: level = "5.1"; break;
- case eAVEncH264VLevel5_2: level = "5.2"; break;
- default: ERR("Unknown level %u\n", h264_level);
- }
- if (level)
- gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL);
- }
- }
- else if (IsEqualGUID(&subtype, &MFVideoFormat_WVC1))
- {
- BYTE *user_data;
- DWORD user_data_size;
- output = gst_caps_new_empty_simple("video/x-wmv");
- gst_caps_set_simple(output, "format", G_TYPE_STRING, "WVC1", NULL);
- gst_caps_set_simple(output, "wmvversion", G_TYPE_INT, 3, NULL);
- if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA,
&user_data, &user_data_size)))
- {
- GstBuffer *codec_data_buffer = gst_buffer_new_allocate(NULL,
user_data_size, NULL);
- gst_buffer_fill(codec_data_buffer, 0, user_data, user_data_size);
- gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER,
codec_data_buffer, NULL);
- gst_buffer_unref(codec_data_buffer);
- CoTaskMemFree(user_data);
- }
- }
- else if (IsEqualGUID(&base_masked_subtype, &MFVideoFormat_Base))
- {
- output = gst_caps_new_empty_simple("video/x-raw");
- gst_caps_set_simple(output, "format", G_TYPE_STRING,
fourcc_str(subtype.Data1), NULL);
What about RGB formats?
Ah, I didn't think about those, looks like we'll have to use a table of known conversions instead.
Well, to some degree, though you can also make use of gst_video_format_from_fourcc(). See also amt_to_gst_caps_video() in gstdemux.c.
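I.e. something like (sketch; needs gst/video/video.h):

    GstVideoFormat format = gst_video_format_from_fourcc(subtype.Data1);

    if (format != GST_VIDEO_FORMAT_UNKNOWN)
        gst_caps_set_simple(output, "format", G_TYPE_STRING,
                gst_video_format_to_string(format), NULL);
    else
        FIXME("Unrecognized subtype %s\n", debugstr_guid(&subtype));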
- }
- else {
- ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
- return NULL;
- }
- IMFMediaType_GetUINT64(type, &MF_MT_FRAME_RATE, &frame_rate);
- IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
- if (frame_rate)
- gst_caps_set_simple(output, "framerate", GST_TYPE_FRACTION,
*framerate_num, *framerate_den, NULL);
- if (frame_size)
- {
- gst_caps_set_simple(output, "width", G_TYPE_INT, *width, NULL);
- gst_caps_set_simple(output, "height", G_TYPE_INT, *height, NULL);
- }
- return output;
- }
- else if (IsEqualGUID(&major_type, &MFMediaType_Audio))
- {
- DWORD rate, channels;
- if (IsEqualGUID(&subtype, &MFAudioFormat_AAC))
- {
- DWORD payload_type, indication;
- struct aac_user_data *user_data;
- UINT32 user_data_size;
- output = gst_caps_new_empty_simple("audio/mpeg");
- /* TODO */
- gst_caps_set_simple(output, "framed", G_TYPE_BOOLEAN, TRUE, NULL);
- gst_caps_set_simple(output, "mpegversion", G_TYPE_INT, 4, NULL);
What's TODO here?
MFAudioFormat_AAC could also mean mpegversion=2, and I don't know what the "framed" attribute is for.
A TODO message should probably mention what exactly is to be done.
In general it's good practice to understand what your code is doing before you submit it, but regardless, "framed" means there is exactly one frame per buffer. Is that guaranteed by the MF source? (It's not obvious to me that it is...)
- if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE,
&payload_type)))
- {
- switch (payload_type)
- {
- case 0:
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw",
NULL);
- break;
- case 1:
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "adts",
NULL);
- break;
- default:
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw",
NULL);
Seems to me that 2 and 3 should be mapped to "adif" and "loas", respectively.
Ack.
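Something like this, then (sketch):

    const char *stream_format = NULL;

    switch (payload_type)
    {
        case 0: stream_format = "raw"; break;
        case 1: stream_format = "adts"; break;
        case 2: stream_format = "adif"; break;
        case 3: stream_format = "loas"; break;
        default: FIXME("Unrecognized payload type %u\n", payload_type);
    }
    if (stream_format)
        gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, stream_format, NULL);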
- }
- }
- else
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw",
NULL);
- if (SUCCEEDED(IMFMediaType_GetUINT32(type,
&MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, &indication)))
- {
- switch (indication)
- {
- case 0x29:
- {
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
- gst_caps_set_simple(output, "level", G_TYPE_STRING, "2", NULL);
- break;
- }
- case 0x2A:
- {
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
- gst_caps_set_simple(output, "level", G_TYPE_STRING, "4", NULL);
- break;
- }
- case 0x2B:
- {
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
- gst_caps_set_simple(output, "level", G_TYPE_STRING, "5", NULL);
- break;
- }
- default:
- ERR("Unrecognized profile-level-indication %u\n", indication);
- }
I think you could significantly deduplicate this switch.
Ack.
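E.g. (sketch):

    const char *level = NULL;

    switch (indication)
    {
        case 0x29: level = "2"; break;
        case 0x2A: level = "4"; break;
        case 0x2B: level = "5"; break;
        default: FIXME("Unrecognized profile-level-indication %u\n", indication);
    }
    if (level)
    {
        gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
        gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL);
    }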
- }
- if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA,
(BYTE **) &user_data, &user_data_size)))
- {
- if (user_data_size > sizeof(sizeof(*user_data)))
- {
- GstBuffer *audio_specific_config = gst_buffer_new_allocate(NULL,
user_data_size - sizeof(*user_data), NULL);
- gst_buffer_fill(audio_specific_config, 0, user_data + 1,
user_data_size - sizeof(*user_data));
- gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER,
audio_specific_config, NULL);
- gst_buffer_unref(audio_specific_config);
- }
- CoTaskMemFree(user_data);
- }
- }
- else if (IsEqualGUID(&subtype, &MFAudioFormat_Float))
- {
- output = gst_caps_new_empty_simple("audio/x-raw");
- gst_caps_set_simple(output, "format", G_TYPE_STRING, "F32LE", NULL);
- }
- else
- {
- ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
- if (output)
- gst_caps_unref(output);
- return NULL;
- }
- if (SUCCEEDED(IMFMediaType_GetUINT32(type,
&MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate)))
- {
- gst_caps_set_simple(output, "rate", G_TYPE_INT, rate, NULL);
- }
- if (SUCCEEDED(IMFMediaType_GetUINT32(type,
&MF_MT_AUDIO_NUM_CHANNELS, &channels)))
- {
- gst_caps_set_simple(output, "channels", G_TYPE_INT, channels, NULL);
- }
- return output;
- }
- ERR("Unrecognized major type %s\n", debugstr_guid(&major_type));
- return NULL;
+}
diff --git a/include/codecapi.h b/include/codecapi.h
new file mode 100644
index 0000000000..2690b523d7
--- /dev/null
+++ b/include/codecapi.h
@@ -0,0 +1,38 @@
+#ifndef __CODECAPI_H
+#define __CODECAPI_H
+enum eAVEncH264VProfile
+{
- eAVEncH264VProfile_unknown = 0,
- eAVEncH264VProfile_Simple = 66,
- eAVEncH264VProfile_Base = 66,
- eAVEncH264VProfile_Main = 77,
- eAVEncH264VProfile_High = 100,
- eAVEncH264VProfile_422 = 122,
- eAVEncH264VProfile_High10 = 110,
- eAVEncH264VProfile_444 = 244,
- eAVEncH264VProfile_Extended = 88,
+};
+enum eAVEncH264VLevel
+{
- eAVEncH264VLevel1 = 10,
- eAVEncH264VLevel1_b = 11,
- eAVEncH264VLevel1_1 = 11,
- eAVEncH264VLevel1_2 = 12,
- eAVEncH264VLevel1_3 = 13,
- eAVEncH264VLevel2 = 20,
- eAVEncH264VLevel2_1 = 21,
- eAVEncH264VLevel2_2 = 22,
- eAVEncH264VLevel3 = 30,
- eAVEncH264VLevel3_1 = 31,
- eAVEncH264VLevel3_2 = 32,
- eAVEncH264VLevel4 = 40,
- eAVEncH264VLevel4_1 = 41,
- eAVEncH264VLevel4_2 = 42,
- eAVEncH264VLevel5 = 50,
- eAVEncH264VLevel5_1 = 51,
- eAVEncH264VLevel5_2 = 52
+};
+#endif
\ No newline at end of file
On 3/26/20 11:40 AM, Zebediah Figura wrote:
...
Also, why is this in the public header?
Would it be better to split this into a mfplat_private.h header?
I mean, why do you need to use it from anything other than mfplat.c?
Because I'd prefer not to merge around 4,000 lines of code into a single file. (See media_source.c, mf_decode.c.)
This is another reason why it doesn't make a lot of sense to submit dead code.
The code which uses these functions is included in my more recent patch set.
...
+/* IMPORTANT: caps will be modified to represent the exact type needed for the format */
Why?
Because in the case of a demuxer, the caps of the stream we receive might not map 1:1 to the representation in Media Foundation. Because of this, in the media source, if any adjustments are needed, we feed the buffers through a parser to correct it.
See: https://github.com/Guy1524/wine/commit/7ab88be3882ab95f3fc17dab374184e06f018...
This seems like a very confusing way to do that. At least I'd relegate it to a separate function. I wouldn't expect a conversion function to modify its argument, and it moreover makes it essentially unusable anywhere else.
The alternative is to just fail, because there's no mapping. For example, there's no equivalent to a non-Annex B H.264 stream in MF.
That said, these modifications are specific to the format, and along those lines it may make more sense to append specific elements rather than to make specific changes to the caps and try to find an element that can accommodate those. This will also help if you ever need to append multiple such elements. Thus you can e.g. append an audioconvert element unconditionally, and if no conversion is necessary it'll just pass through.
In the case of compressed sample parsers, what would I append unconditionally? It's very specific to the type.
Looking at the modifications you do make—
- you force h264 into annex B format, which is the job of h264parse;
Yes, because that's how it's represented on Windows.
- you force all raw audio into 32-bit float. Does native mfplat really never output integer PCM?
I think I can fix that; I do know that MFAudioFormat_Float can only be F32LE, though.
...
Did you mean to add these blocks?
Yeah, it's so I can declare the variables closer to where they are used.
I'll admit I don't get the obsession with C99 variable declarations, but this just seems janky.
It wouldn't seem janky if we had C99 variable declarations :P
...
There are other audio formats.
Ah, you mean PCM? I'll add a case for that tomorrow.
f32le is PCM, but I mean integer PCM and other depths than 32-bit.
Hmm okay, I'll do more research on that.
Presumably there should also be channel and sample rate data here.
Yeah good catch.
...
It seems simpler to me to do e.g.
DWORD width = frame_size;
DWORD height = frame_size >> 32;
I'm not getting the width and height here, I'm declaring pointers to them which are set later on.
Right, I mean actually set the variables after retrieving frame_size; in full something like
DWORD width, height;
/* ... */
IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
width = frame_size;
height = frame_size >> 32;
Yeah that works.
...
What about RGB formats?
Ah, I didn't think about those, looks like we'll have to use a table of known conversions instead.
Well, to some degree, though you can also make use of gst_video_format_from_fourcc(). See also amt_to_gst_caps_video() in gstdemux.c.
Ah, check for RGB formats first, then fall back to FOURCC conversion. Okay, sure.
...
What's TODO here?
MFAudioFormat_AAC could also mean mpegversion=2, and I don't know what the "framed" attribute is for.
A TODO message should probably mention what exactly is to be done.
In general it's good practice to understand what your code is doing before you submit it, but regardless, "framed" means there is exactly one frame per buffer. Is that guaranteed by the MF source? (It's not obvious to me that it is...)
Yeah, I should probably remove it in that case. I was trying to match up all the attributes when going through the conversion to IMFMediaType and back, but it's probably not necessary.
...
On 3/26/20 12:18 PM, Derek Lesho wrote:
...
I mean, why do you need to use it from anything other than mfplat.c?
Because I'd prefer not to merge around 4,000 lines of code into a single file. (See media_source.c, mf_decode.c.)
This is another reason why it doesn't make a lot of sense to submit dead code.
The code which uses these functions is included in my more recent patch set.
While submitting the code that uses a helper function in the same patch set does help, it's still not the best way to organize patches. Also, in this case, it means submitting at least 16 patches in one set, which is not desirable either.
The best way to submit such a patch set is to add the code which uses (or is going to use) media_type_from_caps() first, then actually implement media_type_from_caps(). That can mean e.g. adding a stub media_type_from_caps() that prints a FIXME and returns NULL, such as in fb6956c7d, or just leaving that part out of the caller (and probably doing a similar fail-with-FIXME). I don't know what the best way to arrange that is in this case, but I'm not the one writing the patches.
Such a top-down approach is much easier to review, because then you know exactly how a helper will be used when or before you have to review that helper's implementation. When you submit the helper by itself, first, it's hard to understand if it's doing the right thing. You also won't have dead code (and won't have to work around compiler warnings for such by e.g. making functions non-static).
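Concretely, the stub variant could be as small as this (hypothetical, analogous to the commit referenced above):

    IMFMediaType *media_type_from_caps(GstCaps *caps)
    {
        FIXME("caps %p, stub!\n", caps);
        return NULL;
    }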
...
This seems like a very confusing way to do that. At least I'd relegate it to a separate function. I wouldn't expect a conversion function to modify its argument, and it moreover makes it essentially unusable anywhere else.
The alternative is to just fail, because there's no mapping. For example, there's no equivalent to a non-Annex B H.264 stream in MF.
Sure, I think that's a good design. It's a clear way to communicate "we don't support these caps".
...
- you force all raw audio into 32-bit float. Does native mfplat really never output integer PCM?
I think I can fix that; I do know that MFAudioFormat_Float can only be F32LE, though.
64-bit float exists. (So does 16-bit and 24-bit, in fact.) That's not necessarily to say that any given MF object handles it, but I'd recommend at least checking whether the bit depth and endianness match what you expect, instead of just assuming that they do.
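I.e. on the caps side, check the "format" string instead of assuming it; a sketch (the MFAudioFormat_PCM mapping for S16LE is my guess at the intended subtype):

    const char *format = gst_structure_get_string(info, "format");

    if (format && !strcmp(format, "F32LE"))
        IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_Float);
    else if (format && !strcmp(format, "S16LE"))
        IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_PCM);
    else
        FIXME("Unhandled raw audio format %s\n", debugstr_a(format));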
+IMFMediaType* media_type_from_caps(GstCaps *caps) +{
- IMFMediaType *media_type;
- GstStructure *info;
- const char *media_type_name;
- gchar *human_readable;
- if (FAILED(MFCreateMediaType(&media_type)))
- {
- return NULL;
- }
- info = gst_caps_get_structure(caps, 0);
- media_type_name = gst_structure_get_name(info);
- human_readable = gst_caps_to_string(caps);
- TRACE("caps = %s\n", human_readable);
- g_free(human_readable);
Probably would be best to guard this with TRACE_ON, so that we don't bother allocating anything otherwise.
Also, you'll want to use debugstr_a(), especially since caps can overrun the static buffer in ntdll.
Ack.
- if (!(strncmp(media_type_name, "video", 5)))
Style nitpick, superfluous parentheses.
I think Nikolay already mentioned this, but it's probably not a bad idea to just match against the whole "video/x-h264" etc. sequence.
Ack.
- {
- const char *video_format = media_type_name + 6;
- gint width, height, framerate_num, framerate_den;
- IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE,
&MFMediaType_Video);
- if (gst_structure_get_int(info, "width", &width) &&
gst_structure_get_int(info, "height", &height))
- {
- IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_SIZE,
((UINT64)width << 32) | height);
- }
- if (gst_structure_get_fraction(info, "framerate", &framerate_num,
&framerate_den))
- {
- IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_RATE,
((UINT64)framerate_num << 32) | framerate_den);
- }
- if (!(strcmp(video_format, "x-h264")))
- {
- const char *profile, *level;
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_H264);
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
- if ((profile = gst_structure_get_string(info, "profile")))
- {
- if (!(strcmp(profile, "main")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE,
eAVEncH264VProfile_Main);
- else if (!(strcmp(profile, "high")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE,
eAVEncH264VProfile_High);
- else if (!(strcmp(profile, "high-4:4:4")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE,
eAVEncH264VProfile_444);
- else
- ERR("Unrecognized profile %s\n", profile);
This ERR (and many below) should probably be a FIXME instead, methinks.
Ack.
- }
- if ((level = gst_structure_get_string(info, "level")))
- {
- if (!(strcmp(level, "1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1);
- else if (!(strcmp(level, "1.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1_1);
- else if (!(strcmp(level, "1.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1_2);
- else if (!(strcmp(level, "1.3")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel1_3);
- else if (!(strcmp(level, "2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel2);
- else if (!(strcmp(level, "2.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel2_1);
- else if (!(strcmp(level, "2.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel2_2);
- else if (!(strcmp(level, "3")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel3);
- else if (!(strcmp(level, "3.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel3_1);
- else if (!(strcmp(level, "3.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel3_2);
- else if (!(strcmp(level, "4")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel4);
- else if (!(strcmp(level, "4.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel4_1);
- else if (!(strcmp(level, "4.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel4_2);
- else if (!(strcmp(level, "5")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel5);
- else if (!(strcmp(level, "5.1")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel5_1);
- else if (!(strcmp(level, "5.2")))
- IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL,
eAVEncH264VLevel5_2);
- else
- ERR("Unrecognized level %s\n", level);
- }
Could we maybe make this a table instead?
Sure.
- gst_caps_set_simple(caps, "stream-format", G_TYPE_STRING,
"byte-stream", NULL);
- gst_caps_set_simple(caps, "alignment", G_TYPE_STRING, "au", NULL);
- for (unsigned int i = 0; i < gst_caps_get_size(caps); i++)
- {
- GstStructure *structure = gst_caps_get_structure (caps, i);
- gst_structure_remove_field(structure, "codec_data");
- }
- }
- else if (!(strcmp(video_format, "x-wmv")))
- {
- gint wmv_version;
- const char *format;
- const GValue *codec_data;
- if (gst_structure_get_int(info, "wmvversion", &wmv_version))
- {
- switch (wmv_version)
- {
- case 1:
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV1);
- break;
- case 2:
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV2);
- break;
- case 3:
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV3);
- break;
- default:
- ERR("Unrecognized wmvversion %d\n", wmv_version);
- }
- }
- if ((format = gst_structure_get_string(info, "format")))
- {
- if (!(strcmp(format, "WVC1")))
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WVC1);
What if it's not? I think that deserves at least a FIXME.
(Style nitpick, extra parentheses.)
Ack.
- }
- if ((codec_data = gst_structure_get_value(info, "codec_data")))
- {
- GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
- if (codec_data_buffer)
- {
- gsize codec_data_size = gst_buffer_get_size(codec_data_buffer);
- gpointer codec_data_raw = heap_alloc(codec_data_size);
- gst_buffer_extract(codec_data_buffer, 0, codec_data_raw,
codec_data_size);
- IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, codec_data_raw,
codec_data_size);
- }
- }
- }
- else if (!(strcmp(video_format, "mpeg")))
- {
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_M4S2);
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
There are other video/mpeg formats.
TBH, the only reason I've included this is for the tests to work; I'll look into how to differentiate the mpeg types tomorrow.
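A first stab at telling them apart could key off "mpegversion" and "systemstream" (untested sketch; whether mpegversion=4 should map to M4S2 or MP4V here is exactly the thing that still needs checking):

    gint mpegversion;
    gboolean systemstream = FALSE;

    gst_structure_get_boolean(info, "systemstream", &systemstream);
    if (!systemstream && gst_structure_get_int(info, "mpegversion", &mpegversion))
    {
        switch (mpegversion)
        {
            case 1:
                IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_MPG1);
                break;
            case 2:
                IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_MPEG2);
                break;
            case 4:
                IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_M4S2);
                break;
            default:
                FIXME("Unhandled video mpegversion %d\n", mpegversion);
        }
    }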
- }
- else if (!(strcmp(video_format, "x-raw")))
- {
- const char *fourcc = gst_structure_get_string(info, "stream-format");
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, FALSE);
- if (fourcc && (strlen(fourcc) == 4))
- {
- GUID fourcc_subtype = MFVideoFormat_Base;
- fourcc_subtype.Data1 = MAKEFOURCC(
- toupper(fourcc[0]), toupper(fourcc[1]), toupper(fourcc[2]),
toupper(fourcc[3]));
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &fourcc_subtype);
- }
- else
- ERR("uncompressed video has no stream-format\n");
I've never seen a FOURCC stored in the "stream-format" tag; where are you getting this from?
You're right, I think I'm supposed to use "format" here, but this is dead code right now, which is why I didn't see any problems.
- }
- else
- ERR("Unrecognized video format %s\n", video_format);
- }
- else if (!(strncmp(media_type_name, "audio", 5)))
- {
- const char *audio_format = media_type_name + 6;
- IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE,
&MFMediaType_Audio);
- if (!(strcmp(audio_format, "mpeg")))
- {
- int mpeg_version = -1;
- IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE);
- if (!(gst_structure_get_int(info, "mpegversion", &mpeg_version)))
- ERR("Failed to get mpegversion\n");
- switch (mpeg_version)
- {
- case 1:
- {
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG);
- break;
- }
What about MFAudioFormat_MP3?
I'm actually not sure what to use here, I should probably remove it for now.
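If something does come back here, the "layer" field on mpegversion=1 caps would presumably be the discriminator (hypothetical sketch; keeping MFAudioFormat_MPEG for layers 1/2 is a guess):

    gint layer = 0;

    gst_structure_get_int(info, "layer", &layer);
    if (layer == 3)
        IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MP3);
    else
        IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG);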
- case 2:
- case 4:
- {
- const char *format, *profile, *level;
- DWORD profile_level_indication = 0;
- const GValue *codec_data;
- DWORD asc_size = 0;
- struct aac_user_data *user_data = NULL;
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_AAC);
- codec_data = gst_structure_get_value(info, "codec_data");
- if (codec_data)
- {
- GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data);
- if (codec_data_buffer)
- {
- if ((asc_size = gst_buffer_get_size(codec_data_buffer)) >= 2)
- {
- user_data = heap_alloc_zero(sizeof(*user_data)+asc_size);
- gst_buffer_extract(codec_data_buffer, 0, (gpointer)(user_data + 1),
asc_size);
- }
- else
- ERR("Unexpected buffer size\n");
- }
- else
- ERR("codec_data not a buffer\n");
- }
- else
- ERR("codec_data not found\n");
- if (!user_data)
- user_data = heap_alloc_zero(sizeof(*user_data));
- {
- int rate;
- if (gst_structure_get_int(info, "rate", &rate))
- IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND,
rate);
- }
- {
- int channels;
- if (gst_structure_get_int(info, "channels", &channels))
- IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS,
channels);
- }
Did you mean to add these blocks?
Yeah, it's so I can declare the variables closer to where they are used.
I'll admit I don't get the obsession with C99 variable declarations, but this just seems janky.
It wouldn't seem janky if we had C99 variable declarations :P
- if ((format = gst_structure_get_string(info, "stream-format")))
- {
- DWORD payload_type = -1;
- if (!(strcmp(format, "raw")))
- payload_type = 0;
- else if (!(strcmp(format, "adts")))
- payload_type = 1;
- else
- ERR("Unrecognized stream-format\n");
- if (payload_type != -1)
- {
- IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_PAYLOAD_TYPE,
payload_type);
- user_data->payload_type = payload_type;
- }
- }
- else
- {
- ERR("Stream format not present\n");
- }
- profile = gst_structure_get_string(info, "profile");
- level = gst_structure_get_string(info, "level");
- /* Data from
https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder#output-t... */
I'm not sure I'd link to Microsoft documentation; it's not very stable.
Would a link to an archive.is backup of it be better?
Probably.
- if (profile && level)
- {
- if (!(strcmp(profile, "lc")) && !(strcmp(level, "2")))
- profile_level_indication = 0x29;
- else if (!(strcmp(profile, "lc")) && !(strcmp(level, "4")))
- profile_level_indication = 0x2A;
- else if (!(strcmp(profile, "lc")) && !(strcmp(level, "5")))
- profile_level_indication = 0x2B;
- else
- ERR("Unhandled profile/level combo\n");
- }
- else
- ERR("Profile or level not present\n");
- if (profile_level_indication)
- {
- IMFMediaType_SetUINT32(media_type,
&MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, profile_level_indication);
- user_data->profile_level_indication = profile_level_indication;
- }
- IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, (BYTE
*)user_data, sizeof(user_data) + asc_size);
- heap_free(user_data);
- break;
- }
- default:
- ERR("Unhandled mpegversion %d\n", mpeg_version);
- }
- }
- else if (!(strcmp(audio_format, "x-raw")))
- {
- IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_Float);
- gst_caps_set_simple(caps, "format", G_TYPE_STRING, "F32LE", NULL);
There are other audio formats.
Ah, you mean PCM? I'll add a case for that tomorrow.
f32le is PCM, but I mean integer PCM and other depths than 32-bit.
Hmm okay, I'll do more research on that.
Presumably there should also be channel and sample rate data here.
Yeah good catch.
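GstAudioInfo could probably cover the format, rate and channel handling in one go (rough sketch; assumes the caps are fixed, that winegstreamer pulls in <gst/audio/audio.h>, and that MFAudioFormat_PCM is the right subtype for integer formats):

    GstAudioInfo audio_info;

    if (gst_audio_info_from_caps(&audio_info, caps))
    {
        /* derive the subtype from the sample format instead of assuming float */
        IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE,
                GST_AUDIO_INFO_IS_FLOAT(&audio_info) ? &MFAudioFormat_Float : &MFAudioFormat_PCM);
        IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_BITS_PER_SAMPLE, GST_AUDIO_INFO_DEPTH(&audio_info));
        IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, GST_AUDIO_INFO_RATE(&audio_info));
        IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS, GST_AUDIO_INFO_CHANNELS(&audio_info));
    }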
- }
- else
- ERR("Unrecognized audio format %s\n", audio_format);
- }
- else
- {
- goto fail;
I'm generally of the opinion that one line of cleanup doesn't merit a "goto".
Okay I'll change that then.
- }
- return media_type;
- fail:
- IMFMediaType_Release(media_type);
- return NULL;
+}
+static const char *fourcc_str(DWORD fourcc) +{
- if (!fourcc) return NULL;
- return wine_dbg_sprintf ("%c%c%c%c",
- (char)(fourcc), (char)(fourcc >> 8),
- (char)(fourcc >> 16), (char)(fourcc >> 24));
+}
I don't think you want to use Wine's debugging utilities for non-debug code.
Ack.
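For example, filling a caller-provided buffer instead (minimal sketch):

    static void fourcc_str(DWORD fourcc, char str[5])
    {
        /* unpack the four bytes, least significant first, plus a terminator */
        str[0] = (char)fourcc;
        str[1] = (char)(fourcc >> 8);
        str[2] = (char)(fourcc >> 16);
        str[3] = (char)(fourcc >> 24);
        str[4] = 0;
    }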
+GstCaps *caps_from_media_type(IMFMediaType *type) +{
- GUID major_type;
- GUID subtype;
- GUID base_masked_subtype;
- GstCaps *output = NULL;
- if (FAILED(IMFMediaType_GetMajorType(type, &major_type)))
- return NULL;
- if (FAILED(IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype)))
- return NULL;
- base_masked_subtype = subtype;
- base_masked_subtype.Data1 = 0;
- if (IsEqualGUID(&major_type, &MFMediaType_Video))
- {
- UINT64 frame_rate = 0, frame_size = 0;
- DWORD *framerate_num = ((DWORD*)&frame_rate) + 1;
- DWORD *framerate_den = ((DWORD*)&frame_rate);
- DWORD *width = ((DWORD*)&frame_size) + 1;
- DWORD *height = ((DWORD*)&frame_size);
It seems simpler to me to do e.g.
DWORD width = frame_size >> 32; DWORD height = frame_size;
I'm not getting the width and height here; I'm declaring pointers to them, which are set later on.
Right, I mean actually set the variables after retrieving frame_size; in full something like
DWORD width, height; /* ... */ IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size); width = frame_size >> 32; height = frame_size;
Yeah that works.
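Written out in full, with the framerate unpacked the same way (untested sketch; both attributes pack their first component into the high half, matching the packing in media_type_from_caps() above):

    UINT64 frame_rate = 0, frame_size = 0;
    DWORD width, height;

    if (SUCCEEDED(IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size)) && frame_size)
    {
        width = frame_size >> 32;
        height = (DWORD)frame_size;
        gst_caps_set_simple(output, "width", G_TYPE_INT, width, NULL);
        gst_caps_set_simple(output, "height", G_TYPE_INT, height, NULL);
    }
    if (SUCCEEDED(IMFMediaType_GetUINT64(type, &MF_MT_FRAME_RATE, &frame_rate)) && frame_rate)
        gst_caps_set_simple(output, "framerate", GST_TYPE_FRACTION,
                (gint)(frame_rate >> 32), (gint)frame_rate, NULL);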
- if (IsEqualGUID(&subtype, &MFVideoFormat_H264))
- {
- enum eAVEncH264VProfile h264_profile;
- enum eAVEncH264VLevel h264_level;
- output = gst_caps_new_empty_simple("video/x-h264");
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING,
"byte-stream", NULL);
- gst_caps_set_simple(output, "alignment", G_TYPE_STRING, "au", NULL);
- if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_PROFILE,
&h264_profile)))
- {
- const char *profile = NULL;
- switch (h264_profile)
- {
- case eAVEncH264VProfile_Main: profile = "main"; break;
- case eAVEncH264VProfile_High: profile = "high"; break;
- case eAVEncH264VProfile_444: profile = "high-4:4:4"; break;
- default: ERR("Unknown profile %u\n", h264_profile);
- }
- if (profile)
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, profile, NULL);
- }
- if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_LEVEL,
&h264_level)))
- {
- const char *level = NULL;
- switch (h264_level)
- {
- case eAVEncH264VLevel1: level = "1"; break;
- case eAVEncH264VLevel1_1: level = "1.1"; break;
- case eAVEncH264VLevel1_2: level = "1.2"; break;
- case eAVEncH264VLevel1_3: level = "1.3"; break;
- case eAVEncH264VLevel2: level = "2"; break;
- case eAVEncH264VLevel2_1: level = "2.1"; break;
- case eAVEncH264VLevel2_2: level = "2.2"; break;
- case eAVEncH264VLevel3: level = "3"; break;
- case eAVEncH264VLevel3_1: level = "3.1"; break;
- case eAVEncH264VLevel3_2: level = "3.2"; break;
- case eAVEncH264VLevel4: level = "4"; break;
- case eAVEncH264VLevel4_1: level = "4.1"; break;
- case eAVEncH264VLevel4_2: level = "4.2"; break;
- case eAVEncH264VLevel5: level = "5"; break;
- case eAVEncH264VLevel5_1: level = "5.1"; break;
- case eAVEncH264VLevel5_2: level = "5.2"; break;
- default: ERR("Unknown level %u\n", h264_level);
- }
- if (level)
- gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL);
- }
- }
- else if (IsEqualGUID(&subtype, &MFVideoFormat_WVC1))
- {
- BYTE *user_data;
- DWORD user_data_size;
- output = gst_caps_new_empty_simple("video/x-wmv");
- gst_caps_set_simple(output, "format", G_TYPE_STRING, "WVC1", NULL);
- gst_caps_set_simple(output, "wmvversion", G_TYPE_INT, 3, NULL);
- if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA,
&user_data, &user_data_size)))
- {
- GstBuffer *codec_data_buffer = gst_buffer_new_allocate(NULL,
user_data_size, NULL);
- gst_buffer_fill(codec_data_buffer, 0, user_data, user_data_size);
- gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER,
codec_data_buffer, NULL);
- gst_buffer_unref(codec_data_buffer);
- CoTaskMemFree(user_data);
- }
- }
- else if (IsEqualGUID(&base_masked_subtype, &MFVideoFormat_Base))
- {
- output = gst_caps_new_empty_simple("video/x-raw");
- gst_caps_set_simple(output, "format", G_TYPE_STRING,
fourcc_str(subtype.Data1), NULL);
What about RGB formats?
Ah, I didn't think about those; looks like we'll have to use a table of known conversions instead.
Well, to some degree, though you can also make use of gst_video_format_from_fourcc(). See also amt_to_gst_caps_video() in gstdemux.c.
Ah check for RGB formats first then fall back to FOURCC conversion, okay sure.
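Roughly like this, perhaps (sketch; needs <gst/video/video.h>, and the BGRx/BGR mappings for the MF RGB subtypes are my guess and would want checking against native behaviour):

    const char *format = NULL;

    if (IsEqualGUID(&subtype, &MFVideoFormat_RGB32))
        format = "BGRx";
    else if (IsEqualGUID(&subtype, &MFVideoFormat_RGB24))
        format = "BGR";
    else if (IsEqualGUID(&base_masked_subtype, &MFVideoFormat_Base))
    {
        /* fall back to interpreting the subtype as a FOURCC */
        GstVideoFormat video_format = gst_video_format_from_fourcc(subtype.Data1);
        if (video_format != GST_VIDEO_FORMAT_UNKNOWN)
            format = gst_video_format_to_string(video_format);
    }

    output = gst_caps_new_empty_simple("video/x-raw");
    if (format)
        gst_caps_set_simple(output, "format", G_TYPE_STRING, format, NULL);
    else
        FIXME("Unrecognized subtype %s\n", debugstr_guid(&subtype));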
- }
- else {
- ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
- return NULL;
- }
- IMFMediaType_GetUINT64(type, &MF_MT_FRAME_RATE, &frame_rate);
- IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
- if (frame_rate)
- gst_caps_set_simple(output, "framerate", GST_TYPE_FRACTION,
*framerate_num, *framerate_den, NULL);
- if (frame_size)
- {
- gst_caps_set_simple(output, "width", G_TYPE_INT, *width, NULL);
- gst_caps_set_simple(output, "height", G_TYPE_INT, *height, NULL);
- }
- return output;
- }
- else if (IsEqualGUID(&major_type, &MFMediaType_Audio))
- {
- DWORD rate, channels;
- if (IsEqualGUID(&subtype, &MFAudioFormat_AAC))
- {
- DWORD payload_type, indication;
- struct aac_user_data *user_data;
- UINT32 user_data_size;
- output = gst_caps_new_empty_simple("audio/mpeg");
- /* TODO */
- gst_caps_set_simple(output, "framed", G_TYPE_BOOLEAN, TRUE, NULL);
- gst_caps_set_simple(output, "mpegversion", G_TYPE_INT, 4, NULL);
What's TODO here?
MFAudioFormat_AAC could also mean mpegversion=2, and I don't know what the "framed" attribute is for.
A TODO message should probably mention what exactly is to be done.
In general it's good practice to understand what your code is doing before you submit it, but regardless, "framed" means there is exactly one frame per buffer. Is that guaranteed by the MF source? (It's not obvious to me that it is...)
Yeah, I should probably remove it in that case; I was trying to match up all the attributes when going through the conversion to IMFMediaType and back, but it's probably not necessary.
- if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE,
&payload_type)))
- {
- switch (payload_type)
- {
- case 0:
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw",
NULL);
- break;
- case 1:
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "adts",
NULL);
- break;
- default:
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw",
NULL);
Seems to me that 2 and 3 should be mapped to "adif" and "loas", respectively.
Ack.
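i.e. something along the lines of (sketch; the table would live at file scope):

    static const char *payload_formats[] = {"raw", "adts", "adif", "loas"};

    if (payload_type < ARRAY_SIZE(payload_formats))
        gst_caps_set_simple(output, "stream-format", G_TYPE_STRING,
                payload_formats[payload_type], NULL);
    else
        FIXME("Unrecognized payload type %u\n", payload_type);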
- }
- }
- else
- gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw",
NULL);
- if (SUCCEEDED(IMFMediaType_GetUINT32(type,
&MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, &indication)))
- {
- switch (indication)
- {
- case 0x29:
- {
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
- gst_caps_set_simple(output, "level", G_TYPE_STRING, "2", NULL);
- break;
- }
- case 0x2A:
- {
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
- gst_caps_set_simple(output, "level", G_TYPE_STRING, "4", NULL);
- break;
- }
- case 0x2B:
- {
- gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
- gst_caps_set_simple(output, "level", G_TYPE_STRING, "5", NULL);
- break;
- }
- default:
- ERR("Unrecognized profile-level-indication %u\n", indication);
- }
I think you could significantly deduplicate this switch.
Ack.
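e.g. hoisting the common "lc" profile out and switching only on the level (sketch):

    const char *level = NULL;

    switch (indication)
    {
        case 0x29: level = "2"; break;
        case 0x2A: level = "4"; break;
        case 0x2B: level = "5"; break;
        default:
            FIXME("Unrecognized profile-level-indication %u\n", indication);
    }
    if (level)
    {
        gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
        gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL);
    }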
- }
- if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA,
(BYTE **) &user_data, &user_data_size)))
- {
- if (user_data_size > sizeof(sizeof(*user_data)))
- {
- GstBuffer *audio_specific_config = gst_buffer_new_allocate(NULL,
user_data_size - sizeof(*user_data), NULL);
- gst_buffer_fill(audio_specific_config, 0, user_data + 1,
user_data_size - sizeof(*user_data));
- gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER,
audio_specific_config, NULL);
- gst_buffer_unref(audio_specific_config);
- }
- CoTaskMemFree(user_data);
- }
- }
- else if (IsEqualGUID(&subtype, &MFAudioFormat_Float))
- {
- output = gst_caps_new_empty_simple("audio/x-raw");
- gst_caps_set_simple(output, "format", G_TYPE_STRING, "F32LE", NULL);
- }
- else
- {
- ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype));
- if (output)
- gst_caps_unref(output);
- return NULL;
- }
- if (SUCCEEDED(IMFMediaType_GetUINT32(type,
&MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate)))
- {
- gst_caps_set_simple(output, "rate", G_TYPE_INT, rate, NULL);
- }
- if (SUCCEEDED(IMFMediaType_GetUINT32(type,
&MF_MT_AUDIO_NUM_CHANNELS, &channels)))
- {
- gst_caps_set_simple(output, "channels", G_TYPE_INT, channels, NULL);
- }
- return output;
- }
- ERR("Unrecognized major type %s\n", debugstr_guid(&major_type));
- return NULL;
+} diff --git a/include/codecapi.h b/include/codecapi.h new file mode 100644 index 0000000000..2690b523d7 --- /dev/null +++ b/include/codecapi.h @@ -0,0 +1,38 @@ +#ifndef __CODECAPI_H +#define __CODECAPI_H
+enum eAVEncH264VProfile +{
- eAVEncH264VProfile_unknown = 0,
- eAVEncH264VProfile_Simple = 66,
- eAVEncH264VProfile_Base = 66,
- eAVEncH264VProfile_Main = 77,
- eAVEncH264VProfile_High = 100,
- eAVEncH264VProfile_422 = 122,
- eAVEncH264VProfile_High10 = 110,
- eAVEncH264VProfile_444 = 244,
- eAVEncH264VProfile_Extended = 88,
+};
+enum eAVEncH264VLevel +{
- eAVEncH264VLevel1 = 10,
- eAVEncH264VLevel1_b = 11,
- eAVEncH264VLevel1_1 = 11,
- eAVEncH264VLevel1_2 = 12,
- eAVEncH264VLevel1_3 = 13,
- eAVEncH264VLevel2 = 20,
- eAVEncH264VLevel2_1 = 21,
- eAVEncH264VLevel2_2 = 22,
- eAVEncH264VLevel3 = 30,
- eAVEncH264VLevel3_1 = 31,
- eAVEncH264VLevel3_2 = 32,
- eAVEncH264VLevel4 = 40,
- eAVEncH264VLevel4_1 = 41,
- eAVEncH264VLevel4_2 = 42,
- eAVEncH264VLevel5 = 50,
- eAVEncH264VLevel5_1 = 51,
- eAVEncH264VLevel5_2 = 52
+};
+#endif \ No newline at end of file
On 3/26/20 2:54 PM, Derek Lesho wrote:
On 3/26/20 2:46 PM, Zebediah Figura wrote:
On 3/26/20 12:18 PM, Derek Lesho wrote:
On 3/26/20 11:40 AM, Zebediah Figura wrote:
On 3/25/20 11:57 PM, Derek Lesho wrote:
On 3/24/20 3:22 PM, Zebediah Figura wrote:
The alternative is to just fail, because there's no mapping. For example there's no equivalent to a non annex b h.264 stream in MF.
Sure, I think that's a good design. It's a clear way to communicate "we don't support these caps".
But we do want to support those caps, with modifications. When modifications are needed, we try to use a parser to perform those transformations. For example, qtdemux doesn't output h264 streams in annex b form, so we find a parser that converts it into that form.
Right, the idea is to make those modifications *before* converting into an MF media type.
For reference, the way it's done in quartz is:
(1) during test-play, we record the pin's preferred caps and convert them to a DirectShow media type [setcaps_sink()];
(2) when connecting to the downstream DirectShow sink, we first propose that media type [gstdecoder_source_get_media_type(index = 0)];
(3) if that fails, we propose a bunch of other DirectShow types to the downstream sink [gstdecoder_source_get_media_type(index > 0)];
(4) if none of those work, it tries any types enumerated by the downstream sink, ensuring that GStreamer can understand them [gstdecoder_source_query_accept()];
(5) we convert that type to GstCaps, stripping details we don't care about if necessary, and specify that as the format our sink pad demands [query_sink(), case GST_QUERY_CAPS].
Some of this is implied by the design of quartz (e.g. source pins generally try formats suggested by the downstream sink, though it's not a requirement), but in general, the idea that I think also makes sense here is to determine a media type that you support from the media type that the element exposes, require that type on the sink pad, give GStreamer the tools to convert between the two if necessary, and let GStreamer's caps negotiation do the rest.
Looking at the modifications you do make—
- you force h264 into annex B format, which is the job of h264parse;
Yes, because that's how it's represented on windows.
- you force all raw audio into 32-bit float. Does native mfplat really
never output integer PCM?
I think I can fix that, I do know that MFAudioFormat float can only be be F32LE though.
64-bit float exists. (So does 16-bit and 24-bit, in fact.) That's not necessarily to say that any given MF object handles it, but I'd recommend at least checking whether the bit depth and endianness matches what you expect, instead of just assuming that it does.
Okay, I heard somewhere that MFAudioFormat_Float was always 32 bit. That must have been wrong information, I'll fix that.
It's possible that MFAudioFormat_Float is always 32-bit, but GStreamer's audio/x-raw isn't always 32-bit.
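So the raw-audio branch probably wants to check the depth explicitly rather than hard-code F32LE, e.g. (rough sketch; which depths native objects actually produce is an open question, and falling back to 32-bit when the attribute is absent is an assumption):

    const char *format = NULL;
    UINT32 depth;

    if (FAILED(IMFMediaType_GetUINT32(type, &MF_MT_AUDIO_BITS_PER_SAMPLE, &depth)))
        depth = 32;

    if (IsEqualGUID(&subtype, &MFAudioFormat_Float))
        format = depth == 64 ? "F64LE" : "F32LE";
    else if (IsEqualGUID(&subtype, &MFAudioFormat_PCM))
        format = depth == 8 ? "U8" : depth == 32 ? "S32LE" : "S16LE";

    output = gst_caps_new_empty_simple("audio/x-raw");
    if (format)
        gst_caps_set_simple(output, "format", G_TYPE_STRING, format, NULL);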
> +IMFMediaType* media_type_from_caps(GstCaps *caps) > +{ > + IMFMediaType *media_type; > + GstStructure *info; > + const char *media_type_name; > + gchar *human_readable; > + > + if (FAILED(MFCreateMediaType(&media_type))) > + { > + return NULL; > + } > + > + info = gst_caps_get_structure(caps, 0); > + media_type_name = gst_structure_get_name(info); > + > + human_readable = gst_caps_to_string(caps); > + TRACE("caps = %s\n", human_readable); > + g_free(human_readable); Probably would be best to guard this with TRACE_ON, so that we don't bother allocating anything otherwise.
Also, you'll want to use debugstr_a(), especially since caps can overrun the static buffer in ntdll.
Ack.
> + > + if (!(strncmp(media_type_name, "video", 5))) Style nitpick, superfluous parentheses.
I think Nikolay already mentioned this, but it's probably not a bad idea to just match against the whole "video/x-h264" etc. sequence.
Ack.
> + { > + const char *video_format = media_type_name + 6; > + gint width, height, framerate_num, framerate_den; > + > + IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, > &MFMediaType_Video); > + > + if (gst_structure_get_int(info, "width", &width) && > gst_structure_get_int(info, "height", &height)) > + { > + IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_SIZE, > ((UINT64)width << 32) | height); > + } > + if (gst_structure_get_fraction(info, "framerate", &framerate_num, > &framerate_den)) > + { > + IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_RATE, > ((UINT64)framerate_num << 32) | framerate_den); > + } > + > + if (!(strcmp(video_format, "x-h264"))) > + { > + const char *profile, *level; > + > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_H264); > + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE); > + > + if ((profile = gst_structure_get_string(info, "profile"))) > + { > + if (!(strcmp(profile, "main"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, > eAVEncH264VProfile_Main); > + else if (!(strcmp(profile, "high"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, > eAVEncH264VProfile_High); > + else if (!(strcmp(profile, "high-4:4:4"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, > eAVEncH264VProfile_444); > + else > + ERR("Unrecognized profile %s\n", profile); This ERR (and many below) should probably be a FIXME instead, methinks.
Ack.
> + } > + if ((level = gst_structure_get_string(info, "level"))) > + { > + if (!(strcmp(level, "1"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel1); > + else if (!(strcmp(level, "1.1"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel1_1); > + else if (!(strcmp(level, "1.2"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel1_2); > + else if (!(strcmp(level, "1.3"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel1_3); > + else if (!(strcmp(level, "2"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel2); > + else if (!(strcmp(level, "2.1"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel2_1); > + else if (!(strcmp(level, "2.2"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel2_2); > + else if (!(strcmp(level, "3"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel3); > + else if (!(strcmp(level, "3.1"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel3_1); > + else if (!(strcmp(level, "3.2"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel3_2); > + else if (!(strcmp(level, "4"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel4); > + else if (!(strcmp(level, "4.1"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel4_1); > + else if (!(strcmp(level, "4.2"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel4_2); > + else if (!(strcmp(level, "5"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel5); > + else if (!(strcmp(level, "5.1"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel5_1); > + else if (!(strcmp(level, "5.2"))) > + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, > eAVEncH264VLevel5_2); > + else > + ERR("Unrecognized level %s\n", level); > + } Could we maybe make this a table instead?
Sure.
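Roughly something like this, I imagine (just a sketch, with most of the entries elided):

static const struct
{
    const char *name;
    enum eAVEncH264VLevel level;
}
h264_levels[] =
{
    {"1",   eAVEncH264VLevel1},
    {"1.1", eAVEncH264VLevel1_1},
    {"1.2", eAVEncH264VLevel1_2},
    /* ... */
    {"5.2", eAVEncH264VLevel5_2},
};

static void set_h264_level(IMFMediaType *media_type, const char *level)
{
    unsigned int i;

    for (i = 0; i < ARRAY_SIZE(h264_levels); i++)
    {
        if (!strcmp(level, h264_levels[i].name))
        {
            IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, h264_levels[i].level);
            return;
        }
    }
    FIXME("Unrecognized level %s\n", level);
}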
> + gst_caps_set_simple(caps, "stream-format", G_TYPE_STRING, > "byte-stream", NULL); > + gst_caps_set_simple(caps, "alignment", G_TYPE_STRING, "au", NULL); > + for (unsigned int i = 0; i < gst_caps_get_size(caps); i++) > + { > + GstStructure *structure = gst_caps_get_structure (caps, i); > + gst_structure_remove_field(structure, "codec_data"); > + } > + } > + else if (!(strcmp(video_format, "x-wmv"))) > + { > + gint wmv_version; > + const char *format; > + const GValue *codec_data; > + > + if (gst_structure_get_int(info, "wmvversion", &wmv_version)) > + { > + switch (wmv_version) > + { > + case 1: > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV1); > + break; > + case 2: > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV2); > + break; > + case 3: > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV3); > + break; > + default: > + ERR("Unrecognized wmvversion %d\n", wmv_version); > + } > + } > + > + if ((format = gst_structure_get_string(info, "format"))) > + { > + if (!(strcmp(format, "WVC1"))) > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WVC1); What if it's not? I think that deserves at least a FIXME.
(Style nitpick, extra parentheses.)
Ack.
> + } > + > + if ((codec_data = gst_structure_get_value(info, "codec_data"))) > + { > + GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data); > + if (codec_data_buffer) > + { > + gsize codec_data_size = gst_buffer_get_size(codec_data_buffer); > + gpointer codec_data_raw = heap_alloc(codec_data_size); > + gst_buffer_extract(codec_data_buffer, 0, codec_data_raw, > codec_data_size); > + IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, codec_data_raw, > codec_data_size); > + } > + } > + } > + else if (!(strcmp(video_format, "mpeg"))) > + { > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_M4S2); > + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE); There are other video/mpeg formats.
TBH, the only reason I've included this is for the tests to work, I'll look into how to differentiate the mpeg types tomorrow.
> + } > + else if (!(strcmp(video_format, "x-raw"))) > + { > + const char *fourcc = gst_structure_get_string(info, "stream-format"); > + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, FALSE); > + if (fourcc && (strlen(fourcc) == 4)) > + { > + GUID fourcc_subtype = MFVideoFormat_Base; > + fourcc_subtype.Data1 = MAKEFOURCC( > + toupper(fourcc[0]), toupper(fourcc[1]), toupper(fourcc[2]), > toupper(fourcc[3])); > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &fourcc_subtype); > + } > + else > + ERR("uncompressed video has no stream-format\n"); I've never seen a FOURCC stored in the "stream-format" tag; where are you getting this from?
You're right, I think I'm supposed to use "format" here, but this is dead code right now, so that's why I didn't see any problems.
> + } > + else > + ERR("Unrecognized video format %s\n", video_format); > + } > + else if (!(strncmp(media_type_name, "audio", 5))) > + { > + const char *audio_format = media_type_name + 6; > + > + IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, > &MFMediaType_Audio); > + if (!(strcmp(audio_format, "mpeg"))) > + { > + int mpeg_version = -1; > + > + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE); > + > + if (!(gst_structure_get_int(info, "mpegversion", &mpeg_version))) > + ERR("Failed to get mpegversion\n"); > + switch (mpeg_version) > + { > + case 1: > + { > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG); > + break; > + } What about MFAudioFormat_MP3?
I'm actually not sure what to use here, I should probably remove it for now.
> + case 2: > + case 4: > + { > + const char *format, *profile, *level; > + DWORD profile_level_indication = 0; > + const GValue *codec_data; > + DWORD asc_size = 0; > + struct aac_user_data *user_data = NULL; > + > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_AAC); > + > + codec_data = gst_structure_get_value(info, "codec_data"); > + if (codec_data) > + { > + GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data); > + if (codec_data_buffer) > + { > + if ((asc_size = gst_buffer_get_size(codec_data_buffer)) >= 2) > + { > + user_data = heap_alloc_zero(sizeof(*user_data)+asc_size); > + gst_buffer_extract(codec_data_buffer, 0, (gpointer)(user_data + 1), > asc_size); > + } > + else > + ERR("Unexpected buffer size\n"); > + } > + else > + ERR("codec_data not a buffer\n"); > + } > + else > + ERR("codec_data not found\n"); > + if (!user_data) > + user_data = heap_alloc_zero(sizeof(*user_data)); > + > + { > + int rate; > + if (gst_structure_get_int(info, "rate", &rate)) > + IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, > rate); > + } > + { > + int channels; > + if (gst_structure_get_int(info, "channels", &channels)) > + IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS, > channels); > + } Did you mean to add these blocks?
Yeah, it's so I can declare the variables closer to where they are used.
I'll admit I don't get the obsession with C99 variable declarations, but this just seems janky.
It wouldn't seem janky if we had C99 variable declarations :P
> + > + if ((format = gst_structure_get_string(info, "stream-format"))) > + { > + DWORD payload_type = -1; > + if (!(strcmp(format, "raw"))) > + payload_type = 0; > + else if (!(strcmp(format, "adts"))) > + payload_type = 1; > + else > + ERR("Unrecognized stream-format\n"); > + if (payload_type != -1) > + { > + IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_PAYLOAD_TYPE, > payload_type); > + user_data->payload_type = payload_type; > + } > + } > + else > + { > + ERR("Stream format not present\n"); > + } > + > + profile = gst_structure_get_string(info, "profile"); > + level = gst_structure_get_string(info, "level"); > + /* Data from > https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder#output-t... > */ I'm not sure I'd link to Microsoft documentation; it's not very stable.
Would a link to an archive.is backup of it be better?
Probably.
> + if (profile && level) > + { > + if (!(strcmp(profile, "lc")) && !(strcmp(level, "2"))) > + profile_level_indication = 0x29; > + else if (!(strcmp(profile, "lc")) && !(strcmp(level, "4"))) > + profile_level_indication = 0x2A; > + else if (!(strcmp(profile, "lc")) && !(strcmp(level, "5"))) > + profile_level_indication = 0x2B; > + else > + ERR("Unhandled profile/level combo\n"); > + } > + else > + ERR("Profile or level not present\n"); > + > + if (profile_level_indication) > + { > + IMFMediaType_SetUINT32(media_type, > &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, profile_level_indication); > + user_data->profile_level_indication = profile_level_indication; > + } > + > + IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, (BYTE > *)user_data, sizeof(user_data) + asc_size); > + heap_free(user_data); > + break; > + } > + default: > + ERR("Unhandled mpegversion %d\n", mpeg_version); > + } > + } > + else if (!(strcmp(audio_format, "x-raw"))) > + { > + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_Float); > + > + gst_caps_set_simple(caps, "format", G_TYPE_STRING, "F32LE", NULL); There are other audio formats.
Ah, you mean PCM? I'll add a case for that tomorrow.
f32le is PCM, but I mean integer PCM and other depths than 32-bit.
Hmm okay, I'll do more research on that.
Presumably there should also be channel and sample rate data here.
Yeah good catch.
> + } > + else > + ERR("Unrecognized audio format %s\n", audio_format); > + } > + else > + { > + goto fail; I'm generally of the opinion that one line of cleanup doesn't merit a "goto".
Okay I'll change that then.
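Presumably just inlining the release at the single failure site, along these lines (a sketch; the FIXME mirrors the other unrecognized-format paths):

else
{
    FIXME("Unrecognized major type %s\n", debugstr_a(media_type_name));
    IMFMediaType_Release(media_type);
    return NULL;
}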
> + } > + > + return media_type; > + fail: > + IMFMediaType_Release(media_type); > + return NULL; > +} > + > +static const char *fourcc_str(DWORD fourcc) > +{ > + if (!fourcc) return NULL; > + return wine_dbg_sprintf ("%c%c%c%c", > + (char)(fourcc), (char)(fourcc >> 8), > + (char)(fourcc >> 16), (char)(fourcc >> 24)); > +} I don't think you want to use Wine's debugging utilities for non-debug code.
Ack.
> + > +GstCaps *caps_from_media_type(IMFMediaType *type) > +{ > + GUID major_type; > + GUID subtype; > + GUID base_masked_subtype; > + GstCaps *output = NULL; > + > + if (FAILED(IMFMediaType_GetMajorType(type, &major_type))) > + return NULL; > + if (FAILED(IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype))) > + return NULL; > + base_masked_subtype = subtype; > + base_masked_subtype.Data1 = 0; > + > + if (IsEqualGUID(&major_type, &MFMediaType_Video)) > + { > + UINT64 frame_rate = 0, frame_size = 0; > + DWORD *framerate_num = ((DWORD*)&frame_rate) + 1; > + DWORD *framerate_den = ((DWORD*)&frame_rate); > + DWORD *width = ((DWORD*)&frame_size) + 1; > + DWORD *height = ((DWORD*)&frame_size); It seems simpler to me to do e.g.
DWORD width = frame_size >> 32;
DWORD height = frame_size;
I'm not getting the width and height here, I'm declaring pointers to them which are set later on.
Right, I mean actually set the variables after retrieving frame_size; in full something like
DWORD width, height;
/* ... */
IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
width = frame_size >> 32;
height = frame_size;
Yeah that works.
> + > + if (IsEqualGUID(&subtype, &MFVideoFormat_H264)) > + { > + enum eAVEncH264VProfile h264_profile; > + enum eAVEncH264VLevel h264_level; > + output = gst_caps_new_empty_simple("video/x-h264"); > + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, > "byte-stream", NULL); > + gst_caps_set_simple(output, "alignment", G_TYPE_STRING, "au", NULL); > + > + if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_PROFILE, > &h264_profile))) > + { > + const char *profile = NULL; > + switch (h264_profile) > + { > + case eAVEncH264VProfile_Main: profile = "main"; break; > + case eAVEncH264VProfile_High: profile = "high"; break; > + case eAVEncH264VProfile_444: profile = "high-4:4:4"; break; > + default: ERR("Unknown profile %u\n", h264_profile); > + } > + if (profile) > + gst_caps_set_simple(output, "profile", G_TYPE_STRING, profile, NULL); > + } > + if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_LEVEL, > &h264_level))) > + { > + const char *level = NULL; > + switch (h264_level) > + { > + case eAVEncH264VLevel1: level = "1"; break; > + case eAVEncH264VLevel1_1: level = "1.1"; break; > + case eAVEncH264VLevel1_2: level = "1.2"; break; > + case eAVEncH264VLevel1_3: level = "1.3"; break; > + case eAVEncH264VLevel2: level = "2"; break; > + case eAVEncH264VLevel2_1: level = "2.1"; break; > + case eAVEncH264VLevel2_2: level = "2.2"; break; > + case eAVEncH264VLevel3: level = "3"; break; > + case eAVEncH264VLevel3_1: level = "3.1"; break; > + case eAVEncH264VLevel3_2: level = "3.2"; break; > + case eAVEncH264VLevel4: level = "4"; break; > + case eAVEncH264VLevel4_1: level = "4.1"; break; > + case eAVEncH264VLevel4_2: level = "4.2"; break; > + case eAVEncH264VLevel5: level = "5"; break; > + case eAVEncH264VLevel5_1: level = "5.1"; break; > + case eAVEncH264VLevel5_2: level = "5.2"; break; > + default: ERR("Unknown level %u\n", h264_level); > + } > + if (level) > + gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL); > + } > + } > + else if (IsEqualGUID(&subtype, &MFVideoFormat_WVC1)) > + { > + BYTE *user_data; > + DWORD user_data_size; > + output = gst_caps_new_empty_simple("video/x-wmv"); > + gst_caps_set_simple(output, "format", G_TYPE_STRING, "WVC1", NULL); > + > + gst_caps_set_simple(output, "wmvversion", G_TYPE_INT, 3, NULL); > + > + if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, > &user_data, &user_data_size))) > + { > + GstBuffer *codec_data_buffer = gst_buffer_new_allocate(NULL, > user_data_size, NULL); > + gst_buffer_fill(codec_data_buffer, 0, user_data, user_data_size); > + gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, > codec_data_buffer, NULL); > + gst_buffer_unref(codec_data_buffer); > + CoTaskMemFree(user_data); > + } > + } > + else if (IsEqualGUID(&base_masked_subtype, &MFVideoFormat_Base)) > + { > + output = gst_caps_new_empty_simple("video/x-raw"); > + gst_caps_set_simple(output, "format", G_TYPE_STRING, > fourcc_str(subtype.Data1), NULL); What about RGB formats?
Ah, I didn't think about those; looks like we'll have to use a table of known conversions instead.
Well, to some degree, though you can also make use of gst_video_format_from_fourcc(). See also amt_to_gst_caps_video() in gstdemux.c.
Ah check for RGB formats first then fall back to FOURCC conversion, okay sure.
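So, roughly (the helper name is made up, the RGB32-to-BGRx mapping below is my assumption, and this also needs gst/video/video.h):

static const char *format_from_subtype(const GUID *subtype)
{
    GstVideoFormat format;

    /* RGB subtypes don't carry a FOURCC in Data1, so map them explicitly. */
    if (IsEqualGUID(subtype, &MFVideoFormat_RGB32))
        return "BGRx";
    /* ... other RGB subtypes ... */

    /* Fall back to GStreamer's own FOURCC lookup for the YUV formats. */
    if ((format = gst_video_format_from_fourcc(subtype->Data1)) == GST_VIDEO_FORMAT_UNKNOWN)
        return NULL;
    return gst_video_format_to_string(format);
}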
> + } > + else { > + ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype)); > + return NULL; > + } > + > + IMFMediaType_GetUINT64(type, &MF_MT_FRAME_RATE, &frame_rate); > + IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size); > + > + if (frame_rate) > + gst_caps_set_simple(output, "framerate", GST_TYPE_FRACTION, > *framerate_num, *framerate_den, NULL); > + if (frame_size) > + { > + gst_caps_set_simple(output, "width", G_TYPE_INT, *width, NULL); > + gst_caps_set_simple(output, "height", G_TYPE_INT, *height, NULL); > + } > + return output; > + } > + else if (IsEqualGUID(&major_type, &MFMediaType_Audio)) > + { > + DWORD rate, channels; > + > + if (IsEqualGUID(&subtype, &MFAudioFormat_AAC)) > + { > + DWORD payload_type, indication; > + struct aac_user_data *user_data; > + UINT32 user_data_size; > + output = gst_caps_new_empty_simple("audio/mpeg"); > + > + /* TODO */ > + gst_caps_set_simple(output, "framed", G_TYPE_BOOLEAN, TRUE, NULL); > + gst_caps_set_simple(output, "mpegversion", G_TYPE_INT, 4, NULL); What's TODO here?
MFAudioFormat_AAC could also mean mpegversion=2, and I don't know what the "framed" attribute is for.
A TODO message should probably mention what exactly is to be done.
In general it's good practice to understand what your code is doing before you submit it, but regardless, "framed" means there is exactly one frame per buffer. Is that guaranteed by the MF source? (It's not obvious to me that it is...)
Yeah, I should probably remove it in that case. I was trying to match up all the attributes when going through the conversion to IMFMediaType and back, but it's probably not necessary.
> + > + if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, > &payload_type))) > + { > + switch (payload_type) > + { > + case 0: > + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", > NULL); > + break; > + case 1: > + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "adts", > NULL); > + break; > + default: > + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", > NULL); Seems to me that 2 and 3 should be mapped to "adif" and "loas", respectively.
Ack.
> + } > + } > + else > + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", > NULL); > + > + if (SUCCEEDED(IMFMediaType_GetUINT32(type, > &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, &indication))) > + { > + switch (indication) > + { > + case 0x29: > + { > + gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL); > + gst_caps_set_simple(output, "level", G_TYPE_STRING, "2", NULL); > + break; > + } > + case 0x2A: > + { > + gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL); > + gst_caps_set_simple(output, "level", G_TYPE_STRING, "4", NULL); > + break; > + } > + case 0x2B: > + { > + gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL); > + gst_caps_set_simple(output, "level", G_TYPE_STRING, "5", NULL); > + break; > + } > + default: > + ERR("Unrecognized profile-level-indication %u\n", indication); > + } I think you could significantly deduplicate this switch.
Ack.
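Something like this, I suppose, since all three cases are LC and differ only in level:

const char *level = NULL;

switch (indication)
{
    case 0x29: level = "2"; break;
    case 0x2A: level = "4"; break;
    case 0x2B: level = "5"; break;
    default:
        FIXME("Unrecognized profile-level-indication %u\n", indication);
}
if (level)
{
    gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL);
    gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL);
}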
> + } > + > + if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, > (BYTE **) &user_data, &user_data_size))) > + { > + if (user_data_size > sizeof(sizeof(*user_data))) > + { > + GstBuffer *audio_specific_config = gst_buffer_new_allocate(NULL, > user_data_size - sizeof(*user_data), NULL); > + gst_buffer_fill(audio_specific_config, 0, user_data + 1, > user_data_size - sizeof(*user_data)); > + > + gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, > audio_specific_config, NULL); > + gst_buffer_unref(audio_specific_config); > + } > + CoTaskMemFree(user_data); > + } > + } > + else if (IsEqualGUID(&subtype, &MFAudioFormat_Float)) > + { > + output = gst_caps_new_empty_simple("audio/x-raw"); > + > + gst_caps_set_simple(output, "format", G_TYPE_STRING, "F32LE", NULL); > + } > + else > + { > + ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype)); > + if (output) > + gst_caps_unref(output); > + return NULL; > + } > + if (SUCCEEDED(IMFMediaType_GetUINT32(type, > &MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate))) > + { > + gst_caps_set_simple(output, "rate", G_TYPE_INT, rate, NULL); > + } > + if (SUCCEEDED(IMFMediaType_GetUINT32(type, > &MF_MT_AUDIO_NUM_CHANNELS, &channels))) > + { > + gst_caps_set_simple(output, "channels", G_TYPE_INT, channels, NULL); > + } > + > + return output; > + } > + > + ERR("Unrecognized major type %s\n", debugstr_guid(&major_type)); > + return NULL; > +} > diff --git a/include/codecapi.h b/include/codecapi.h > new file mode 100644 > index 0000000000..2690b523d7 > --- /dev/null > +++ b/include/codecapi.h > @@ -0,0 +1,38 @@ > +#ifndef __CODECAPI_H > +#define __CODECAPI_H > + > +enum eAVEncH264VProfile > +{ > + eAVEncH264VProfile_unknown = 0, > + eAVEncH264VProfile_Simple = 66, > + eAVEncH264VProfile_Base = 66, > + eAVEncH264VProfile_Main = 77, > + eAVEncH264VProfile_High = 100, > + eAVEncH264VProfile_422 = 122, > + eAVEncH264VProfile_High10 = 110, > + eAVEncH264VProfile_444 = 244, > + eAVEncH264VProfile_Extended = 88, > +}; > + > +enum eAVEncH264VLevel > +{ > + eAVEncH264VLevel1 = 10, > + eAVEncH264VLevel1_b = 11, > + eAVEncH264VLevel1_1 = 11, > + eAVEncH264VLevel1_2 = 12, > + eAVEncH264VLevel1_3 = 13, > + eAVEncH264VLevel2 = 20, > + eAVEncH264VLevel2_1 = 21, > + eAVEncH264VLevel2_2 = 22, > + eAVEncH264VLevel3 = 30, > + eAVEncH264VLevel3_1 = 31, > + eAVEncH264VLevel3_2 = 32, > + eAVEncH264VLevel4 = 40, > + eAVEncH264VLevel4_1 = 41, > + eAVEncH264VLevel4_2 = 42, > + eAVEncH264VLevel5 = 50, > + eAVEncH264VLevel5_1 = 51, > + eAVEncH264VLevel5_2 = 52 > +}; > + > +#endif > \ No newline at end of file >
On 3/26/20 3:19 PM, Zebediah Figura wrote:
On 3/26/20 2:54 PM, Derek Lesho wrote:
On 3/26/20 2:46 PM, Zebediah Figura wrote:
On 3/26/20 12:18 PM, Derek Lesho wrote:
On 3/26/20 11:40 AM, Zebediah Figura wrote:
On 3/25/20 11:57 PM, Derek Lesho wrote:
On 3/24/20 3:22 PM, Zebediah Figura wrote:
> General comments:
>
> It's not great to introduce code that's not used anywhere, it's
> essentially dead until then.
>
> This could, I think, be split up into much smaller pieces in any case:
> you're introducing two different functions here, and each function
> introduces support for several different formats.
>
> On 3/24/20 2:39 PM, Derek Lesho wrote:
>> [...]
>> +IMFMediaType* media_type_from_caps(GstCaps *caps);
>> +GstCaps *caps_from_media_type(IMFMediaType *type);
> Using the generic name "media_type", in a module that serves multiple
> media APIs, is not great.

Would you prefer mf_media_type?
That's probably better, yes.
> Also, why is this in the public header?

Would it be better to split this into a mfplat_private.h header?
I mean, why do you need to use it from anything other than mfplat.c?
Because I'd prefer not to merge around 4,000 lines of code into a single file. (See media_source.c, mf_decode.c.)
This is another reason why it doesn't make a lot of sense to submit dead code.
The code which uses these functions is included in my more recent patch set.
While submitting the code that uses a helper function in the same patch set does help, it's still not the best way to organize patches. Also, in this case, it means submitting at least 16 patches in one set, which is not desirable either.
The best way to submit such a patch set is to add the code which uses (or is going to use) media_type_from_caps() first, then actually implement media_type_from_caps(). That can mean e.g. adding a stub media_type_from_caps() that prints a FIXME and returns NULL, such as in fb6956c7d, or just leaving that part out of the caller (and probably doing a similar fail-with-FIXME). I don't know what the best way to arrange that is in this case, but I'm not the one writing the patches.
Such a top-down approach is much easier to review, because then you know exactly how a helper will be used when or before you have to review that helper's implementation. When you submit the helper by itself, first, it's hard to understand if it's doing the right thing. You also won't have dead code (and won't have to work around compiler warnings for such by e.g. making functions non-static).
Ah, I see what you mean, that makes sense.
> Also, style nitpick: please try to be consistent about your asterisk > placement (ideally using "type *var" style.) Ack. >> #endif /* __GST_PRIVATE_INCLUDED__ */ > ... > >> @@ -433,3 +438,529 @@ HRESULT mfplat_get_class_object(REFCLSID >> rclsid, REFIID riid, void **obj) >> return CLASS_E_CLASSNOTAVAILABLE; >> } >> + >> +struct aac_user_data >> +{ >> + WORD payload_type; >> + WORD profile_level_indication; >> + WORD struct_type; >> + WORD reserved; >> + /*BYTE audio_specific_config;*/ > What's this field doing here? We store the audio_config_config after these fields, and I wanted to express that here, it's not important though.
It's not necessarily a problem to specify that arbitrary data comes after the struct, but that comment is not particularly clear.
>> +}; >> + >> +/* IMPORTANT: caps will be modified to represent the exact type >> needed for the format */ > Why? Because in the case of a demuxer, the caps of the stream we receive might not map 1:1 with the representation in media foundation. Because of this, in the media source, if any adjustments are needed, we feed the buffers through a parser to correct it.
See: https://github.com/Guy1524/wine/commit/7ab88be3882ab95f3fc17dab374184e06f018...
This seems like a very confusing way to do that. At least I'd relegate it to a separate function. I wouldn't expect a conversion function to modify its argument, and it moreover makes it essentially unusable anywhere else.
The alternative is to just fail, because there's no mapping. For example, there's no equivalent to a non-Annex B H.264 stream in MF.
Sure, I think that's a good design. It's a clear way to communicate "we don't support these caps".
But we do want to support those caps, with modifications. When modifications are needed, we try to use a parser to perform those transformations. For example, qtdemux doesn't output h264 streams in annex b form, so we find a parser that converts it into that form.
Right, the idea is to make those modifications *before* converting into an MF media type.
For reference, the way it's done in quartz is:
(1) during test-play, we record the pin's preferred caps and convert them to a DirectShow media type [setcaps_sink()];
(2) when connecting to the downstream DirectShow sink, we first propose that media type [gstdecoder_source_get_media_type(index = 0)];
(3) if that fails, we propose a bunch of other DirectShow types to the downstream sink [gstdecoder_source_get_media_type(index > 0)];
(4) if none of those work, it tries any types enumerated by the downstream sink, ensuring that GStreamer can understand them [gstdecoder_source_query_accept()];
(5) we convert that type to GstCaps, stripping details we don't care about if necessary, and specify that as the format our sink pad demands [query_sink(), case GST_QUERY_CAPS].
Some of this is implied by the design of quartz (e.g. source pins generally try formats suggested by the downstream sink, though it's not a requirement), but in general, the idea that I think also makes sense here is to determine a media type that you support from the media type that the element exposes, require that type on the sink pad, give GStreamer the tools to convert between the two if necessary, and let GStreamer's caps negotiation do the rest.
I don't think that using GStreamer's caps negotiation is mutually exclusive with the modifications I'm making to the caps in this function. The function's purpose is to find the closest matching GstCaps, and it describes how it got there by performing the modifications. Yes, right now, the code just manually finds a parser, but we could easily set the sink caps to those returned from this function, then use caps negotiation. Essentially, the function determines the media type we support by looking at the preferred caps' format, and just changes the details to make it match the media foundation representation. I don't see a need to split that off into a separate function, as, in my opinion, the function serves as good documentation on the exact meanings of a given IMFMediaType.
That said, these modifications are specific to the format, and along those lines it may make more sense to append specific elements rather than to make specific changes to the caps and try to find an element that can accommodate those. This will also help if you ever need to append multiple such elements. Thus you can e.g. append an audioconvert element unconditionally, and if no conversion is necessary it'll just pass through.
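For the raw-audio case that could look roughly like the following (the pipeline, pad, and sink variables here are placeholders, not actual media source code):

GstElement *convert = gst_element_factory_make("audioconvert", NULL);
GstPad *convert_sink;

gst_bin_add(GST_BIN(pipeline), convert);
gst_element_sync_state_with_parent(convert);

/* demuxer -> audioconvert -> appsink; audioconvert passes buffers
 * through untouched when no conversion is needed */
convert_sink = gst_element_get_static_pad(convert, "sink");
gst_pad_link(demuxer_src_pad, convert_sink);
gst_object_unref(convert_sink);
gst_element_link(convert, appsink);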
In the case of compressed sample parsers, what would I append unconditionally? It's very specific to the type.
Looking at the modifications you do make—
- you force h264 into annex B format, which is the job of h264parse;
Yes, because that's how it's represented on windows.
- you force all raw audio into 32-bit float. Does native mfplat really
never output integer PCM?
I think I can fix that; I do know that MFAudioFormat_Float can only be F32LE, though.
64-bit float exists. (So does 16-bit and 24-bit, in fact.) That's not necessarily to say that any given MF object handles it, but I'd recommend at least checking whether the bit depth and endianness matches what you expect, instead of just assuming that it does.
Okay, I heard somewhere that MFAudioFormat_Float was always 32-bit. That must have been wrong information; I'll fix that.
It's possible that MFAudioFormat_Float is always 32-bit, but GStreamer's audio/x-raw isn't always 32-bit.
Oh, that's why we set the F32LE format: so that if it isn't 32-bit little-endian, it will be converted to that.
>> +IMFMediaType* media_type_from_caps(GstCaps *caps) >> +{ >> + IMFMediaType *media_type; >> + GstStructure *info; >> + const char *media_type_name; >> + gchar *human_readable; >> + >> + if (FAILED(MFCreateMediaType(&media_type))) >> + { >> + return NULL; >> + } >> + >> + info = gst_caps_get_structure(caps, 0); >> + media_type_name = gst_structure_get_name(info); >> + >> + human_readable = gst_caps_to_string(caps); >> + TRACE("caps = %s\n", human_readable); >> + g_free(human_readable); > Probably would be best to guard this with TRACE_ON, so that we don't > bother allocating anything otherwise. > > Also, you'll want to use debugstr_a(), especially since caps can overrun > the static buffer in ntdll. Ack. >> + >> + if (!(strncmp(media_type_name, "video", 5))) > Style nitpick, superfluous parentheses. > > I think Nikolay already mentioned this, but it's probably not a bad idea > to just match against the whole "video/x-h264" etc. sequence. Ack. >> + { >> + const char *video_format = media_type_name + 6; >> + gint width, height, framerate_num, framerate_den; >> + >> + IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, >> &MFMediaType_Video); >> + >> + if (gst_structure_get_int(info, "width", &width) && >> gst_structure_get_int(info, "height", &height)) >> + { >> + IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_SIZE, >> ((UINT64)width << 32) | height); >> + } >> + if (gst_structure_get_fraction(info, "framerate", &framerate_num, >> &framerate_den)) >> + { >> + IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_RATE, >> ((UINT64)framerate_num << 32) | framerate_den); >> + } >> + >> + if (!(strcmp(video_format, "x-h264"))) >> + { >> + const char *profile, *level; >> + >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_H264); >> + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE); >> + >> + if ((profile = gst_structure_get_string(info, "profile"))) >> + { >> + if (!(strcmp(profile, "main"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, >> eAVEncH264VProfile_Main); >> + else if (!(strcmp(profile, "high"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, >> eAVEncH264VProfile_High); >> + else if (!(strcmp(profile, "high-4:4:4"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, >> eAVEncH264VProfile_444); >> + else >> + ERR("Unrecognized profile %s\n", profile); > This ERR (and many below) should probably be a FIXME instead, methinks. Ack. 
>> + } >> + if ((level = gst_structure_get_string(info, "level"))) >> + { >> + if (!(strcmp(level, "1"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel1); >> + else if (!(strcmp(level, "1.1"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel1_1); >> + else if (!(strcmp(level, "1.2"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel1_2); >> + else if (!(strcmp(level, "1.3"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel1_3); >> + else if (!(strcmp(level, "2"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel2); >> + else if (!(strcmp(level, "2.1"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel2_1); >> + else if (!(strcmp(level, "2.2"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel2_2); >> + else if (!(strcmp(level, "3"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel3); >> + else if (!(strcmp(level, "3.1"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel3_1); >> + else if (!(strcmp(level, "3.2"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel3_2); >> + else if (!(strcmp(level, "4"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel4); >> + else if (!(strcmp(level, "4.1"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel4_1); >> + else if (!(strcmp(level, "4.2"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel4_2); >> + else if (!(strcmp(level, "5"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel5); >> + else if (!(strcmp(level, "5.1"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel5_1); >> + else if (!(strcmp(level, "5.2"))) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >> eAVEncH264VLevel5_2); >> + else >> + ERR("Unrecognized level %s\n", level); >> + } > Could we maybe make this a table instead? Sure. >> + gst_caps_set_simple(caps, "stream-format", G_TYPE_STRING, >> "byte-stream", NULL); >> + gst_caps_set_simple(caps, "alignment", G_TYPE_STRING, "au", NULL); >> + for (unsigned int i = 0; i < gst_caps_get_size(caps); i++) >> + { >> + GstStructure *structure = gst_caps_get_structure (caps, i); >> + gst_structure_remove_field(structure, "codec_data"); >> + } >> + } >> + else if (!(strcmp(video_format, "x-wmv"))) >> + { >> + gint wmv_version; >> + const char *format; >> + const GValue *codec_data; >> + >> + if (gst_structure_get_int(info, "wmvversion", &wmv_version)) >> + { >> + switch (wmv_version) >> + { >> + case 1: >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV1); >> + break; >> + case 2: >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV2); >> + break; >> + case 3: >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV3); >> + break; >> + default: >> + ERR("Unrecognized wmvversion %d\n", wmv_version); >> + } >> + } >> + >> + if ((format = gst_structure_get_string(info, "format"))) >> + { >> + if (!(strcmp(format, "WVC1"))) >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WVC1); > What if it's not? I think that deserves at least a FIXME. > > (Style nitpick, extra parentheses.) Ack. 
>> + } >> + >> + if ((codec_data = gst_structure_get_value(info, "codec_data"))) >> + { >> + GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data); >> + if (codec_data_buffer) >> + { >> + gsize codec_data_size = gst_buffer_get_size(codec_data_buffer); >> + gpointer codec_data_raw = heap_alloc(codec_data_size); >> + gst_buffer_extract(codec_data_buffer, 0, codec_data_raw, >> codec_data_size); >> + IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, codec_data_raw, >> codec_data_size); >> + } >> + } >> + } >> + else if (!(strcmp(video_format, "mpeg"))) >> + { >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_M4S2); >> + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE); > There are other video/mpeg formats. TBH, the only reason I've included this is for the tests to work, I'll look into how to differentiate the mpeg types tomorrow. >> + } >> + else if (!(strcmp(video_format, "x-raw"))) >> + { >> + const char *fourcc = gst_structure_get_string(info, "stream-format"); >> + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, FALSE); >> + if (fourcc && (strlen(fourcc) == 4)) >> + { >> + GUID fourcc_subtype = MFVideoFormat_Base; >> + fourcc_subtype.Data1 = MAKEFOURCC( >> + toupper(fourcc[0]), toupper(fourcc[1]), toupper(fourcc[2]), >> toupper(fourcc[3])); >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &fourcc_subtype); >> + } >> + else >> + ERR("uncompressed video has no stream-format\n"); > I've never seen a FOURCC stored in the "stream-format" tag; where are > you getting this from? You're right, I think I'm supposed to use "format" here, but this is dead code rn so I that's why I didn't see any problems. >> + } >> + else >> + ERR("Unrecognized video format %s\n", video_format); >> + } >> + else if (!(strncmp(media_type_name, "audio", 5))) >> + { >> + const char *audio_format = media_type_name + 6; >> + >> + IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, >> &MFMediaType_Audio); >> + if (!(strcmp(audio_format, "mpeg"))) >> + { >> + int mpeg_version = -1; >> + >> + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE); >> + >> + if (!(gst_structure_get_int(info, "mpegversion", &mpeg_version))) >> + ERR("Failed to get mpegversion\n"); >> + switch (mpeg_version) >> + { >> + case 1: >> + { >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG); >> + break; >> + } > What about MFAudioFormat_MP3? I'm actually not sure what to use here, I should probably remove it for now. 
>> + case 2: >> + case 4: >> + { >> + const char *format, *profile, *level; >> + DWORD profile_level_indication = 0; >> + const GValue *codec_data; >> + DWORD asc_size = 0; >> + struct aac_user_data *user_data = NULL; >> + >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_AAC); >> + >> + codec_data = gst_structure_get_value(info, "codec_data"); >> + if (codec_data) >> + { >> + GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data); >> + if (codec_data_buffer) >> + { >> + if ((asc_size = gst_buffer_get_size(codec_data_buffer)) >= 2) >> + { >> + user_data = heap_alloc_zero(sizeof(*user_data)+asc_size); >> + gst_buffer_extract(codec_data_buffer, 0, (gpointer)(user_data + 1), >> asc_size); >> + } >> + else >> + ERR("Unexpected buffer size\n"); >> + } >> + else >> + ERR("codec_data not a buffer\n"); >> + } >> + else >> + ERR("codec_data not found\n"); >> + if (!user_data) >> + user_data = heap_alloc_zero(sizeof(*user_data)); >> + >> + { >> + int rate; >> + if (gst_structure_get_int(info, "rate", &rate)) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, >> rate); >> + } >> + { >> + int channels; >> + if (gst_structure_get_int(info, "channels", &channels)) >> + IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS, >> channels); >> + } > Did you mean to add these blocks? Yeah, it's so I can declare the variables closer to where they are used.
I'll admit I don't get the obsession with C99 variable declarations, but this just seems janky.
It wouldn't seem janky if we had C99 variable declarations :P
>> + >> + if ((format = gst_structure_get_string(info, "stream-format"))) >> + { >> + DWORD payload_type = -1; >> + if (!(strcmp(format, "raw"))) >> + payload_type = 0; >> + else if (!(strcmp(format, "adts"))) >> + payload_type = 1; >> + else >> + ERR("Unrecognized stream-format\n"); >> + if (payload_type != -1) >> + { >> + IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_PAYLOAD_TYPE, >> payload_type); >> + user_data->payload_type = payload_type; >> + } >> + } >> + else >> + { >> + ERR("Stream format not present\n"); >> + } >> + >> + profile = gst_structure_get_string(info, "profile"); >> + level = gst_structure_get_string(info, "level"); >> + /* Data from >> https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder#output-t... >> */ > I'm not sure I'd link to Microsoft documentation; it's not very stable. Would a link to an archive.is backup of it be better?
Probably.
>> + if (profile && level) >> + { >> + if (!(strcmp(profile, "lc")) && !(strcmp(level, "2"))) >> + profile_level_indication = 0x29; >> + else if (!(strcmp(profile, "lc")) && !(strcmp(level, "4"))) >> + profile_level_indication = 0x2A; >> + else if (!(strcmp(profile, "lc")) && !(strcmp(level, "5"))) >> + profile_level_indication = 0x2B; >> + else >> + ERR("Unhandled profile/level combo\n"); >> + } >> + else >> + ERR("Profile or level not present\n"); >> + >> + if (profile_level_indication) >> + { >> + IMFMediaType_SetUINT32(media_type, >> &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, profile_level_indication); >> + user_data->profile_level_indication = profile_level_indication; >> + } >> + >> + IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, (BYTE >> *)user_data, sizeof(user_data) + asc_size); >> + heap_free(user_data); >> + break; >> + } >> + default: >> + ERR("Unhandled mpegversion %d\n", mpeg_version); >> + } >> + } >> + else if (!(strcmp(audio_format, "x-raw"))) >> + { >> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_Float); >> + >> + gst_caps_set_simple(caps, "format", G_TYPE_STRING, "F32LE", NULL); > There are other audio formats. Ah, you mean PCM? I'll add a case for that tomorrow.
f32le is PCM, but I mean integer PCM and other depths than 32-bit.
Hmm okay, I'll do more research on that.
Presumably there should also be channel and sample rate data here.
Yeah good catch.
>> + } >> + else >> + ERR("Unrecognized audio format %s\n", audio_format); >> + } >> + else >> + { >> + goto fail; > I'm generally of the opinion that one line of cleanup doesn't merit a > "goto". Okay I'll change that then. >> + } >> + >> + return media_type; >> + fail: >> + IMFMediaType_Release(media_type); >> + return NULL; >> +} >> + >> +static const char *fourcc_str(DWORD fourcc) >> +{ >> + if (!fourcc) return NULL; >> + return wine_dbg_sprintf ("%c%c%c%c", >> + (char)(fourcc), (char)(fourcc >> 8), >> + (char)(fourcc >> 16), (char)(fourcc >> 24)); >> +} > I don't think you want to use Wine's debugging utilities for non-debug > code. Ack. >> + >> +GstCaps *caps_from_media_type(IMFMediaType *type) >> +{ >> + GUID major_type; >> + GUID subtype; >> + GUID base_masked_subtype; >> + GstCaps *output = NULL; >> + >> + if (FAILED(IMFMediaType_GetMajorType(type, &major_type))) >> + return NULL; >> + if (FAILED(IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype))) >> + return NULL; >> + base_masked_subtype = subtype; >> + base_masked_subtype.Data1 = 0; >> + >> + if (IsEqualGUID(&major_type, &MFMediaType_Video)) >> + { >> + UINT64 frame_rate = 0, frame_size = 0; >> + DWORD *framerate_num = ((DWORD*)&frame_rate) + 1; >> + DWORD *framerate_den = ((DWORD*)&frame_rate); >> + DWORD *width = ((DWORD*)&frame_size) + 1; >> + DWORD *height = ((DWORD*)&frame_size); > It seems simpler to me to do e.g. > > DWORD width = frame_size; > DWORD height = frame_size >> 32; I'm not getting the width and height here, I'm declaring pointers to them which are set later on.
Right, I mean actually set the variables after retrieving frame_size; in full something like
DWORD width, height; /* ... */ IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size); width = frame_size; height = frame_size >> 32;
Yeah that works.
>> + >> + if (IsEqualGUID(&subtype, &MFVideoFormat_H264)) >> + { >> + enum eAVEncH264VProfile h264_profile; >> + enum eAVEncH264VLevel h264_level; >> + output = gst_caps_new_empty_simple("video/x-h264"); >> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, >> "byte-stream", NULL); >> + gst_caps_set_simple(output, "alignment", G_TYPE_STRING, "au", NULL); >> + >> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_PROFILE, >> &h264_profile))) >> + { >> + const char *profile = NULL; >> + switch (h264_profile) >> + { >> + case eAVEncH264VProfile_Main: profile = "main"; break; >> + case eAVEncH264VProfile_High: profile = "high"; break; >> + case eAVEncH264VProfile_444: profile = "high-4:4:4"; break; >> + default: ERR("Unknown profile %u\n", h264_profile); >> + } >> + if (profile) >> + gst_caps_set_simple(output, "profile", G_TYPE_STRING, profile, NULL); >> + } >> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_LEVEL, >> &h264_level))) >> + { >> + const char *level = NULL; >> + switch (h264_level) >> + { >> + case eAVEncH264VLevel1: level = "1"; break; >> + case eAVEncH264VLevel1_1: level = "1.1"; break; >> + case eAVEncH264VLevel1_2: level = "1.2"; break; >> + case eAVEncH264VLevel1_3: level = "1.3"; break; >> + case eAVEncH264VLevel2: level = "2"; break; >> + case eAVEncH264VLevel2_1: level = "2.1"; break; >> + case eAVEncH264VLevel2_2: level = "2.2"; break; >> + case eAVEncH264VLevel3: level = "3"; break; >> + case eAVEncH264VLevel3_1: level = "3.1"; break; >> + case eAVEncH264VLevel3_2: level = "3.2"; break; >> + case eAVEncH264VLevel4: level = "4"; break; >> + case eAVEncH264VLevel4_1: level = "4.1"; break; >> + case eAVEncH264VLevel4_2: level = "4.2"; break; >> + case eAVEncH264VLevel5: level = "5"; break; >> + case eAVEncH264VLevel5_1: level = "5.1"; break; >> + case eAVEncH264VLevel5_2: level = "5.2"; break; >> + default: ERR("Unknown level %u\n", h264_level); >> + } >> + if (level) >> + gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL); >> + } >> + } >> + else if (IsEqualGUID(&subtype, &MFVideoFormat_WVC1)) >> + { >> + BYTE *user_data; >> + DWORD user_data_size; >> + output = gst_caps_new_empty_simple("video/x-wmv"); >> + gst_caps_set_simple(output, "format", G_TYPE_STRING, "WVC1", NULL); >> + >> + gst_caps_set_simple(output, "wmvversion", G_TYPE_INT, 3, NULL); >> + >> + if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, >> &user_data, &user_data_size))) >> + { >> + GstBuffer *codec_data_buffer = gst_buffer_new_allocate(NULL, >> user_data_size, NULL); >> + gst_buffer_fill(codec_data_buffer, 0, user_data, user_data_size); >> + gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, >> codec_data_buffer, NULL); >> + gst_buffer_unref(codec_data_buffer); >> + CoTaskMemFree(user_data); >> + } >> + } >> + else if (IsEqualGUID(&base_masked_subtype, &MFVideoFormat_Base)) >> + { >> + output = gst_caps_new_empty_simple("video/x-raw"); >> + gst_caps_set_simple(output, "format", G_TYPE_STRING, >> fourcc_str(subtype.Data1), NULL); > What about RGB formats? Ah, I didn't think about those, looks like we'll have to use a table of known conversions instead.
Well, to some degree, though you can also make use of gst_video_format_from_fourcc(). See also amt_to_gst_caps_video() in gstdemux.c.
Ah check for RGB formats first then fall back to FOURCC conversion, okay sure.
>> + } >> + else { >> + ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype)); >> + return NULL; >> + } >> + >> + IMFMediaType_GetUINT64(type, &MF_MT_FRAME_RATE, &frame_rate); >> + IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size); >> + >> + if (frame_rate) >> + gst_caps_set_simple(output, "framerate", GST_TYPE_FRACTION, >> *framerate_num, *framerate_den, NULL); >> + if (frame_size) >> + { >> + gst_caps_set_simple(output, "width", G_TYPE_INT, *width, NULL); >> + gst_caps_set_simple(output, "height", G_TYPE_INT, *height, NULL); >> + } >> + return output; >> + } >> + else if (IsEqualGUID(&major_type, &MFMediaType_Audio)) >> + { >> + DWORD rate, channels; >> + >> + if (IsEqualGUID(&subtype, &MFAudioFormat_AAC)) >> + { >> + DWORD payload_type, indication; >> + struct aac_user_data *user_data; >> + UINT32 user_data_size; >> + output = gst_caps_new_empty_simple("audio/mpeg"); >> + >> + /* TODO */ >> + gst_caps_set_simple(output, "framed", G_TYPE_BOOLEAN, TRUE, NULL); >> + gst_caps_set_simple(output, "mpegversion", G_TYPE_INT, 4, NULL); > What's TODO here? MFAudioFormat_AAC could also mean mpegversion=2, and I don't know what the "framed" attribute is for.
A TODO message should probably mention what exactly is to be done.
In general it's good practice to understand what your code is doing before you submit it, but regardless, "framed" means there is exactly one frame per buffer. Is that guaranteed by the MF source? (It's not obvious to me that it is...)
Yeah I should probably remove it in that case, I was trying to match up all the attributes when going through the conversion to IMFMediaType and back, but it's probably not necessary.
>> + >> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, >> &payload_type))) >> + { >> + switch (payload_type) >> + { >> + case 0: >> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", >> NULL); >> + break; >> + case 1: >> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "adts", >> NULL); >> + break; >> + default: >> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", >> NULL); > Seems to me that 2 and 3 should be mapped to "adif" and "loas", > respectively. Ack. >> + } >> + } >> + else >> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", >> NULL); >> + >> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, >> &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, &indication))) >> + { >> + switch (indication) >> + { >> + case 0x29: >> + { >> + gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL); >> + gst_caps_set_simple(output, "level", G_TYPE_STRING, "2", NULL); >> + break; >> + } >> + case 0x2A: >> + { >> + gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL); >> + gst_caps_set_simple(output, "level", G_TYPE_STRING, "4", NULL); >> + break; >> + } >> + case 0x2B: >> + { >> + gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL); >> + gst_caps_set_simple(output, "level", G_TYPE_STRING, "5", NULL); >> + break; >> + } >> + default: >> + ERR("Unrecognized profile-level-indication %u\n", indication); >> + } > I think you could significantly deduplicate this switch. Ack. >> + } >> + >> + if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, >> (BYTE **) &user_data, &user_data_size))) >> + { >> + if (user_data_size > sizeof(sizeof(*user_data))) >> + { >> + GstBuffer *audio_specific_config = gst_buffer_new_allocate(NULL, >> user_data_size - sizeof(*user_data), NULL); >> + gst_buffer_fill(audio_specific_config, 0, user_data + 1, >> user_data_size - sizeof(*user_data)); >> + >> + gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, >> audio_specific_config, NULL); >> + gst_buffer_unref(audio_specific_config); >> + } >> + CoTaskMemFree(user_data); >> + } >> + } >> + else if (IsEqualGUID(&subtype, &MFAudioFormat_Float)) >> + { >> + output = gst_caps_new_empty_simple("audio/x-raw"); >> + >> + gst_caps_set_simple(output, "format", G_TYPE_STRING, "F32LE", NULL); >> + } >> + else >> + { >> + ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype)); >> + if (output) >> + gst_caps_unref(output); >> + return NULL; >> + } >> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, >> &MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate))) >> + { >> + gst_caps_set_simple(output, "rate", G_TYPE_INT, rate, NULL); >> + } >> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, >> &MF_MT_AUDIO_NUM_CHANNELS, &channels))) >> + { >> + gst_caps_set_simple(output, "channels", G_TYPE_INT, channels, NULL); >> + } >> + >> + return output; >> + } >> + >> + ERR("Unrecognized major type %s\n", debugstr_guid(&major_type)); >> + return NULL; >> +} >> diff --git a/include/codecapi.h b/include/codecapi.h >> new file mode 100644 >> index 0000000000..2690b523d7 >> --- /dev/null >> +++ b/include/codecapi.h >> @@ -0,0 +1,38 @@ >> +#ifndef __CODECAPI_H >> +#define __CODECAPI_H >> + >> +enum eAVEncH264VProfile >> +{ >> + eAVEncH264VProfile_unknown = 0, >> + eAVEncH264VProfile_Simple = 66, >> + eAVEncH264VProfile_Base = 66, >> + eAVEncH264VProfile_Main = 77, >> + eAVEncH264VProfile_High = 100, >> + eAVEncH264VProfile_422 = 122, >> + eAVEncH264VProfile_High10 = 110, >> + eAVEncH264VProfile_444 = 244, >> + eAVEncH264VProfile_Extended = 88, >> 
+}; >> + >> +enum eAVEncH264VLevel >> +{ >> + eAVEncH264VLevel1 = 10, >> + eAVEncH264VLevel1_b = 11, >> + eAVEncH264VLevel1_1 = 11, >> + eAVEncH264VLevel1_2 = 12, >> + eAVEncH264VLevel1_3 = 13, >> + eAVEncH264VLevel2 = 20, >> + eAVEncH264VLevel2_1 = 21, >> + eAVEncH264VLevel2_2 = 22, >> + eAVEncH264VLevel3 = 30, >> + eAVEncH264VLevel3_1 = 31, >> + eAVEncH264VLevel3_2 = 32, >> + eAVEncH264VLevel4 = 40, >> + eAVEncH264VLevel4_1 = 41, >> + eAVEncH264VLevel4_2 = 42, >> + eAVEncH264VLevel5 = 50, >> + eAVEncH264VLevel5_1 = 51, >> + eAVEncH264VLevel5_2 = 52 >> +}; >> + >> +#endif >> \ No newline at end of file >>
On 3/26/20 3:55 PM, Derek Lesho wrote:
I don't think that using GStreamer's caps negotiation is mutually exclusive with the modifications I'm making to the caps in this function. The function's purpose is to find the closest matching GstCaps, and it describes how it got there by performing the modifications. Yes, right now, the code just manually finds a parser, but we could easily set the sink caps to those returned from this function, then use caps negotiation. Essentially, the function determines the media type we support by looking at the preferred caps' format, and just changes the details to make it match the media foundation representation. I don't see a need to split that off into a separate function, as, in my opinion, the function serves as good documentation on the exact meaning of a given IMFMediaType.
I wouldn't expect the purpose of a function called "media_type_from_caps()" to be to find MF-compatible GstCaps from input GstCaps.
Even if you were to rename the function, I don't see the benefit of performing both tasks in a single function. As I see it, you could just as easily have two functions, along the lines of:
static GstCaps *make_compatible_caps(const GstCaps *source_caps)
{
    GstCaps *caps = gst_caps_copy(source_caps);
    /* ... */

    if (!strcmp(type, "video/x-h264"))
    {
        /* Media Foundation does not support unparsed h264. */
        gst_structure_set(structure, "parsed", G_TYPE_BOOLEAN, TRUE, NULL);
    }
    return caps;
}

/* Returns NULL if the type cannot be converted to caps. */
static IMFMediaType *media_type_from_caps(const GstCaps *caps)
{
    /* ... */
}
Besides being clearer to read, as I see it this:
* allows you to use media_type_from_caps() in other places;
* allows you to support advertising multiple types more easily (which I believe mfplat supports in general);
* if, like quartz, we ever want to derive the caps from an IMFMediaType instead of from the source caps, you then don't have to change media_type_from_caps() at all.
That said, these modifications are specific to the format, and along those lines it may make more sense to append specific elements rather than to make specific changes to the caps and try to find an element that can accommodate those. This will also help if you ever need to append multiple such elements. Thus you can e.g. append an audioconvert element unconditionally, and if no conversion is necessary it'll just pass through.
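As a sketch of that (pipeline, decoder and sink are assumed variables, not anything from the patch):

/* audioconvert simply passes buffers through when no conversion is needed,
 * so it is safe to insert it whether or not the formats already match. */
GstElement *convert = gst_element_factory_make("audioconvert", NULL);

gst_bin_add(GST_BIN(pipeline), convert);
gst_element_link_many(decoder, convert, sink, NULL);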
In the case of compressed sample parsers, what would I append unconditionally? It's very specific to the type.
Looking at the modifications you do make—
- you force h264 into annex B format, which is the job of h264parse;
Yes, because that's how it's represented on windows.
- you force all raw audio into 32-bit float. Does native mfplat really never output integer PCM?
I think I can fix that; I do know that MFAudioFormat_Float can only be F32LE though.
64-bit float exists. (So do 16-bit and 24-bit, in fact.) That's not necessarily to say that any given MF object handles it, but I'd recommend at least checking whether the bit depth and endianness matches what you expect, instead of just assuming that it does.
Okay, I heard somewhere that MFAudioFormat_Float was always 32 bit. That must have been wrong information, I'll fix that.
It's possible that MFAudioFormat_Float is always 32-bit, but GStreamer's audio/x-raw isn't always 32-bit.
Oh, that's why we set the F32LE format: so that if it isn't 32-bit little-endian, it will be converted to that.
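Concretely, the check being discussed might look something like this (a sketch over the existing "info" and "caps" variables):

const char *format = gst_structure_get_string(info, "format");

if (!format || strcmp(format, "F32LE"))
{
    /* Not already 32-bit little-endian float; require F32LE and let an
     * upstream conversion element (e.g. audioconvert) produce it. */
    gst_caps_set_simple(caps, "format", G_TYPE_STRING, "F32LE", NULL);
}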
>>> +IMFMediaType* media_type_from_caps(GstCaps *caps) >>> +{ >>> + IMFMediaType *media_type; >>> + GstStructure *info; >>> + const char *media_type_name; >>> + gchar *human_readable; >>> + >>> + if (FAILED(MFCreateMediaType(&media_type))) >>> + { >>> + return NULL; >>> + } >>> + >>> + info = gst_caps_get_structure(caps, 0); >>> + media_type_name = gst_structure_get_name(info); >>> + >>> + human_readable = gst_caps_to_string(caps); >>> + TRACE("caps = %s\n", human_readable); >>> + g_free(human_readable); >> Probably would be best to guard this with TRACE_ON, so that we don't >> bother allocating anything otherwise. >> >> Also, you'll want to use debugstr_a(), especially since caps can overrun >> the static buffer in ntdll. > Ack. >>> + >>> + if (!(strncmp(media_type_name, "video", 5))) >> Style nitpick, superfluous parentheses. >> >> I think Nikolay already mentioned this, but it's probably not a bad idea >> to just match against the whole "video/x-h264" etc. sequence. > Ack. >>> + { >>> + const char *video_format = media_type_name + 6; >>> + gint width, height, framerate_num, framerate_den; >>> + >>> + IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, >>> &MFMediaType_Video); >>> + >>> + if (gst_structure_get_int(info, "width", &width) && >>> gst_structure_get_int(info, "height", &height)) >>> + { >>> + IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_SIZE, >>> ((UINT64)width << 32) | height); >>> + } >>> + if (gst_structure_get_fraction(info, "framerate", &framerate_num, >>> &framerate_den)) >>> + { >>> + IMFMediaType_SetUINT64(media_type, &MF_MT_FRAME_RATE, >>> ((UINT64)framerate_num << 32) | framerate_den); >>> + } >>> + >>> + if (!(strcmp(video_format, "x-h264"))) >>> + { >>> + const char *profile, *level; >>> + >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_H264); >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE); >>> + >>> + if ((profile = gst_structure_get_string(info, "profile"))) >>> + { >>> + if (!(strcmp(profile, "main"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, >>> eAVEncH264VProfile_Main); >>> + else if (!(strcmp(profile, "high"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, >>> eAVEncH264VProfile_High); >>> + else if (!(strcmp(profile, "high-4:4:4"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_PROFILE, >>> eAVEncH264VProfile_444); >>> + else >>> + ERR("Unrecognized profile %s\n", profile); >> This ERR (and many below) should probably be a FIXME instead, methinks. > Ack. 
>>> + } >>> + if ((level = gst_structure_get_string(info, "level"))) >>> + { >>> + if (!(strcmp(level, "1"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel1); >>> + else if (!(strcmp(level, "1.1"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel1_1); >>> + else if (!(strcmp(level, "1.2"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel1_2); >>> + else if (!(strcmp(level, "1.3"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel1_3); >>> + else if (!(strcmp(level, "2"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel2); >>> + else if (!(strcmp(level, "2.1"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel2_1); >>> + else if (!(strcmp(level, "2.2"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel2_2); >>> + else if (!(strcmp(level, "3"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel3); >>> + else if (!(strcmp(level, "3.1"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel3_1); >>> + else if (!(strcmp(level, "3.2"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel3_2); >>> + else if (!(strcmp(level, "4"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel4); >>> + else if (!(strcmp(level, "4.1"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel4_1); >>> + else if (!(strcmp(level, "4.2"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel4_2); >>> + else if (!(strcmp(level, "5"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel5); >>> + else if (!(strcmp(level, "5.1"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel5_1); >>> + else if (!(strcmp(level, "5.2"))) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, >>> eAVEncH264VLevel5_2); >>> + else >>> + ERR("Unrecognized level %s\n", level); >>> + } >> Could we maybe make this a table instead? > Sure. >>> + gst_caps_set_simple(caps, "stream-format", G_TYPE_STRING, >>> "byte-stream", NULL); >>> + gst_caps_set_simple(caps, "alignment", G_TYPE_STRING, "au", NULL); >>> + for (unsigned int i = 0; i < gst_caps_get_size(caps); i++) >>> + { >>> + GstStructure *structure = gst_caps_get_structure (caps, i); >>> + gst_structure_remove_field(structure, "codec_data"); >>> + } >>> + } >>> + else if (!(strcmp(video_format, "x-wmv"))) >>> + { >>> + gint wmv_version; >>> + const char *format; >>> + const GValue *codec_data; >>> + >>> + if (gst_structure_get_int(info, "wmvversion", &wmv_version)) >>> + { >>> + switch (wmv_version) >>> + { >>> + case 1: >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV1); >>> + break; >>> + case 2: >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV2); >>> + break; >>> + case 3: >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WMV3); >>> + break; >>> + default: >>> + ERR("Unrecognized wmvversion %d\n", wmv_version); >>> + } >>> + } >>> + >>> + if ((format = gst_structure_get_string(info, "format"))) >>> + { >>> + if (!(strcmp(format, "WVC1"))) >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_WVC1); >> What if it's not? I think that deserves at least a FIXME. >> >> (Style nitpick, extra parentheses.) > Ack. 
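For illustration, the table suggested above might look like this (a sketch using Wine's ARRAY_SIZE macro and the eAVEncH264VLevel values from codecapi.h; not the patch's final code):

static const struct
{
    const char *name;
    enum eAVEncH264VLevel level;
}
h264_levels[] =
{
    {"1",   eAVEncH264VLevel1},   {"1.1", eAVEncH264VLevel1_1},
    {"1.2", eAVEncH264VLevel1_2}, {"1.3", eAVEncH264VLevel1_3},
    {"2",   eAVEncH264VLevel2},   {"2.1", eAVEncH264VLevel2_1},
    {"2.2", eAVEncH264VLevel2_2}, {"3",   eAVEncH264VLevel3},
    {"3.1", eAVEncH264VLevel3_1}, {"3.2", eAVEncH264VLevel3_2},
    {"4",   eAVEncH264VLevel4},   {"4.1", eAVEncH264VLevel4_1},
    {"4.2", eAVEncH264VLevel4_2}, {"5",   eAVEncH264VLevel5},
    {"5.1", eAVEncH264VLevel5_1}, {"5.2", eAVEncH264VLevel5_2},
};

unsigned int i;

for (i = 0; i < ARRAY_SIZE(h264_levels); ++i)
{
    if (!strcmp(level, h264_levels[i].name))
    {
        IMFMediaType_SetUINT32(media_type, &MF_MT_MPEG2_LEVEL, h264_levels[i].level);
        break;
    }
}
if (i == ARRAY_SIZE(h264_levels))
    FIXME("Unrecognized level %s\n", level);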
>>> + } >>> + >>> + if ((codec_data = gst_structure_get_value(info, "codec_data"))) >>> + { >>> + GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data); >>> + if (codec_data_buffer) >>> + { >>> + gsize codec_data_size = gst_buffer_get_size(codec_data_buffer); >>> + gpointer codec_data_raw = heap_alloc(codec_data_size); >>> + gst_buffer_extract(codec_data_buffer, 0, codec_data_raw, >>> codec_data_size); >>> + IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, codec_data_raw, >>> codec_data_size); >>> + } >>> + } >>> + } >>> + else if (!(strcmp(video_format, "mpeg"))) >>> + { >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFVideoFormat_M4S2); >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE); >> There are other video/mpeg formats. > TBH, the only reason I've included this is for the tests to work, I'll > look into how to differentiate the mpeg types tomorrow. >>> + } >>> + else if (!(strcmp(video_format, "x-raw"))) >>> + { >>> + const char *fourcc = gst_structure_get_string(info, "stream-format"); >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, FALSE); >>> + if (fourcc && (strlen(fourcc) == 4)) >>> + { >>> + GUID fourcc_subtype = MFVideoFormat_Base; >>> + fourcc_subtype.Data1 = MAKEFOURCC( >>> + toupper(fourcc[0]), toupper(fourcc[1]), toupper(fourcc[2]), >>> toupper(fourcc[3])); >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &fourcc_subtype); >>> + } >>> + else >>> + ERR("uncompressed video has no stream-format\n"); >> I've never seen a FOURCC stored in the "stream-format" tag; where are >> you getting this from? > You're right, I think I'm supposed to use "format" here, but this is > dead code rn so I that's why I didn't see any problems. >>> + } >>> + else >>> + ERR("Unrecognized video format %s\n", video_format); >>> + } >>> + else if (!(strncmp(media_type_name, "audio", 5))) >>> + { >>> + const char *audio_format = media_type_name + 6; >>> + >>> + IMFMediaType_SetGUID(media_type, &MF_MT_MAJOR_TYPE, >>> &MFMediaType_Audio); >>> + if (!(strcmp(audio_format, "mpeg"))) >>> + { >>> + int mpeg_version = -1; >>> + >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_COMPRESSED, TRUE); >>> + >>> + if (!(gst_structure_get_int(info, "mpegversion", &mpeg_version))) >>> + ERR("Failed to get mpegversion\n"); >>> + switch (mpeg_version) >>> + { >>> + case 1: >>> + { >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_MPEG); >>> + break; >>> + } >> What about MFAudioFormat_MP3? > I'm actually not sure what to use here, I should probably remove it for now. 
>>> + case 2: >>> + case 4: >>> + { >>> + const char *format, *profile, *level; >>> + DWORD profile_level_indication = 0; >>> + const GValue *codec_data; >>> + DWORD asc_size = 0; >>> + struct aac_user_data *user_data = NULL; >>> + >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_AAC); >>> + >>> + codec_data = gst_structure_get_value(info, "codec_data"); >>> + if (codec_data) >>> + { >>> + GstBuffer *codec_data_buffer = gst_value_get_buffer(codec_data); >>> + if (codec_data_buffer) >>> + { >>> + if ((asc_size = gst_buffer_get_size(codec_data_buffer)) >= 2) >>> + { >>> + user_data = heap_alloc_zero(sizeof(*user_data)+asc_size); >>> + gst_buffer_extract(codec_data_buffer, 0, (gpointer)(user_data + 1), >>> asc_size); >>> + } >>> + else >>> + ERR("Unexpected buffer size\n"); >>> + } >>> + else >>> + ERR("codec_data not a buffer\n"); >>> + } >>> + else >>> + ERR("codec_data not found\n"); >>> + if (!user_data) >>> + user_data = heap_alloc_zero(sizeof(*user_data)); >>> + >>> + { >>> + int rate; >>> + if (gst_structure_get_int(info, "rate", &rate)) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, >>> rate); >>> + } >>> + { >>> + int channels; >>> + if (gst_structure_get_int(info, "channels", &channels)) >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_AUDIO_NUM_CHANNELS, >>> channels); >>> + } >> Did you mean to add these blocks? > Yeah, it's so I can declare the variables closer to where they are used. I'll admit I don't get the obsession with C99 variable declarations, but this just seems janky.
It wouldn't seem janky if we had C99 variable declarations :P
>>> + >>> + if ((format = gst_structure_get_string(info, "stream-format"))) >>> + { >>> + DWORD payload_type = -1; >>> + if (!(strcmp(format, "raw"))) >>> + payload_type = 0; >>> + else if (!(strcmp(format, "adts"))) >>> + payload_type = 1; >>> + else >>> + ERR("Unrecognized stream-format\n"); >>> + if (payload_type != -1) >>> + { >>> + IMFMediaType_SetUINT32(media_type, &MF_MT_AAC_PAYLOAD_TYPE, >>> payload_type); >>> + user_data->payload_type = payload_type; >>> + } >>> + } >>> + else >>> + { >>> + ERR("Stream format not present\n"); >>> + } >>> + >>> + profile = gst_structure_get_string(info, "profile"); >>> + level = gst_structure_get_string(info, "level"); >>> + /* Data from >>> https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder#output-t... >>> */ >> I'm not sure I'd link to Microsoft documentation; it's not very stable. > Would a link to an archive.is backup of it be better? Probably.
>>> + if (profile && level) >>> + { >>> + if (!(strcmp(profile, "lc")) && !(strcmp(level, "2"))) >>> + profile_level_indication = 0x29; >>> + else if (!(strcmp(profile, "lc")) && !(strcmp(level, "4"))) >>> + profile_level_indication = 0x2A; >>> + else if (!(strcmp(profile, "lc")) && !(strcmp(level, "5"))) >>> + profile_level_indication = 0x2B; >>> + else >>> + ERR("Unhandled profile/level combo\n"); >>> + } >>> + else >>> + ERR("Profile or level not present\n"); >>> + >>> + if (profile_level_indication) >>> + { >>> + IMFMediaType_SetUINT32(media_type, >>> &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, profile_level_indication); >>> + user_data->profile_level_indication = profile_level_indication; >>> + } >>> + >>> + IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, (BYTE >>> *)user_data, sizeof(user_data) + asc_size); >>> + heap_free(user_data); >>> + break; >>> + } >>> + default: >>> + ERR("Unhandled mpegversion %d\n", mpeg_version); >>> + } >>> + } >>> + else if (!(strcmp(audio_format, "x-raw"))) >>> + { >>> + IMFMediaType_SetGUID(media_type, &MF_MT_SUBTYPE, &MFAudioFormat_Float); >>> + >>> + gst_caps_set_simple(caps, "format", G_TYPE_STRING, "F32LE", NULL); >> There are other audio formats. > Ah, you mean PCM? I'll add a case for that tomorrow. f32le is PCM, but I mean integer PCM and other depths than 32-bit.
Hmm okay, I'll do more research on that.
Presumably there should also be channel and sample rate data here.
Yeah good catch.
>>> + } >>> + else >>> + ERR("Unrecognized audio format %s\n", audio_format); >>> + } >>> + else >>> + { >>> + goto fail; >> I'm generally of the opinion that one line of cleanup doesn't merit a >> "goto". > Okay I'll change that then. >>> + } >>> + >>> + return media_type; >>> + fail: >>> + IMFMediaType_Release(media_type); >>> + return NULL; >>> +} >>> + >>> +static const char *fourcc_str(DWORD fourcc) >>> +{ >>> + if (!fourcc) return NULL; >>> + return wine_dbg_sprintf ("%c%c%c%c", >>> + (char)(fourcc), (char)(fourcc >> 8), >>> + (char)(fourcc >> 16), (char)(fourcc >> 24)); >>> +} >> I don't think you want to use Wine's debugging utilities for non-debug >> code. > Ack. >>> + >>> +GstCaps *caps_from_media_type(IMFMediaType *type) >>> +{ >>> + GUID major_type; >>> + GUID subtype; >>> + GUID base_masked_subtype; >>> + GstCaps *output = NULL; >>> + >>> + if (FAILED(IMFMediaType_GetMajorType(type, &major_type))) >>> + return NULL; >>> + if (FAILED(IMFMediaType_GetGUID(type, &MF_MT_SUBTYPE, &subtype))) >>> + return NULL; >>> + base_masked_subtype = subtype; >>> + base_masked_subtype.Data1 = 0; >>> + >>> + if (IsEqualGUID(&major_type, &MFMediaType_Video)) >>> + { >>> + UINT64 frame_rate = 0, frame_size = 0; >>> + DWORD *framerate_num = ((DWORD*)&frame_rate) + 1; >>> + DWORD *framerate_den = ((DWORD*)&frame_rate); >>> + DWORD *width = ((DWORD*)&frame_size) + 1; >>> + DWORD *height = ((DWORD*)&frame_size); >> It seems simpler to me to do e.g. >> >> DWORD width = frame_size; >> DWORD height = frame_size >> 32; > I'm not getting the width and height here, I'm declaring pointers to > them which are set later on. Right, I mean actually set the variables after retrieving frame_size; in full something like
DWORD width, height;
/* ... */
IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size);
width = frame_size >> 32;  /* width is stored in the upper 32 bits */
height = (DWORD)frame_size;
Yeah that works.
>>> + >>> + if (IsEqualGUID(&subtype, &MFVideoFormat_H264)) >>> + { >>> + enum eAVEncH264VProfile h264_profile; >>> + enum eAVEncH264VLevel h264_level; >>> + output = gst_caps_new_empty_simple("video/x-h264"); >>> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, >>> "byte-stream", NULL); >>> + gst_caps_set_simple(output, "alignment", G_TYPE_STRING, "au", NULL); >>> + >>> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_PROFILE, >>> &h264_profile))) >>> + { >>> + const char *profile = NULL; >>> + switch (h264_profile) >>> + { >>> + case eAVEncH264VProfile_Main: profile = "main"; break; >>> + case eAVEncH264VProfile_High: profile = "high"; break; >>> + case eAVEncH264VProfile_444: profile = "high-4:4:4"; break; >>> + default: ERR("Unknown profile %u\n", h264_profile); >>> + } >>> + if (profile) >>> + gst_caps_set_simple(output, "profile", G_TYPE_STRING, profile, NULL); >>> + } >>> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_MPEG2_LEVEL, >>> &h264_level))) >>> + { >>> + const char *level = NULL; >>> + switch (h264_level) >>> + { >>> + case eAVEncH264VLevel1: level = "1"; break; >>> + case eAVEncH264VLevel1_1: level = "1.1"; break; >>> + case eAVEncH264VLevel1_2: level = "1.2"; break; >>> + case eAVEncH264VLevel1_3: level = "1.3"; break; >>> + case eAVEncH264VLevel2: level = "2"; break; >>> + case eAVEncH264VLevel2_1: level = "2.1"; break; >>> + case eAVEncH264VLevel2_2: level = "2.2"; break; >>> + case eAVEncH264VLevel3: level = "3"; break; >>> + case eAVEncH264VLevel3_1: level = "3.1"; break; >>> + case eAVEncH264VLevel3_2: level = "3.2"; break; >>> + case eAVEncH264VLevel4: level = "4"; break; >>> + case eAVEncH264VLevel4_1: level = "4.1"; break; >>> + case eAVEncH264VLevel4_2: level = "4.2"; break; >>> + case eAVEncH264VLevel5: level = "5"; break; >>> + case eAVEncH264VLevel5_1: level = "5.1"; break; >>> + case eAVEncH264VLevel5_2: level = "5.2"; break; >>> + default: ERR("Unknown level %u\n", h264_level); >>> + } >>> + if (level) >>> + gst_caps_set_simple(output, "level", G_TYPE_STRING, level, NULL); >>> + } >>> + } >>> + else if (IsEqualGUID(&subtype, &MFVideoFormat_WVC1)) >>> + { >>> + BYTE *user_data; >>> + DWORD user_data_size; >>> + output = gst_caps_new_empty_simple("video/x-wmv"); >>> + gst_caps_set_simple(output, "format", G_TYPE_STRING, "WVC1", NULL); >>> + >>> + gst_caps_set_simple(output, "wmvversion", G_TYPE_INT, 3, NULL); >>> + >>> + if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, >>> &user_data, &user_data_size))) >>> + { >>> + GstBuffer *codec_data_buffer = gst_buffer_new_allocate(NULL, >>> user_data_size, NULL); >>> + gst_buffer_fill(codec_data_buffer, 0, user_data, user_data_size); >>> + gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, >>> codec_data_buffer, NULL); >>> + gst_buffer_unref(codec_data_buffer); >>> + CoTaskMemFree(user_data); >>> + } >>> + } >>> + else if (IsEqualGUID(&base_masked_subtype, &MFVideoFormat_Base)) >>> + { >>> + output = gst_caps_new_empty_simple("video/x-raw"); >>> + gst_caps_set_simple(output, "format", G_TYPE_STRING, >>> fourcc_str(subtype.Data1), NULL); >> What about RGB formats? > Ah, I didn't think about those, looks like we'll have to use a table of > known conversions instead. Well, to some degree, though you can also make use of gst_video_format_from_fourcc(). See also amt_to_gst_caps_video() in gstdemux.c.
Ah, check for RGB formats first, then fall back to FOURCC conversion. Okay, sure.
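That fallback order might look roughly like this (a sketch; requires gst/video/video.h, and the MFVideoFormat_RGB32 -> "BGRx" mapping is an assumption of the sketch, not something established in this thread):

/* Known RGB subtypes first, since they have no meaningful FOURCC... */
if (IsEqualGUID(&subtype, &MFVideoFormat_RGB32))
{
    gst_caps_set_simple(output, "format", G_TYPE_STRING, "BGRx", NULL);
}
else
{
    /* ...then fall back to interpreting Data1 as a FOURCC. */
    GstVideoFormat format = gst_video_format_from_fourcc(subtype.Data1);

    if (format != GST_VIDEO_FORMAT_UNKNOWN)
        gst_caps_set_simple(output, "format", G_TYPE_STRING, gst_video_format_to_string(format), NULL);
    else
        FIXME("Unrecognized subtype %s\n", debugstr_guid(&subtype));
}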
>>> + } >>> + else { >>> + ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype)); >>> + return NULL; >>> + } >>> + >>> + IMFMediaType_GetUINT64(type, &MF_MT_FRAME_RATE, &frame_rate); >>> + IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size); >>> + >>> + if (frame_rate) >>> + gst_caps_set_simple(output, "framerate", GST_TYPE_FRACTION, >>> *framerate_num, *framerate_den, NULL); >>> + if (frame_size) >>> + { >>> + gst_caps_set_simple(output, "width", G_TYPE_INT, *width, NULL); >>> + gst_caps_set_simple(output, "height", G_TYPE_INT, *height, NULL); >>> + } >>> + return output; >>> + } >>> + else if (IsEqualGUID(&major_type, &MFMediaType_Audio)) >>> + { >>> + DWORD rate, channels; >>> + >>> + if (IsEqualGUID(&subtype, &MFAudioFormat_AAC)) >>> + { >>> + DWORD payload_type, indication; >>> + struct aac_user_data *user_data; >>> + UINT32 user_data_size; >>> + output = gst_caps_new_empty_simple("audio/mpeg"); >>> + >>> + /* TODO */ >>> + gst_caps_set_simple(output, "framed", G_TYPE_BOOLEAN, TRUE, NULL); >>> + gst_caps_set_simple(output, "mpegversion", G_TYPE_INT, 4, NULL); >> What's TODO here? > MFAudioFormat_AAC could also mean mpegversion=2, and I don't know what > the "framed" attribute is for. A TODO message should probably mention what exactly is to be done.
In general it's good practice to understand what your code is doing before you submit it, but regardless, "framed" means there is exactly one frame per buffer. Is that guaranteed by the MF source? (It's not obvious to me that it is...)
Yeah I should probably remove it in that case, I was trying to match up all the attributes when going through the conversion to IMFMediaType and back, but it's probably not necessary.
>>> + >>> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, >>> &payload_type))) >>> + { >>> + switch (payload_type) >>> + { >>> + case 0: >>> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", >>> NULL); >>> + break; >>> + case 1: >>> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "adts", >>> NULL); >>> + break; >>> + default: >>> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", >>> NULL); >> Seems to me that 2 and 3 should be mapped to "adif" and "loas", >> respectively. > Ack. >>> + } >>> + } >>> + else >>> + gst_caps_set_simple(output, "stream-format", G_TYPE_STRING, "raw", >>> NULL); >>> + >>> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, >>> &MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, &indication))) >>> + { >>> + switch (indication) >>> + { >>> + case 0x29: >>> + { >>> + gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL); >>> + gst_caps_set_simple(output, "level", G_TYPE_STRING, "2", NULL); >>> + break; >>> + } >>> + case 0x2A: >>> + { >>> + gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL); >>> + gst_caps_set_simple(output, "level", G_TYPE_STRING, "4", NULL); >>> + break; >>> + } >>> + case 0x2B: >>> + { >>> + gst_caps_set_simple(output, "profile", G_TYPE_STRING, "lc", NULL); >>> + gst_caps_set_simple(output, "level", G_TYPE_STRING, "5", NULL); >>> + break; >>> + } >>> + default: >>> + ERR("Unrecognized profile-level-indication %u\n", indication); >>> + } >> I think you could significantly deduplicate this switch. > Ack. >>> + } >>> + >>> + if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, >>> (BYTE **) &user_data, &user_data_size))) >>> + { >>> + if (user_data_size > sizeof(sizeof(*user_data))) >>> + { >>> + GstBuffer *audio_specific_config = gst_buffer_new_allocate(NULL, >>> user_data_size - sizeof(*user_data), NULL); >>> + gst_buffer_fill(audio_specific_config, 0, user_data + 1, >>> user_data_size - sizeof(*user_data)); >>> + >>> + gst_caps_set_simple(output, "codec_data", GST_TYPE_BUFFER, >>> audio_specific_config, NULL); >>> + gst_buffer_unref(audio_specific_config); >>> + } >>> + CoTaskMemFree(user_data); >>> + } >>> + } >>> + else if (IsEqualGUID(&subtype, &MFAudioFormat_Float)) >>> + { >>> + output = gst_caps_new_empty_simple("audio/x-raw"); >>> + >>> + gst_caps_set_simple(output, "format", G_TYPE_STRING, "F32LE", NULL); >>> + } >>> + else >>> + { >>> + ERR("Unrecognized subtype %s\n", debugstr_guid(&subtype)); >>> + if (output) >>> + gst_caps_unref(output); >>> + return NULL; >>> + } >>> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, >>> &MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate))) >>> + { >>> + gst_caps_set_simple(output, "rate", G_TYPE_INT, rate, NULL); >>> + } >>> + if (SUCCEEDED(IMFMediaType_GetUINT32(type, >>> &MF_MT_AUDIO_NUM_CHANNELS, &channels))) >>> + { >>> + gst_caps_set_simple(output, "channels", G_TYPE_INT, channels, NULL); >>> + } >>> + >>> + return output; >>> + } >>> + >>> + ERR("Unrecognized major type %s\n", debugstr_guid(&major_type)); >>> + return NULL; >>> +} >>> diff --git a/include/codecapi.h b/include/codecapi.h >>> new file mode 100644 >>> index 0000000000..2690b523d7 >>> --- /dev/null >>> +++ b/include/codecapi.h >>> @@ -0,0 +1,38 @@ >>> +#ifndef __CODECAPI_H >>> +#define __CODECAPI_H >>> + >>> +enum eAVEncH264VProfile >>> +{ >>> + eAVEncH264VProfile_unknown = 0, >>> + eAVEncH264VProfile_Simple = 66, >>> + eAVEncH264VProfile_Base = 66, >>> + eAVEncH264VProfile_Main = 77, >>> + eAVEncH264VProfile_High = 100, >>> + eAVEncH264VProfile_422 
= 122, >>> + eAVEncH264VProfile_High10 = 110, >>> + eAVEncH264VProfile_444 = 244, >>> + eAVEncH264VProfile_Extended = 88, >>> +}; >>> + >>> +enum eAVEncH264VLevel >>> +{ >>> + eAVEncH264VLevel1 = 10, >>> + eAVEncH264VLevel1_b = 11, >>> + eAVEncH264VLevel1_1 = 11, >>> + eAVEncH264VLevel1_2 = 12, >>> + eAVEncH264VLevel1_3 = 13, >>> + eAVEncH264VLevel2 = 20, >>> + eAVEncH264VLevel2_1 = 21, >>> + eAVEncH264VLevel2_2 = 22, >>> + eAVEncH264VLevel3 = 30, >>> + eAVEncH264VLevel3_1 = 31, >>> + eAVEncH264VLevel3_2 = 32, >>> + eAVEncH264VLevel4 = 40, >>> + eAVEncH264VLevel4_1 = 41, >>> + eAVEncH264VLevel4_2 = 42, >>> + eAVEncH264VLevel5 = 50, >>> + eAVEncH264VLevel5_1 = 51, >>> + eAVEncH264VLevel5_2 = 52 >>> +}; >>> + >>> +#endif >>> \ No newline at end of file >>>
On 3/26/20 4:22 PM, Zebediah Figura wrote:
> I wouldn't expect the purpose of a function called "media_type_from_caps()" to be to find MF-compatible GstCaps from input GstCaps.
>
> Even if you were to rename the function, I don't see the benefit of performing both tasks in a single function. As I see it, you could just as easily have two functions, along the lines of:
>
> static GstCaps *make_compatible_caps(const GstCaps *source_caps)
> {
>     GstCaps *caps = gst_caps_copy(source_caps);
>     /* ... */
>
>     if (!strcmp(type, "video/x-h264"))
>     {
>         /* Media Foundation does not support unparsed h264. */
>         gst_structure_set(structure, "parsed", G_TYPE_BOOLEAN, TRUE, NULL);
>     }
>     return caps;
> }
>
> /* Returns NULL if the type cannot be converted to caps. */
> static IMFMediaType *media_type_from_caps(const GstCaps *caps)
> {
>     /* ... */
> }
Okay, yeah, this solution is probably cleaner; I'll transition to it. FWIW, though, some of your points against the current solution don't make sense to me:
> Besides being clearer to read, as I see it this:
>
> * allows you to use media_type_from_caps() in other places;
With the current solution, if you want to see whether caps match perfectly with a media type, you use gst_caps_is_equal, as we do in the media_source.
> * allows you to support advertising multiple types more easily (which I believe mfplat supports in general);
This is unrelated; we don't use this function for our decoder transform. In that case, we do derive the desired caps from an MF subtype, using gst_caps_from_mf_media_type. In the case of a media source, where we use this caps->media type route, there should only be one supported type, since it's compressed.
> * if, like quartz, we ever want to derive the caps from an IMFMediaType instead of from the source caps, you then don't have to change media_type_from_caps() at all.
Ditto, that's what gst_caps_from_mf_media_type is for.
There's another broad question I have with this approach, actually, which is fundamental enough I have to assume it's had some thought put into it, but it would be nice if that discussion happened in a more public place, and was justified in the patches sent.
Essentially, the question is: what if we were to use decodebin directly?
As I understand (and admittedly Media Foundation is far more complex than I could hope to understand) an application which just calls IMFSourceResolver methods just needs to get back a working IMFMediaSource, and we could wrap decodebin with one of those, similar to the quartz wrapper.
First of all, this is something I think we want to do anyway. Microsoft has no demuxer for, say, Vorbis (at least, there's not one registered on my Windows 10 machine), but I think that we want to be able to play back Vorbis files anyway (in, say, a Win32 media player application). Instead of writing yet another source for vorbis, and for each other obscure format, we just write one generic decodebin wrapper.
Second of all, the most obvious benefit, at least while looking at these patches, is that you now don't need to write caps <-> IMFMediaType conversion for every type on the planet. Another benefit is that you let all of the decoding happen within a single GStreamer pipeline, which is probably better for performance. You also can simplify your postprocessing step to adding a single videoconvert and audioconvert, instead of having to manually (or semi-manually) add e.g. an h264 parser element. These are some of the benefits I had in mind when removing the GStreamer quartz transforms.
Even in the case where the application manually creates e.g. an MPEG-4 source, my understanding is it's still the source's job to automatically append transforms to match the requested type. We'd just be moving that from the mfplat level to the gstreamer level—i.e. let decodebin select the 'transforms' needed to convert to raw video and audio.
It obviously wouldn't match native structure, but it's not clear to me that it would fail to match native in a way that would cause problems. Judging from my experience with quartz, most applications aren't going to care how their media is decoded as long as they get raw samples out of it. Only a select few build the graph manually because they don't realize that they can autoplug, or make assumptions about which filters will be present once autoplugging is done, and some of those even fall back to autoplugging if their preferred method fails. Maybe the situation is different with mfplat, but given that there is a way to let mfplat figure out which sources and transforms to use, I'm gonna be really surprised if most applications aren't using it.
If you do come across an application that requires we mimic native's specific arrangement of sources and transforms, it seems to me it wouldn't require that much effort to swap a different parser in for decodebin, and to implement the necessary bits in the media type conversion functions. Ultimately I suspect it'd be less work to have a decodebin wrapper + specific sources for applications that require them, than to manually implement every source and transform.
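To make that concrete, the skeleton of such a wrapper might look something like this (struct media_source and media_source_add_stream() are hypothetical names for this sketch, not anything from the patches):

struct media_source; /* hypothetical wrapper object */
extern void media_source_add_stream(struct media_source *source, GstPad *pad, GstCaps *caps);

static void pad_added(GstElement *element, GstPad *pad, gpointer user)
{
    struct media_source *source = user;
    GstCaps *caps = gst_pad_get_current_caps(pad);

    /* decodebin pads carry raw audio or video, so the conversion to an
     * IMFMediaType only ever has to handle uncompressed formats. */
    media_source_add_stream(source, pad, caps);
    if (caps)
        gst_caps_unref(caps);
}

static GstElement *create_decodebin(struct media_source *source)
{
    GstElement *decodebin = gst_element_factory_make("decodebin", NULL);

    g_signal_connect(decodebin, "pad-added", G_CALLBACK(pad_added), source);
    return decodebin;
}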
On Thu, 26 Mar 2020 at 22:09, Zebediah Figura zfigura@codeweavers.com wrote:

> It obviously wouldn't match native structure, but it's not clear to me that it would fail to match native in a way that would cause problems.
Famous last words.
- Josh 🐸
On 3/26/20 4:56 PM, Zebediah Figura wrote:
> Essentially, the question is: what if we were to use decodebin directly?
> [...]
I'll make a more complete response to this tomorrow, but I really think that doing the incorrect thing isn't worth the supposed simplicity your method brings. For instance, a commit I have on my local branch adding an ASF source and WMV decoder is 126 lines long. Take a look: https://github.com/Guy1524/wine/commit/37748e69bb25f3bf97f4dbfebaa830e3eb282...
On 3/26/20 6:07 PM, Derek Lesho wrote:
> On 3/26/20 4:56 PM, Zebediah Figura wrote:
>> Essentially, the question is: what if we were to use decodebin directly?
>> [...]
>
> I'll make a more complete response to this tomorrow, but I really think that doing the incorrect thing isn't worth the supposed simplicity your method brings. For instance, a commit I have on my local branch adding an ASF source and WMV decoder is 126 lines long. Take a look: https://github.com/Guy1524/wine/commit/37748e69bb25f3bf97f4dbfebaa830e3eb282...
While I await your more complete response, I figure I might as well clarify some things.
I don't think that "doing the incorrect thing", i.e. failing to exactly emulate Windows, should necessarily be considered bad in itself, or at least not nearly as bad as all that.
My view, and my understanding of the Wine project's view in general as informed by its maintainers, is that emulating Windows is desirable for public documented behaviour (obviously), for undocumented behaviour that applications rely on (also obviously), for undocumented or semi-documented behaviour where there's no difference otherwise and where the native thing to do is obvious (e.g. the name of an internal registry key).
But there's not really a reason to emulate Windows otherwise. And in a case like this, where there's a significant benefit to not emulating Windows exactly, the only reason I see is "an application we don't know yet *might* depend on it". When faced with such a risk, I weigh the probability of that happening—and on the evidence of DirectShow applications, I see that as low—with the cost of having to change design—which also seems low to me; I can say from experience (cf. 5de712b5d) that swapping out a specific demuxer for decodebin isn't very difficult.
Not to mention that what we're doing is barely "incorrect". Media Foundation is an API that's specifically meant to be extended in this way. For that matter, some application could easily register its own codec libraries on Windows with a higher priority than the native ones (this happened with DirectShow); that's essentially no different than what I'm suggesting.
I think the linked commit misses the point somewhat. That's partially because I don't think it makes sense to measure simplicity as an absolute metric simply using line count, and partially because it's missing the cost of adding other media types to the conversion functions (which is one of the reasons, though not the only reason, I thought to write this mail). But it's mostly because the cost of using decodebin, where it works, is essentially zero: we write one media source, and it works for everything; no extension for ASF required. If it never becomes necessary to write a source that outputs compressed samples, then we also don't have the cost of abstraction (which is always worth taking seriously!), and if it does, we come out even—we can still use your generic media source, or something like it.
Ultimately, I think that a decodebin wrapper is something we want to have anyway, for the sake of host codecs like Theora, and once we have it, I see zero cost in using it wherever else we can.
On 3/26/20 4:56 PM, Zebediah Figura wrote:
> There's another broad question I have with this approach, actually, which is fundamental enough I have to assume it's had some thought put into it, but it would be nice if that discussion happened in a more public place, and was justified in the patches sent.
>
> Essentially, the question is: what if we were to use decodebin directly?
>
> As I understand (and admittedly Media Foundation is far more complex than I could hope to understand) an application which just calls IMFSourceResolver methods just needs to get back a working IMFMediaSource, and we could wrap decodebin with one of those, similar to the quartz wrapper.
The most basic applications (games) seem to either use a source reader or simple sample grabber media session to get their raw samples. If you want to add a hack for using decodebin, you can easily add a special source type, and for the media source of that type, just make a decodebin element instead of searching for a demuxer. In this case, the source reader wouldn't search for a decoder since the output type set by the application would be natively supported by the source. Then, as part of the hack, just always yield that source type in the source resolver. This is completely incorrect and probably shouldn't make its way into mainline, IMO. Also, I have reason to believe it may break Unity3D, as they do look at the native media types supported by the source, and getting around this would require adding some hackery in the source reader.
> First of all, this is something I think we want to do anyway. Microsoft has no demuxer for, say, Vorbis (at least, there's not one registered on my Windows 10 machine), but I think that we want to be able to play back Vorbis files anyway (in, say, a Win32 media player application).
I'm pretty sure our goal is not to extend windows functionality.
> Instead of writing yet another source for vorbis,
You don't "write another source", you just expose a new source object and link it with a new source_desc structure, which specifies the mime type of the container format: https://github.com/Guy1524/wine/blob/mfplat_rebase/dlls/winegstreamer/media_...
> and for each other obscure format, we just write one generic decodebin wrapper.
Not to mention, you'd have to perform this step with a decodebin wrapper anyway.
> Second of all, the most obvious benefit, at least while looking at these patches, is that you now don't need to write caps <-> IMFMediaType conversion for every type on the planet.
I don't see this as a problem; most games I've seen will use either H.264 or WMV, and adding new formats isn't that difficult. You look at the caps exposed by the gstreamer demuxer, find the equivalent attributes in media foundation, and fill in the gaps. In return you get correct behavior, and a source that can be paired with a correctly written MFT from outside of the wine source.
> Another benefit is that you let all of the decoding happen within a single GStreamer pipeline, which is probably better for performance.
I have applications working right now with completely acceptable performance, and we are still copying every uncompressed sample an extra time, which we may be able to optimize away. Copying compressed samples, on the other hand, is not that big of a deal at all.
> You also can simplify your postprocessing step to adding a single videoconvert and audioconvert, instead of having to manually (or semi-manually) add e.g. an h264 parser element.
It isn't manual; we find a parser which corrects the caps. And as I mentioned in an earlier email, we could also use caps negotiation for this; all the setup is in place.
> These are some of the benefits I had in mind when removing the GStreamer quartz transforms.
>
> Even in the case where the application manually creates e.g. an MPEG-4 source, my understanding is it's still the source's job to automatically append transforms to match the requested type.
It's not the source's job at all. On Windows, where sources are purpose-built, they apply no transformations to the types they get; their goal is only to get raw sample data from a container / stream. It's the job of the media session or source reader to apply transforms when needed.
> We'd just be moving that from the mfplat level to the gstreamer level—i.e. let decodebin select the 'transforms' needed to convert to raw video and audio.
The media session and source reader shouldn't be affected by winegstreamer details. If a user/an application decides to install a third party decoder, we still need the infrastructure in place for this to function.
> It obviously wouldn't match native structure, but it's not clear to me that it would fail to match native in a way that would cause problems. Judging from my experience with quartz, most applications aren't going to care how their media is decoded as long as they get raw samples out of it.
Most games, or most applications? Chromium uses media foundation in a much more granular way.
> Only a select few build the graph manually because they don't realize that they can autoplug, or make assumptions about which filters will be present once autoplugging is done, and some of those even fall back to autoplugging if their preferred method fails. Maybe the situation is different with mfplat, but given that there is a way to let mfplat figure out which sources and transforms to use, I'm gonna be really surprised if most applications aren't using it.
>
> If you do come across an application that requires we mimic native's specific arrangement of sources and transforms, it seems to me it wouldn't require that much effort to swap a different parser in for decodebin, and to implement the necessary bits in the media type conversion functions. Ultimately I suspect it'd be less work to have a decodebin wrapper + specific sources for applications that require them, than to manually implement every source and transform.
The current solution isn't very manual, and, as I mentioned earlier in this email, you also can construct a decodebin wrapper source using the infrastructure which is available. And in general terms, I think it's more work to maintain a solution that doesn't match up to windows, as we now have to think of all these edge cases and how to work around them.
On 3/26/20 8:07 PM, Zebediah Figura wrote:
> While I await your more complete response, I figure I might as well clarify some things.
>
> I don't think that "doing the incorrect thing", i.e. failing to exactly emulate Windows, should necessarily be considered bad in itself, or at least not nearly as bad as all that.
>
> My view, and my understanding of the Wine project's view in general as informed by its maintainers, is that emulating Windows is desirable for public documented behaviour (obviously), for undocumented behaviour that applications rely on (also obviously), for undocumented or semi-documented behaviour where there's no difference otherwise and where the native thing to do is obvious (e.g. the name of an internal registry key).
In my view, when completely incorrect behavior is only a few function calls away, that's not acceptable. The media source is a well documented public interface, and doing something different instead is just asking for trouble.
> But there's not really a reason to emulate Windows otherwise. And in a case like this, where there's a significant benefit to not emulating Windows exactly, the only reason I see is "an application we don't know yet *might* depend on it". When faced with such a risk, I weigh the probability of that happening—and on the evidence of DirectShow applications, I see that as low—with the cost of having to change design—which also seems low to me; I can say from experience (cf. 5de712b5d) that swapping out a specific demuxer for decodebin isn't very difficult.
The converse of this is also true: if you want to quickly experiment with some gstreamer codec that we don't support yet, you just perform the hack I mentioned earlier, and then after you get it working you make it correct by adding the necessary gstreamer caps. Another hack we could use is to serialize the compressed caps and throw them in an MF_MT_USER_DATA attribute, and hope that an application never looks. But as I mentioned earlier, I don't think the amount of work required for adding a new media type is excessive. Microsoft only ships a limited number of sources and decoders; they fit on a single page: https://docs.microsoft.com/en-us/windows/win32/medfound/supported-media-form... , so it's not like we'll be adding new types for years to come.
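For what it's worth, that serialization hack would only be a few lines (a sketch; and again, storing a caps string in MF_MT_USER_DATA is explicitly not what Windows does):

gchar *caps_str = gst_caps_to_string(caps);
IMFMediaType_SetBlob(media_type, &MF_MT_USER_DATA, (BYTE *)caps_str, strlen(caps_str) + 1);
g_free(caps_str);

/* ...and on the way back, with "blob" being the MF_MT_USER_DATA data
 * retrieved via IMFMediaType_GetAllocatedBlob(): */
GstCaps *caps = gst_caps_from_string((char *)blob);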
> Not to mention that what we're doing is barely "incorrect". Media Foundation is an API that's specifically meant to be extended in this way.
I don't think Microsoft ever meant for an application to make a media source that decodes compressed content, the source reader and media session exist for a reason.
> For that matter, some application could easily register its own codec libraries on Windows with a higher priority than the native ones (this happened with DirectShow); that's essentially no different than what I'm suggesting.
Yes, but even in that case, I assume they will still follow the basic concept of what a source is and is not.
> I think the linked commit misses the point somewhat. That's partially because I don't think it makes sense to measure simplicity as an absolute metric simply using line count,
It's not just line count; the code itself is very simple: all we are doing is registering the supported input and output types of the decoder, setting the mime type of the container format for the source, and registering both objects.
> and partially because it's missing the cost of adding other media types to the conversion functions
You can use the MF_MT_USER_DATA serialization hack if you're worried about that.
> (which is one of the reasons, though not the only reason, I thought to write this mail). But it's mostly because the cost of using decodebin, where it works, is essentially zero:
Except in the cases where an application does something unexpected.
> we write one media source, and it works for everything; no extension for ASF required.
There already is only one real implementation of the media source; the only "extension" is adding the mime type instead of using typefind. We will register the necessary byte stream handlers no matter which path we take.
> If it never becomes necessary to write a source that outputs compressed samples, then we also don't have the cost of abstraction (which is always worth taking seriously!), and if it does, we come out even—we can still use your generic media source, or something like it.
>
> Ultimately, I think that a decodebin wrapper is something we want to have anyway, for the sake of host codecs like Theora,
Where would we use support for Theora, if no Windows applications are able to use it?
> and once we have it, I see zero cost in using it wherever else we can.
On 3/27/20 10:05 AM, Derek Lesho wrote:
On 3/26/20 4:56 PM, Zebediah Figura wrote:
There's another broad question I have with this approach, actually, which is fundamental enough I have to assume it's at had some thought put into it, but it would be nice if that discussion happened in a more public place, and was justified in the patches sent.
Essentially, the question is: what if we were to use decodebin directly?
As I understand (and admittedly Media Foundation is far more complex than I could hope to understand) an application which just calls IMFSourceResolver methods just needs to get back a working IMFMediaSource, and we could wrap decodebin with one of those, similar to the quartz wrapper.
The most basic applications (games) seem to either use a source reader or simple sample grabber media session to get their raw samples. If you want to add a hack for using decodebin, you can easily add a special source type, and for the media source of that type, just make a decodebin element instead of searching for a demuxer. In this case, the source reader wouldn't search for a decoder since the output type set by the application would be natively supported by the source. Then, as part of the hack, just always yield that source type in the source resolver. This is completely incorrect and probably shouldn't make its way into mainline, IMO. Also, I have reason to believe it may break Unity3D, as they do look at the native media types supported by the source, and getting around this would require adding some hackery in the source reader.
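As a sketch of what that decodebin path might look like inside the media source—the function names and surrounding pipeline setup here are assumptions, not taken from the actual patches:

/* Hypothetical sketch: plug the bytestream's source element into decodebin
 * instead of looking up a demuxer, and wrap each decoded pad in a stream. */
static void stream_added(GstElement *decodebin, GstPad *pad, gpointer user)
{
    /* ...create a media stream around this already-decoded pad... */
}

static gboolean setup_decodebin(GstElement *pipeline, GstElement *src)
{
    GstElement *decodebin = gst_element_factory_make("decodebin", NULL);

    if (!decodebin)
        return FALSE;

    g_signal_connect(decodebin, "pad-added", G_CALLBACK(stream_added), NULL);
    gst_bin_add(GST_BIN(pipeline), decodebin);
    return gst_element_link(src, decodebin);
}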
My assertion is this isn't really a "hack". This is something that's reasonable to do, and that fits within the design of Media Foundation. It's changing the implementation details, not the API contract. We have the freedom to do that.
First of all, this is something I think we want to do anyway. Microsoft has no demuxer for, say, Vorbis (at least, there's not one registered on my Windows 10 machine), but I think that we want to be able to play back Vorbis files anyway (in, say, a Win32 media player application).
I'm pretty sure our goal is not to extend windows functionality.
Actually, I'd assert the opposite. Host integration has always been a feature of Wine, not a bug. That goes beyond just mapping program launcher entries to .desktop files; it includes things like:
* mapping host devices to DOS drives,
* allowing unix paths to be used in file system functions,
* exposing the unix file system as a shell folder,
* making winebrowser the default browser (instead of explorer),
* exposing public Wine-specific exports from ntdll (those not prefixed with a double underscore),
* making use of host credentials in advapi32 (on Mac, anyway),
* exposing host GStreamer and QuickTime codecs in DirectShow.
We extend host functionality to integrate with the system, and to make using Wine easier. Using host codecs from mfplat does both.
Instead of writing yet another source for vorbis,
You don't "write another source", you just expose a new source object and link it with a new source_desc structure, which specifies the mime type of the container format: https://github.com/Guy1524/wine/blob/mfplat_rebase/dlls/winegstreamer/media_...
and for each other obscure format, we just write one generic decodebin wrapper.
Not to mention, you'd have to perform this step with a decodebin wrapper anyway.
The amount of abstraction, and the amount of actual code you have to add, is beside the point, but it's also not quite as simple as you make out there:
* First and foremost, we also need to add caps conversion functions, since vorbisparse doesn't output raw video, and we need to be able to feed it through theoradec afterwards.
* Also, I'm guessing you haven't dealt with "always" pads yet; vorbisparse doesn't send "no-more-pads".
* In the case that elements get added, removed, or changed from upstream GStreamer, we have to reflect that here.
By contrast, the amount of code we have to add to deal with a new format when using decodebin is *exactly zero*. We don't actually have to write "audio/x-vorbis" anywhere in our code. After all, we don't write it anywhere in quartz, and yet Vorbis still works. (If an application were to ask what the stream type is—and I doubt any do—we report it as MEDIATYPE_Stream, MEDIASUBTYPE_Gstreamer).
Second of all, the most obvious benefit, at least while looking at these patches, is that you now don't need to write caps <-> IMFMediaType conversion for every type on the planet.
I don't see this as a problem; most games I've seen will use either H.264 or WMV, and adding new formats isn't that difficult. You look at the caps exposed by the gstreamer demuxer, find the equivalent attributes in media foundation, and fill in the gaps. In return you get correct behavior, and a source that can be paired with a correctly written MFT from outside of the wine source.
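To illustrate that process, a hedged sketch of the same pattern for raw audio caps (the exact attribute set a native source exposes would still need to be verified against Windows):

/* Illustrative only: map a raw-audio GstStructure onto the equivalent
 * Media Foundation attributes, leaving gaps where caps have no analogue. */
static void audio_media_type_from_caps(IMFMediaType *type, const GstStructure *info)
{
    gint rate, channels;

    IMFMediaType_SetGUID(type, &MF_MT_MAJOR_TYPE, &MFMediaType_Audio);
    IMFMediaType_SetGUID(type, &MF_MT_SUBTYPE, &MFAudioFormat_PCM);

    if (gst_structure_get_int(info, "rate", &rate))
        IMFMediaType_SetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, rate);
    if (gst_structure_get_int(info, "channels", &channels))
        IMFMediaType_SetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, channels);
}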
This is basically true until it isn't. And it already isn't true if we want to support host codecs. An "add it when we need it" approach is going to be hell on media players.
I also think you're kind of underestimating the cost here. I don't like making LoC arguments, but your code to deal with those caps is something like 370 LoC, maybe 350 LoC with some deduplication. There's also the developer cost of looking up what GStreamer caps values mean (which usually requires looking at the source), looking up the Media Foundation attributes, testing them to ensure that the conversion is correct, figuring out how to deal with caps that either GStreamer or Media Foundation can't handle...
Another benefit is that you let all of the decoding happen within a single GStreamer pipeline, which is probably better for performance.
I have applications working right now with completely acceptable performance, and we are still copying every uncompressed sample an extra time, which we may be able to optimize away. Copying compressed samples, on the other hand, is not that big of a deal at all.
I don't doubt it works regardless. DirectShow did too, back before I got rid of the transforms. It's also not the main reason I'm proposing this.
On the other hand, decreasing CPU usage is also nice.
Another thing that occurred to me is, letting everything happen in one GStreamer pipeline is nice for debugging.
You also can simplify your postprocessing step to adding a single videoconvert and audioconvert, instead of having to manually (or semi-manually) add e.g. an h264 parser element.
It isn't manual; we find a parser which corrects the caps. And as I mentioned in an earlier email, we could also use caps negotiation for this; all the setup is in place.
Hence "semi-manually". You still have to manually fix the caps so that the element will be added.
These are some of the benefits I had in mind when removing the GStreamer quartz transforms.
Even in the case where the application manually creates e.g. an MPEG-4 source, my understanding is it's still the source's job to automatically append transforms to match the requested type.
It's not the source's job at all. On windows, where sources are purpose-built, they apply no transformations to the types they get, their goal is only to get raw sample data from a container / stream. It's the job of the media session, or source reader to apply transforms when needed.
I see, I confused the media source with the source reader. I guess that argument isn't valid, but I don't think it really affects my conclusion.
We'd just be moving that from the mfplat level to the gstreamer level—i.e. let decodebin select the 'transforms' needed to convert to raw video and audio.
The media session and source reader shouldn't be affected by winegstreamer details. If a user/an application decides to install a third party decoder, we still need the infrastructure in place for this to function.
It obviously wouldn't match native structure, but it's not clear to me that it would fail to match native in a way that would cause problems. Judging from my experience with quartz, most applications aren't going to care how their media is decoded as long as they get raw samples out of it.
Most games, or most applications? Chromium uses media foundation in a much more granular way.
Yes, most applications.
What does Chromium do?
Only a select few build the graph manually because they don't realize that they can autoplug, or make assumptions about which filters will be present once autoplugging is done, and some of those even fall back to autoplugging if their preferred method fails. Maybe the situation is different with mfplat, but given that there is a way to let mfplat figure out which sources and transforms to use, I'm gonna be really surprised if most applications aren't using it.
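For reference, the autoplugging path I mean is the standard application-side one, sketched below with abbreviated error handling:

/* Sketch: the usual application-side path. The resolver picks a media
 * source for the URL, and the source reader inserts whatever decoders
 * are needed to satisfy the output type the application sets later. */
static HRESULT open_media(const WCHAR *url, IMFSourceReader **reader)
{
    IMFSourceResolver *resolver;
    IMFMediaSource *source;
    MF_OBJECT_TYPE type;
    IUnknown *object;
    HRESULT hr;

    if (FAILED(hr = MFCreateSourceResolver(&resolver)))
        return hr;
    hr = IMFSourceResolver_CreateObjectFromURL(resolver, url,
            MF_RESOLUTION_MEDIASOURCE, NULL, &type, &object);
    IMFSourceResolver_Release(resolver);
    if (FAILED(hr))
        return hr;

    hr = IUnknown_QueryInterface(object, &IID_IMFMediaSource, (void **)&source);
    IUnknown_Release(object);
    if (FAILED(hr))
        return hr;

    hr = MFCreateSourceReaderFromMediaSource(source, NULL, reader);
    IMFMediaSource_Release(source);
    return hr;
}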
If you do come across an application that requires we mimic native's specific arrangement of sources and transforms, it seems to me it wouldn't require that much effort to swap a different parser in for decodebin, and to implement the necessary bits in the media type conversion functions. Ultimately I suspect it'd be less work to have a decodebin wrapper + specific sources for applications that require them, than to manually implement every source and transform.
The current solution isn't very manual, and, as I mentioned earlier in this email, you also can construct a decodebin wrapper source using the infrastructure which is available. And in general terms, I think it's more work to maintain a solution that doesn't match up to windows, as we now have to think of all these edge cases and how to work around them.
What edge cases do you mean?
On 3/26/20 8:07 PM, Zebediah Figura wrote:
While I await your more complete response, I figure I might as well clarify some things.
I don't think that "doing the incorrect thing", i.e. failing to exactly emulate Windows, should necessarily be considered bad in itself, or at least not nearly as bad as all that.
My view, and my understanding of the Wine project's view in general as informed by its maintainers, is that emulating Windows is desirable for public documented behaviour (obviously), for undocumented behaviour that applications rely on (also obviously), for undocumented or semi-documented behaviour where there's no difference otherwise and where the native thing to do is obvious (e.g. the name of an internal registry key).
In my view, when completely incorrect behavior is only a few function calls away, that's not acceptable. The media source is a well documented public interface, and doing something different instead is just asking for trouble.
The media source is a documented public interface, but *which* media source is returned from IMFSourceResolver is not documented or guaranteed, and which transforms are returned from the source reader is also not guaranteed.
Using decodebin is not "completely incorrect", and emulating Windows' specific arrangement of sources and transforms is not "a few function calls away". It's several hundred lines of code to do caps conversion, the entire transform object (which, to be sure, we might need *anyway*, but also might not), and it means more work every time we have to deal with a new codec.
But there's not really a reason to emulate Windows otherwise. And in a case like this, where there's a significant benefit to not emulating Windows exactly, the only reason I see is "an application we don't know yet *might* depend on it". When faced with such a risk, I weigh the probability of that happening—and on the evidence of DirectShow applications, I see that as low—against the cost of having to change design—which also seems low to me; I can say from experience (cf. 5de712b5d) that swapping out a specific demuxer for decodebin isn't very difficult.
The converse of this is also true: if you want to quickly experiment with some gstreamer codec that we don't support yet, you just perform the hack I mentioned earlier, and then after you get it working you make it correct by adding the necessary gstreamer caps. Another hack we could use is to serialize the compressed caps, throw them in an MF_MT_USER_DATA attribute, and hope that an application never looks.
Sure. But I'm willing to assert that one of these things is more likely than the other. I'm prepared to eat my words if proven wrong.
But as I mentioned earlier, I don't think the amount of work required for adding a new media type is excessive. Microsoft only ships a limited number of sources and decoders; they fit on a single page: https://docs.microsoft.com/en-us/windows/win32/medfound/supported-media-form... , so it's not like we'll be adding new types for years to come.
That's seven demuxers and sixteen transforms, which is still kind of a lot. It also, unsurprisingly, isn't every format that Windows supports; just looking at my Windows 7 VM I see also NSC and LPCM, and a much longer list of transforms.
And it doesn't take into account host codecs.
Not to mention that what we're doing is barely "incorrect". Media Foundation is an API that's specifically meant to be extended in this way.
I don't think Microsoft ever meant for an application to make a media source that decodes compressed content, the source reader and media session exist for a reason.
I don't think they specifically meant for an application *not* to do that. It fits within the design of Media Foundation. The reason that transforms exist—in any media API—is because different containers can hold the same video or audio codec. GStreamer can already deal with that.
For that matter, some application could easily register its own codec libraries on Windows with a higher priority than the native ones (this happened with DirectShow); that's essentially no different than what I'm suggesting.
Yes, but even in that case, I assume they will still follow the basic concept of what a source is and is not.
I wouldn't necessarily assert that. A codec library—like GStreamer—might have its own set of transforms and autoplugging code. Easier to reuse that internally than to try to integrate it with every new decoding API that Microsoft releases.
I think the linked commit misses the point somewhat. That's partially because I don't think it makes sense to measure simplicity as an absolute metric simply using line count,
It's not just line count; the code itself is very simple: all we are doing is registering the supported input and output types of the decoder, setting the mime type of the container format for the source, and registering both objects.
and partially because it's missing the cost of adding other media types to the conversion functions
You can use the MF_MT_USER_DATA serialization hack if you're worried about that.
Unless you're proposing we use that in Wine, that doesn't affect anything.
(which is one of the reasons, though not the only reason, I thought to write this mail). But it's mostly because the cost of using decodebin, where it works, is essentially zero:
Except in the cases where an application does something unexpected.
In which case the cost is still no more than the cost of not using decodebin.
we write one media source, and it works for everything; no extension for ASF required.
There already is only one real implementation of the media source, the only "extension" is adding the mime type instead of using typefind. We will register the necessary byte stream handlers no matter which path we take.
Well, ideally we'd do what quartz does, and register a handler that catches every file, and returns a subtype that essentially identifies GStreamer.
If it never becomes necessary to write a source that outputs compressed samples, then we also don't have the cost of abstraction (which is always worth taking seriously!), and if it does, we come out even—we can still use your generic media source, or something like it.
Ultimately, I think that a decodebin wrapper is something we want to have anyway, for the sake of host codecs like Theora,
Where would we use support for Theora, if no windows applications are able to use it?
Anything which wants to be able to play back an arbitrary media file, i.e. generic media players, mostly. I see all sorts of bug reports for these with Quartz, so people are definitely using them.
and once we have it, I see zero cost in using it wherever else we can.
On 3/27/20 11:32 AM, Zebediah Figura wrote:
On 3/27/20 10:05 AM, Derek Lesho wrote:
On 3/26/20 4:56 PM, Zebediah Figura wrote:
There's another broad question I have with this approach, actually, which is fundamental enough that I have to assume it's had some thought put into it, but it would be nice if that discussion happened in a more public place, and was justified in the patches sent.
Essentially, the question is: what if we were to use decodebin directly?
As I understand (and admittedly Media Foundation is far more complex than I could hope to understand) an application which just calls IMFSourceResolver methods just needs to get back a working IMFMediaSource, and we could wrap decodebin with one of those, similar to the quartz wrapper.
The most basic applications (games) seem to either use a source reader or simple sample grabber media session to get their raw samples. If you want to add a hack for using decodebin, you can easily add a special source type, and for the media source of that type, just make a decodebin element instead of searching for a demuxer. In this case, the source reader wouldn't search for a decoder since the output type set by the application would be natively supported by the source. Then, as part of the hack, just always yield that source type in the source resolver. This is completely incorrect and probably shouldn't make its way into mainline, IMO. Also, I have reason to believe it may break Unity3D, as they do look at the native media types supported by the source, and getting around this would require adding some hackery in the source reader.
My assertion is this isn't really a "hack".
I think that if you have to modify media foundation code to work around shortcuts in winegstreamer, it can be classified as a hack. It is probable that most games will work with it, but I think it makes more sense as a staging enhancement.
This is something that's reasonable to do, and that fits within the design of Media Foundation.
I have a hard time subscribing to the idea that this is within the design of media foundation. I took a look on github, and a good number of applications find desired streams using the subtype from the source reader's GetNativeMediaType. If we were to output uncompressed types, this would break. To work around this, we'd either have to expose incorrect media types on our streams, and add an exception to the decoder finding behavior in the source reader and topology loader, or expose some private interface for getting the true native types. And in either case, we'd still have to do caps conversion for a compressed media type.
It's changing the implementation details, not the API contract. We have the freedom to do that.
First of all, this is something I think we want to do anyway. Microsoft has no demuxer for, say, Vorbis (at least, there's not one registered on my Windows 10 machine), but I think that we want to be able to play back Vorbis files anyway (in, say, a Win32 media player application).
I'm pretty sure our goal is not to extend windows functionality.
Actually, I'd assert the opposite. Host integration has always been a feature of Wine, not a bug. That goes beyond just mapping program launcher entries to .desktop files; it includes things like:
- mapping host devices to DOS drives,
- allowing unix paths to be used in file system functions,
- exposing the unix file system as a shell folder,
- making winebrowser the default browser (instead of explorer),
- exposing public Wine-specific exports from ntdll (those not prefixed with a double underscore),
- making use of host credentials in advapi32 (on Mac, anyway),
- exposing host GStreamer and QuickTime codecs in DirectShow.
We extend host functionality to integrate with the system, and to make using Wine easier. Using host codecs from mfplat does both.
I'm unsure why anyone would want to use a windows media player over something like VLC. But as I mentioned earlier, it is possible to add a hack using decodebin with minimal effort, and we could possibly only use this hack as a fallback if the container doesn't have a registered byte stream handler. I think we would get the best of both worlds with this solution.
Instead of writing yet another source for vorbis,
You don't "write another source", you just expose a new source object and link it with a new source_desc structure, which specifies the mime type of the container format: https://github.com/Guy1524/wine/blob/mfplat_rebase/dlls/winegstreamer/media_...
and for each other obscure format, we just write one generic decodebin wrapper.
Not to mention, you'd have to perform this step with a decodebin wrapper anyway.
The amount of abstraction, and the amount of actual code you have to add, is beside the point, but it's also not quite as simple as you make out there:
- First and foremost, we also need to add caps conversion functions,
since vorbisparse doesn't output raw video, and we need to be able to feed it through theoradec afterwards.
You need that anyway; chromium manually creates H.264 encoder and decoder instances and uses them without anything from the control layer. Because of this, we will at least need to keep the mediatype->caps conversion function for compressed types.
- Also, I'm guessing you haven't dealt with "always" pads yet;
vorbisparse doesn't send "no-more-pads".
That would be even easier to support.
- In the case that elements get added, removed, or changed from upstream
GStreamer, we have to reflect that here.
Elaborate?
By contrast, the amount of code we have to add to deal with a new format when using decodebin is *exactly zero*. We don't actually have to write "audio/x-vorbis" anywhere in our code.
Okay, adding that path as a fallback makes a lot of sense then, since we still have full ability to fix compatibility issues with types that are natively supported in windows.
After all, we don't write it anywhere in quartz, and yet Vorbis still works. (If an application were to ask what the stream type is—and I doubt any do—we report it as MEDIATYPE_Stream, MEDIASUBTYPE_Gstreamer).
Second of all, the most obvious benefit, at least while looking at these patches, is that you now don't need to write caps <-> IMFMediaType conversion for every type on the planet.
I don't see this as a problem; most games I've seen will use either H.264 or WMV, and adding new formats isn't that difficult. You look at the caps exposed by the gstreamer demuxer, find the equivalent attributes in media foundation, and fill in the gaps. In return you get correct behavior, and a source that can be paired with a correctly written MFT from outside of the wine source.
This is basically true until it isn't. And it already isn't true if we want to support host codecs. An "add it when we need it" approach is going to be hell on media players.
I also think you're kind of underestimating the cost here. I don't like making LoC arguments, but your code to deal with those caps is something like 370 LoC, maybe 350 LoC with some deduplication.
As mentioned earlier in the email, the IMFMediaType->caps path will always be necessary, to support the decoder transforms, which real applications do use by themselves.
There's also the developer cost of looking up what GStreamer caps values mean (which usually requires looking at the source), looking up the Media Foundation attributes, testing them to ensure that the conversion is correct, figuring out how to deal with caps that either GStreamer or Media Foundation can't handle...
Another benefit is that you let all of the decoding happen within a single GStreamer pipeline, which is probably better for performance.
I have applications working right now with completely acceptable performance, and we are still copying every uncompressed sample an extra time, which we may be able to optimize away. Copying compressed samples, on the other hand, is not that big of a deal at all.
I don't doubt it works regardless. DirectShow did too, back before I got rid of the transforms. It's also not the main reason I'm proposing this.
On the other hand, decreasing CPU usage is also nice.
How would this reduce CPU usage?
Another thing that occurred to me is, letting everything happen in one GStreamer pipeline is nice for debugging.
I disagree; decodebin adds complexity to the pipeline that isn't otherwise necessary, like typefind.
You also can simplify your postprocessing step to adding a single videoconvert and audioconvert, instead of having to manually (or semi-manually) add e.g. an h264 parser element.
It isn't manual; we find a parser which corrects the caps. And as I mentioned in an earlier email, we could also use caps negotiation for this; all the setup is in place.
Hence "semi-manually". You still have to manually fix the caps so that the element will be added.
As mentioned, we will need this regardless.
These are some of the benefits I had in mind when removing the GStreamer quartz transforms.
Even in the case where the application manually creates e.g. an MPEG-4 source, my understanding is it's still the source's job to automatically append transforms to match the requested type.
It's not the source's job at all. On windows, where sources are purpose-built, they apply no transformations to the types they get, their goal is only to get raw sample data from a container / stream. It's the job of the media session, or source reader to apply transforms when needed.
I see, I confused the media source with the source reader. I guess that argument isn't valid, but I don't think it really affects my conclusion.
We'd just be moving that from the mfplat level to the gstreamer level—i.e. let decodebin select the 'transforms' needed to convert to raw video and audio.
The media session and source reader shouldn't be affected by winegstreamer details. If a user/an application decides to install a third party decoder, we still need the infrastructure in place for this to function.
It obviously wouldn't match native structure, but it's not clear to me that it would fail to match native in a way that would cause problems. Judging from my experience with quartz, most applications aren't going to care how their media is decoded as long as they get raw samples out of it.
Most games, or most applications? Chromium uses media foundation in a much more granular way.
Yes, most applications.
What does Chromium do?
As mentioned earlier, it uses decoders and encoders manually, so we'll have to fix up/parse the data we get anyway.
Only a select few build the graph manually because they don't realize that they can autoplug, or make assumptions about which filters will be present once autoplugging is done, and some of those even fall back to autoplugging if their preferred method fails. Maybe the situation is different with mfplat, but given that there is a way to let mfplat figure out which sources and transforms to use, I'm gonna be really surprised if most applications aren't using it.
If you do come across an application that requires we mimic native's specific arrangement of sources and transforms, it seems to me it wouldn't require that much effort to swap a different parser in for decodebin, and to implement the necessary bits in the media type conversion functions. Ultimately I suspect it'd be less work to have a decodebin wrapper + specific sources for applications that require them, than to manually implement every source and transform.
The current solution isn't very manual, and, as I mentioned earlier in this email, you also can construct a decodebin wrapper source using the infrastructure which is available. And in general terms, I think it's more work to maintain a solution that doesn't match up to windows, as we now have to think of all these edge cases and how to work around them.
What edge cases do you mean?
Cases where applications expect compressed streams from the source.
On 3/26/20 8:07 PM, Zebediah Figura wrote:
While I await your more complete response, I figure I might as well clarify some things.
I don't think that "doing the incorrect thing", i.e. failing to exactly emulate Windows, should necessarily be considered bad in itself, or at least not nearly as bad as all that.
My view, and my understanding of the Wine project's view in general as informed by its maintainers, is that emulating Windows is desirable for public documented behaviour (obviously), for undocumented behaviour that applications rely on (also obviously), for undocumented or semi-documented behaviour where there's no difference otherwise and where the native thing to do is obvious (e.g. the name of an internal registry key).
In my view, when completely incorrect behavior is only a few function calls away, that's not acceptable. The media source is a well documented public interface, and doing something different instead is just asking for trouble.
The media source is a documented public interface, but *which* media source is returned from IMFSourceResolver is not documented or guaranteed, and which transforms are returned from the source reader is also not guaranteed.
Using decodebin is not "completely incorrect", and emulating Windows' specific arrangement of sources and transforms is not "a few function calls away".
Finding out the media type of a source is one function call away.
It's several hundred lines of code to do caps conversion, the entire transform object (which, to be sure, we might need *anyway*,
We will.
but also might not), and it means more work every time we have to deal with a new codec.
Unless we implement the decodebin solution as a fallback for unknown types. Taking the fallback approach means we will only have to go through this process for every type natively supported by windows.
But there's not really a reason to emulate Windows otherwise. And in a case like this, where there's a significant benefit to not emulating Windows exactly, the only reason I see is "an application we don't know yet *might* depend on it". When faced with such a risk, I weigh the probability of that happening—and on the evidence of DirectShow applications, I see that as low—against the cost of having to change design—which also seems low to me; I can say from experience (cf. 5de712b5d) that swapping out a specific demuxer for decodebin isn't very difficult.
The converse of this is also true: if you want to quickly experiment with some gstreamer codec that we don't support yet, you just perform the hack I mentioned earlier, and then after you get it working you make it correct by adding the necessary gstreamer caps. Another hack we could use is to serialize the compressed caps, throw them in an MF_MT_USER_DATA attribute, and hope that an application never looks.
Sure. But I'm willing to assert that one of these things is more likely than the other. I'm prepared to eat my words if proven wrong.
What do you mean, that in most cases applications won't care how they get their samples? That may be true, but I still think the edge cases are big enough to warrant the accurate approach. Unity3D, a pretty important user of this work, gets the native media types of the source, for instance. What they use it for, I'm not sure, but I wouldn't take any chances.
But as I mentioned earlier, I don't think the amount of work required for adding a new media type is excessive. Microsoft only ships a limited number of sources and decoders; they fit on a single page: https://docs.microsoft.com/en-us/windows/win32/medfound/supported-media-form... , so it's not like we'll be adding new types for years to come.
That's seven demuxers and sixteen transforms, which is still kind of a lot. It also, unsurprisingly, isn't every format that Windows supports; just looking at my Windows 7 VM I see also NSC and LPCM, and a much longer list of transforms.
And it doesn't take into account host codecs.
Insert fallback argument here :P
Not to mention that what we're doing is barely "incorrect". Media Foundation is an API that's specifically meant to be extended in this way.
I don't think Microsoft ever meant for an application to make a media source that decodes compressed content, the source reader and media session exist for a reason.
I don't think they specifically meant for an application *not* to do that. It fits within the design of Media Foundation. The reason that transforms exist—in any media API—is because different containers can hold the same video or audio codec. GStreamer can already deal with that.
For that matter, some application could easily register its own codec libraries on Windows with a higher priority than the native ones (this happened with DirectShow); that's essentially no different than what I'm suggesting.
Yes, but even in that case, I assume they will still follow the basic concept of what a source is and is not.
I wouldn't necessarily assert that. A codec library—like GStreamer—might have its own set of transforms and autoplugging code. Easier to reuse that internally than to try to integrate it with every new decoding API that Microsoft releases.
That could potentially break other applications though, and I don't think codec libraries are comparable to gstreamer, they usually just handle a specific task and plug into the relevant part of the media API, whether it be dshow, media foundation, or gstreamer.
I think the linked commit misses the point somewhat. That's partially because I don't think it makes sense to measure simplicity as an absolute metric simply using line count,
It's not just line count; the code itself is very simple: all we are doing is registering the supported input and output types of the decoder, setting the mime type of the container format for the source, and registering both objects.
and partially because it's missing the cost of adding other media types to the conversion functions
You can use the MF_MT_USER_DATA serialization hack if you're worried about that.
Unless you're proposing we use that in Wine, that doesn't affect anything.
You're right, the decodebin fallback is a much cleaner solution than that.
(which is one of the reasons, though not the only reason, I thought to write this mail). But it's mostly because the cost of using decodebin, where it works, is essentially zero:
Except in the cases where an application does something unexpected.
In which case the cost is still no more than the cost of not using decodebin.
we write one media source, and it works for everything; no extension for ASF required.
There already is only one real implementation of the media source, the only "extension" is adding the mime type instead of using typefind. We will register the necessary byte stream handlers no matter which path we take.
Well, ideally we'd do what quartz does, and register a handler that catches every file, and returns a subtype that essentially identifies GStreamer.
If it never becomes necessary to write a source that outputs compressed samples, then we also don't have the cost of abstraction (which is always worth taking seriously!), and if it does, we come out even—we can still use your generic media source, or something like it.
Ultimately, I think that a decodebin wrapper is something we want to have anyway, for the sake of host codecs like Theora,
Where would we use support for Theora, if no windows applications are able to use it?
Anything which wants to be able to play back an arbitrary media file, i.e. generic media players, mostly. I see all sorts of bug reports for these with Quartz, so people are definitely using them.
Heh.
and once we have it, I see zero cost in using it wherever else we can.
On 3/27/20 1:08 PM, Derek Lesho wrote:
On 3/27/20 11:32 AM, Zebediah Figura wrote:
On 3/27/20 10:05 AM, Derek Lesho wrote:
On 3/26/20 4:56 PM, Zebediah Figura wrote:
There's another broad question I have with this approach, actually, which is fundamental enough that I have to assume it's had some thought put into it, but it would be nice if that discussion happened in a more public place, and was justified in the patches sent.
Essentially, the question is: what if we were to use decodebin directly?
As I understand (and admittedly Media Foundation is far more complex than I could hope to understand) an application which just calls IMFSourceResolver methods just needs to get back a working IMFMediaSource, and we could wrap decodebin with one of those, similar to the quartz wrapper.
The most basic applications (games) seem to either use a source reader or simple sample grabber media session to get their raw samples. If you want to add a hack for using decodebin, you can easily add a special source type, and for the media source of that type, just make a decodebin element instead of searching for a demuxer. In this case, the source reader wouldn't search for a decoder since the output type set by the application would be natively supported by the source. Then, as part of the hack, just always yield that source type in the source resolver. This is completely incorrect and probably shouldn't make its way into mainline, IMO. Also, I have reason to believe it may break Unity3D, as they do look at the native media types supported by the source, and getting around this would require adding some hackery in the source reader.
My assertion is this isn't really a "hack".
I think that if you have to modify media foundation code to work around shortcuts in winegstreamer, it can be classified as a hack. It is probable that most games will work with it, but I think it makes more sense as a staging enhancement.
There's nothing we have to modify in core Media Foundation code (though modifying bytestream_get_url_hint() would help). I disagree with your assertion that it's a hack, though. Or, more saliently, that differing from Windows' implementation details inherently means it's bad and wrong.
I'd also point out that "enhancements that aren't suitable for upstream" isn't a purpose of Staging. "Patches that aren't good enough for upstream yet" is.
This is something that's reasonable to do, and that fits within the design of Media Foundation.
I have a hard time subscribing to the idea that this is within the design of media foundation. I took a look on github, and a good number of applications find desired streams using the subtype from the source reader's GetNativeMediaType. If we were to output uncompressed types, this would break. To work around this, we'd either have to expose incorrect media types on our streams, and add an exception to the decoder finding behavior in the source reader and topology loader, or expose some private interface for getting the true native types. And in either case, we'd still have to do caps conversion for a compressed media type.
So, I went to see what exactly these programs were doing with GetNativeMediaType(). I figured I'd check the first ten unique ones, skipping anything that looks like a binding or wrapper, and here's what I came up with:
https://github.com/KennethEvans/VS-Audio uses it in one place to test whether a stream is present, in another place just to dump the type to stdout, and in a third place to get the type from a capture device.
https://github.com/Csineneo/Vivaldi uses it in one place just to retrieve the major type, and in another place to get the type from a capture device.
https://github.com/Hanumanthu2020/HanuWork uses it for capture devices.
https://github.com/clarkezone/audiovisualizer uses it in one place to test the major type and subtype; it checks if the subtype is mp3 but doesn't do anything with that information. It uses it in another place passed through to its own API.
https://github.com/nickluo/camaro-sdk uses it for video capture.
https://github.com/mrojkov/Citrus uses it to get the major type, width, and height.
https://github.com/ms-iot/ros_win_camera uses it for image/video capture.
https://github.com/daramkun/SamplePlay uses it to get the frame rate, width, and height of a video stream, and the number of channels, sample rate, and bit depth of an audio stream. It outputs to uncompressed samples, and in the case of the latter it uses those parameters to determine a PCM type. (Even though not all audio types have a "bit depth"...)
https://github.com/vipoo/SuperMFLib uses it in one place to check the major type. It uses it in another place to get the frame rate, PAR, width, and height of a video stream, and the number of channels and sample rate of an audio stream. It outputs to uncompressed samples.
https://github.com/Brhsoftco/PlexDL-MetroSet_UI uses it for video capture.
The conclusion I draw from this is:
* most applications which call GetNativeMediaType() are doing so on capture sources [which, it goes without saying, are outside the scope of gstreamer],
* the rest only care about details that wouldn't change from decoding: major type, frame rate, width, height, PAR, channel count, sample rate,
* none of the applications concerned with decoding audio actually set the media type to be the native media type.
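The pattern those applications follow is essentially the following sketch; only the attributes the survey found are read, and nothing in it depends on whether the subtype is compressed:

/* Sketch of typical GetNativeMediaType() usage: read a few stream
 * parameters, then request uncompressed output regardless. */
static HRESULT dump_video_stream(IMFSourceReader *reader)
{
    IMFMediaType *native;
    UINT64 size;
    GUID major;
    HRESULT hr;

    hr = IMFSourceReader_GetNativeMediaType(reader,
            MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, &native);
    if (FAILED(hr))
        return hr;

    IMFMediaType_GetGUID(native, &MF_MT_MAJOR_TYPE, &major);
    if (SUCCEEDED(IMFMediaType_GetUINT64(native, &MF_MT_FRAME_SIZE, &size)))
        printf("%u x %u\n", (unsigned int)(size >> 32), (unsigned int)(UINT32)size);

    IMFMediaType_Release(native);
    return hr;
}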
It's changing the implementation details, not the API contract. We have the freedom to do that.
First of all, this is something I think we want to do anyway. Microsoft has no demuxer for, say, Vorbis (at least, there's not one registered on my Windows 10 machine), but I think that we want to be able to play back Vorbis files anyway (in, say, a Win32 media player application).
I'm pretty sure our goal is not to extend windows functionality.
Actually, I'd assert the opposite. Host integration has always been a feature of Wine, not a bug. That goes beyond just mapping program launcher entries to .desktop files; it includes things like:
- mapping host devices to DOS drives,
- allowing unix paths to be used in file system functions,
- exposing the unix file system as a shell folder,
- making winebrowser the default browser (instead of explorer),
- exposing public Wine-specific exports from ntdll (those not prefixed with a double underscore),
- making use of host credentials in advapi32 (on Mac, anyway),
- exposing host GStreamer and QuickTime codecs in DirectShow.
We extend host functionality to integrate with the system, and to make using Wine easier. Using host codecs from mfplat does both.
I'm unsure why anyone would want to use a windows media player over something like VLC.
I'm unsure as well, but that's not really our place to judge. We just make the software work where we can. If I had to guess, though, I'd say that a native media player offers a UI that the user prefers, includes some feature that host players don't, or is more familiar to the user (who may have recently migrated from Windows)...
But as I mentioned earlier, it is possible to add a hack using decodebin with minimal effort, and we could possibly only use this hack as a fallback if the container doesn't have a registered byte stream handler. I think we would get the best of both worlds with this solution.
In a sense I'm kind of proposing exactly that, except that we rely on decodebin first, and only add other sources if it turns out that decodebin doesn't work for something.
Instead of writing yet another source for vorbis,
You don't "write another source", you just expose a new source object and link it with a new source_desc structure, which specifies the mime type of the container format: https://github.com/Guy1524/wine/blob/mfplat_rebase/dlls/winegstreamer/media_...
and for each other obscure format, we just write one generic decodebin wrapper.
Not to mention, you'd have to perform this step with a decodebin wrapper anyway.
The amount of abstraction, and the amount of actual code you have to add, is beside the point, but it's also not quite as simple as you make out there:
- First and foremost, we also need to add caps conversion functions,
since vorbisparse doesn't output raw video, and we need to be able to feed it through theoradec afterwards.
You need that anyway; chromium manually creates H.264 encoder and decoder instances and uses them without anything from the control layer. Because of this, we will at least need to keep the mediatype->caps conversion function for compressed types.
It creates the h264 decoder transform manually, and doesn't use the rest of mfplat, or do I misunderstand you?
- Also, I'm guessing you haven't dealt with "always" pads yet;
vorbisparse doesn't send "no-more-pads".
That would be even easier to support.
- In the case that elements get added, removed, or changed from upstream
GStreamer, we have to reflect that here.
Elaborate?
If GStreamer supports a new media type or removes support for one, we have to reflect that. If caps details change upstream, that's something we should pay attention to as well; it could affect our conversion.
By contrast, the amount of code we have to add to deal with a new format when using decodebin is *exactly zero*. We don't actually have to write "audio/x-vorbis" anywhere in our code.
Okay, adding that path as a fallback makes a lot of sense then, since we still have full ability to fix compatibility issues with types that are natively supported in windows.
After all, we don't write it anywhere in quartz, and yet Vorbis still works. (If an application were to ask what the stream type is—and I doubt any do—we report it as MEDIATYPE_Stream, MEDIASUBTYPE_Gstreamer).
Second of all, the most obvious benefit, at least while looking at these patches, is that you now don't need to write caps <-> IMFMediaType conversion for every type on the planet.
I don't see this as a problem; most games I've seen will use either H.264 or WMV, and adding new formats isn't that difficult. You look at the caps exposed by the gstreamer demuxer, find the equivalent attributes in media foundation, and fill in the gaps. In return you get correct behavior, and a source that can be paired with a correctly written MFT from outside of the wine source.
This is basically true until it isn't. And it already isn't true if we want to support host codecs. An "add it when we need it" approach is going to be hell on media players.
I also think you're kind of underestimating the cost here. I don't like making LoC arguments, but your code to deal with those caps is something like 370 LoC, maybe 350 LoC with some deduplication.
As mentioned earlier in the email, the IMFMediaType->caps path will always be necessary, to support the decoder transforms, which real applications do use by themselves.
Sure. But as I understand, we'd only need to do the conversion one way (i.e. Media Foundation -> GStreamer), and we'd only need to bother with it for transforms that are explicitly created.
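Sketched, for the H.264 case—the stream-format and alignment fields here are my guess at what a GStreamer decoder element would want, not something taken from the patches:

/* Illustrative one-way conversion: build compressed caps from the media
 * type that an explicitly created decoder transform was given. */
static GstCaps *caps_from_h264_media_type(IMFMediaType *type)
{
    UINT64 frame_size;
    GstCaps *caps = gst_caps_new_simple("video/x-h264",
            "stream-format", G_TYPE_STRING, "byte-stream",
            "alignment", G_TYPE_STRING, "au", NULL);

    if (SUCCEEDED(IMFMediaType_GetUINT64(type, &MF_MT_FRAME_SIZE, &frame_size)))
        gst_caps_set_simple(caps,
                "width", G_TYPE_INT, (gint)(frame_size >> 32),
                "height", G_TYPE_INT, (gint)(UINT32)frame_size, NULL);
    return caps;
}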
There's also the developer cost of looking up what GStreamer caps values mean (which usually requires looking at the source), looking up the Media Foundation attributes, testing them to ensure that the conversion is correct, figuring out how to deal with caps that either GStreamer or Media Foundation can't handle...
Another benefit is that you let all of the decoding happen within a single GStreamer pipeline, which is probably better for performance.
I have applications working right now with completely acceptable performance, and we are still copying every uncompressed sample an extra time, which we may be able to optimize away. Copying compressed samples, on the other hand, is not that big of a deal at all.
I don't doubt it works regardless. DirectShow did too, back before I got rid of the transforms. It's also not the main reason I'm proposing this.
On the other hand, decreasing CPU usage is also nice.
How would this reduce CPU usage?
It's only an armchair hypothesis, so feel free to just ignore it, but it probably means fewer buffer copies.
Another thing that occurred to me is, letting everything happen in one GStreamer pipeline is nice for debugging.
I disagree; decodebin adds complexity to the pipeline that isn't otherwise necessary, like typefind.
I mostly meant along the lines of keeping all of the decoders in the same pipeline as the demuxer, which in my experience debugging GStreamer is easier to read than when they were bouncing through quartz.
typefind is pretty much necessary, unless we reimplement it ourselves (which, I understand, you've taken as a given, but I'm not so sure). I don't see how it's not nice for debugging either—sure, it takes up a lot of lines in the log figuring out the type, but in my experience I can always skip over that by searching for winegstreamer callbacks, or no-more-pads, or whatever it is I'm trying to debug.
You also can simplify your postprocessing step to adding a single videoconvert and audioconvert, instead of having to manually (or semi-manually) add e.g. an h264 parser element.
It isn't manual; we find a parser which corrects the caps. And as I mentioned in an earlier email, we could also use caps negotiation for this; all the setup is in place.
Hence "semi-manually". You still have to manually fix the caps so that the element will be added.
As mentioned, we will need this regardless.
These are some of the benefits I had in mind when removing the GStreamer quartz transforms.
Even in the case where the application manually creates e.g. an MPEG-4 source, my understanding is it's still the source's job to automatically append transforms to match the requested type.
It's not the source's job at all. On windows, where sources are purpose-built, they apply no transformations to the types they get, their goal is only to get raw sample data from a container / stream. It's the job of the media session, or source reader to apply transforms when needed.
I see, I confused the media source with the source reader. I guess that argument isn't valid, but I don't think it really affects my conclusion.
We'd just be moving that from the mfplat level to the gstreamer level—i.e. let decodebin select the 'transforms' needed to convert to raw video and audio.
The media session and source reader shouldn't be affected by winegstreamer details. If a user/an application decides to install a third party decoder, we still need the infrastructure in place for this to function.
It obviously wouldn't match native structure, but it's not clear to me that it would fail to match native in a way that would cause problems. Judging from my experience with quartz, most applications aren't going to care how their media is decoded as long as they get raw samples out of it.
Most games, or most applications? Chromium uses media foundation in a much more granular way.
Yes, most applications.
What does Chromium do?
As mentioned earlier, it uses decoders and encoders manually, so we'll have to fix up/parse the data we get anyway.
Only a select few build the graph manually because they don't realize that they can autoplug, or make assumptions about which filters will be present once autoplugging is done, and some of those even fall back to autoplugging if their preferred method fails. Maybe the situation is different with mfplat, but given that there is a way to let mfplat figure out which sources and transforms to use, I'm gonna be really surprised if most applications aren't using it.
If you do come across an application that requires we mimic native's specific arrangement of sources and transforms, it seems to me it wouldn't require that much effort to swap a different parser in for decodebin, and to implement the necessary bits in the media type conversion functions. Ultimately I suspect it'd be less work to have a decodebin wrapper + specific sources for applications that require them, than to manually implement every source and transform.
The current solution isn't very manual, and, as I mentioned earlier in this email, you also can construct a decodebin wrapper source using the infrastructure which is available. And in general terms, I think it's more work to maintain a solution that doesn't match up to windows, as we now have to think of all these edge cases and how to work around them.
What edge cases do you mean?
Cases where applications expect compressed streams from the source.
The way I see it, you're essentially thinking of those "edge cases" now, except that you're not considering them edge cases. If we use decodebin, they become more obviously edge cases. But it doesn't take a lot of thought, in my view. We just need to ask, "what happens if an application depends on getting compressed samples?" and answer, "well, then we create a new media source, probably reusing most of the same infrastructure, that utilises the parts of gstreamer that output compressed samples." We don't actually have to do that work until we find such an application.
On 3/26/20 8:07 PM, Zebediah Figura wrote:
While I await your more complete response, I figure I might as well clarify some things.
I don't think that "doing the incorrect thing", i.e. failing to exactly emulate Windows, should necessarily be considered bad in itself, or at least not nearly as bad as all that.
My view, and my understanding of the Wine project's view in general as informed by its maintainers, is that emulating Windows is desirable for public documented behaviour (obviously), for undocumented behaviour that applications rely on (also obviously), for undocumented or semi-documented behaviour where there's no difference otherwise and where the native thing to do is obvious (e.g. the name of an internal registry key).
In my view, when completely incorrect behavior is only a few function calls away, that's not acceptable. The media source is a well documented public interface, and doing something different instead is just asking for trouble.
The media source is a documented public interface, but *which* media source is returned from IMFSourceResolver is not documented or guaranteed, and which transforms are returned from the source reader is also not guaranteed.
Using decodebin is not "completely incorrect", and emulating Windows' specific arrangement of sources and transforms is not "a few function calls away".
Finding out the media type of a source is one function call away.
I don't understand what you mean. Which function call?
It's several hundred lines of code to do caps conversion, the entire transform object (which, to be sure, we might need *anyway*,
We will.
but also might not), and it means more work every time we have to deal with a new codec.
Unless we implement the decodebin solution as a fallback for unknown types. Taking the fallback approach means we will only have to go through this process for every type natively supported by windows.
But there's not really a reason to emulate Windows otherwise. And in a case like this, where there's a significant benefit to not emulating Windows exactly, the only reason I see is "an application we don't know yet *might* depend on it". When faced with such a risk, I weigh the probability of that happening—and on the evidence of DirectShow applications, I see that as low—against the cost of having to change design—which also seems low to me; I can say from experience (cf. 5de712b5d) that swapping out a specific demuxer for decodebin isn't very difficult.
The converse of this is also true: if you want to quickly experiment with some gstreamer codec that we don't support yet, you just perform the hack I mentioned earlier, and then after you get it working you make it correct by adding the necessary gstreamer caps. Another hack we could use is to serialize the compressed caps, throw them in an MF_MT_USER_DATA attribute, and hope that an application never looks.
Sure. But I'm willing to assert that one of these things is more likely than the other. I'm prepared to eat my words if proven wrong.
What do you mean, that in most cases applications won't care how they get their samples? That may be true, but I still think the edge cases are big enough to warrant the accurate approach. Unity3D, a pretty important user of this work, gets the native media types of the source, for instance. What they use it for, I'm not sure, but I wouldn't take any chances.
I mean it's more likely that an application wants uncompressed samples than that it wants compressed samples. As I see it, the latter case is (1) still hypothetical, (2) wouldn't be very difficult to implement either.
Based on my survey of GitHub above, I have to wonder what aspects of the native media type Unity3D actually cares about. What attributes does it ask for? Does it actually set the decoder to use a compressed media type? Even if the answer is yes, does it break if we return an uncompressed media type?
But as I mentioned earlier, I don't think the amount of work required for adding a new media type is excessive. Microsoft only ships a limited number of sources and decoders; they fit on a single page: https://docs.microsoft.com/en-us/windows/win32/medfound/supported-media-form... , so it's not like we'll be adding new types for years to come.
That's seven demuxers and sixteen transforms, which is still kind of a lot. It also, unsurprisingly, isn't every format that Windows supports; just looking at my Windows 7 VM I see also NSC and LPCM, and a much longer list of transforms.
And it doesn't take into account host codecs.
Insert fallback argument here :P
Not to mention that what we're doing is barely "incorrect". Media Foundation is an API that's specifically meant to be extended in this way.
I don't think Microsoft ever meant for an application to make a media source that decodes compressed content, the source reader and media session exist for a reason.
I don't think they specifically meant for an application *not* to do that. It fits within the design of Media Foundation. The reason that transforms exist—in any media API—is because different containers can hold the same video or audio codec. GStreamer can already deal with that.
For that matter, some application could easily register its own codec libraries on Windows with a higher priority than the native ones (this happened with DirectShow); that's essentially no different than what I'm suggesting.
Yes, but even in that case, I assume they will still follow the basic concept of what a source is and is not.
I wouldn't necessarily assert that. A codec library—like GStreamer—might have its own set of transforms and autoplugging code. Easier to reuse that internally than to try to integrate it with every new decoding API that Microsoft releases.
That could potentially break other applications though, and I don't think codec libraries are comparable to gstreamer, they usually just handle a specific task and plug into the relevant part of the media API, whether it be dshow, media foundation, or gstreamer.
GStreamer *is* a codec library. That's exactly what it is.
We don't yet know that any other applications would be broken; that's still hypothetical. It's not unheard of for applications to mess with Windows internals in ways that break other applications, to be sure. But it's also not a good idea.
I think the linked commit misses the point somewhat. That's partially because I don't think it makes sense to measure simplicity as an absolute metric simply using line count,
It's not just line count, the code itself is very simple, all we are doing is registering the supported input and output types of the decoder, setting the mime type of the container format for the source, and and registering both objects.
and partially because it's missing the cost of adding other media types to the conversion functions
You can use the MF_MT_USER_DATA serialization hack if you're worried about that.
Unless you're proposing we use that in Wine, that doesn't affect anything.
You're right, the decodebin fallback is a much cleaner solution than that.
(which is one of the reasons, though not the only reason, I thought to write this mail). But it's mostly because the cost of using decodebin, where it works, is essentially zero:
Except in the cases where an application does something unexpected.
In which case the cost is still no more than the cost of not using decodebin.
we write one media source, and it works for everything; no extension for ASF required.
There already is only one real implementation of the media source, the only "extension" is adding the mime type instead of using typefind. We will register the necessary byte stream handlers no matter which path we take.
Well, ideally we'd do what quartz does, and register a handler that catches every file, and returns a subtype that essentially identifies GStreamer.
If it never becomes necessary to write a source that outputs compressed samples, then we also don't have the cost of abstraction (which is always worth taking seriously!), and if it does, we come out even—we can still use your generic media source, or something like it.
Ultimately, I think that a decodebin wrapper is something we want to have anyway, for the sake of host codecs like Theora,
Where would we use support for Theora, if no windows applications are able to use it.
Anything which wants to be able to play back an arbitrary media file, i.e. generic media players, mostly. I see all sorts of bug reports for these with Quartz, so people are definitely using them.
Heh.
and once we have it, I see zero cost in using it wherever else we can.
On 3/27/20 3:11 PM, Zebediah Figura wrote:
On 3/27/20 1:08 PM, Derek Lesho wrote:
On 3/27/20 11:32 AM, Zebediah Figura wrote:
On 3/27/20 10:05 AM, Derek Lesho wrote:
On 3/26/20 4:56 PM, Zebediah Figura wrote:
There's another broad question I have with this approach, actually, which is fundamental enough I have to assume it's at had some thought put into it, but it would be nice if that discussion happened in a more public place, and was justified in the patches sent.
Essentially, the question is: what if we were to use decodebin directly?
As I understand (and admittedly Media Foundation is far more complex than I could hope to understand) an application which just calls IMFSourceResolver methods just needs to get back a working IMFMediaSource, and we could wrap decodebin with one of those, similar to the quartz wrapper.
The most basic applications (games) seem to either use a source reader or simple sample grabber media session to get their raw samples. If you want to add a hack for using decodebin, you can easily add a special source type, and for the media source of that type, just make a decodebin element instead of searching for a demuxer. In this case, the source reader wouldn't search for a decoder since the output type set by the application would be natively supported by the source. Then, as part of the hack, just always yield that source type in the source resolver. This is completely incorrect and probably shouldn't make it's way into mainline, IMO. Also, I have reason to believe it may break Unity3D, as they do look at the native media types supported by the source, and getting around this would require adding some hackery in the source reader.
My assertion is this isn't really a "hack".
I think that if you have to modify media foundation code to workaround shortcuts in winegstreamer, it can be classified as a hack. It is probable that most games will work with it, but I think it makes more sense as a staging enhancement.
There's nothing we have to modify in core Media Foundation code (though modifying bytesteream_get_url_hint() would help). I disagree with your assertion that it's a hack, though. Or, more saliently, that differing from Windows' implementation details inherently means it's bad and wrong.
I'd also point out that "enhancements that aren't suitable for upstream" isn't a purpose of Staging. "Patches that aren't good enough for upstream yet" is.
This is something that's reasonable to do, and that fits within the design of Media Foundation.
I have a hard time subscribing to the idea that this is within the design of media foundation. I took a look on github, and a good amount of applications find desired streams using the subtype from the source reader's GetNativeMediaType. If we were to output uncompressed types, this would break. To work around this, we'd either have to expose incorrect media types on our streams, and add an exception to the decoder finding behavior in the source reader and topology loader, or expose some private interface for getting the true native types. And in either case, we'd still have to conversion of caps for a compressed media type.
So, I went to see what exactly these programs were doing with GetNativeMediaType(). I figured I'd check the first ten unique ones, skipping anything that looks like a binding or wrapper, and here's what I came up with:
https://github.com/KennethEvans/VS-Audio uses it in one place to test whether a stream is present, in another place just to dump the type to stdout, and in a third place to get the type from a capture device.
https://github.com/Csineneo/Vivaldi uses it in one place just to retrieve the major type, and in another place to get the type from a capture device.
https://github.com/Hanumanthu2020/HanuWork uses it for capture devices.
https://github.com/clarkezone/audiovisualizer uses it in one place to test the major type and subtype; it checks if the subtype is mp3 but doesn't do anything with that information. It uses it in another place passed through to its own API.
https://github.com/nickluo/camaro-sdk uses it for video capture.
https://github.com/mrojkov/Citrus uses it to get the major type, width, and height.
https://github.com/ms-iot/ros_win_camera uses it for image/video capture.
https://github.com/daramkun/SamplePlay uses it to get the frame rate, width, and height of a video stream, and the number of channels, sample rate, and bit depth of an audio stream. It outputs to uncompressed samples, and in the case of the latter it uses those parameters to determine a PCM type. (Even though not all audio types have a "bit depth"...)
https://github.com/vipoo/SuperMFLib uses it in one place to check the major type. It uses it in another place to get the frame rate, PAR, width, and height of a video stream, and the number of channels and sample rate of an audio stream. It outputs to uncompressed samples.
https://github.com/Brhsoftco/PlexDL-MetroSet_UI uses it for video capture.
The conclusion I draw from this is:
- most applications which call GetNativeMediaType() are doing so on
capture sources [which, it goes without saying, are outside the scope of gstreamer],
- the rest only care about details that wouldn't change from decoding:
major type, frame rate, width, height, PAR, channel count, sample rate,
- none of the applications concerned with decoding audio actually set
the media type to be the native media type.
The conclusion I draw from this is that incorrect behavior is always one attribute retrieval away, with no easy/straightforward fix.
It's changing the implementation details, not the API contract. We have the freedom to do that.
First of all, this is something I think we want to do anyway. Microsoft has no demuxer for, say, Vorbis (at least, there's not one registered on my Windows 10 machine), but I think that we want to be able to play back Vorbis files anyway (in, say, a Win32 media player application).
I'm pretty sure our goal is not to extend windows functionality.
Actually, I'd assert the opposite. Host integration has always been a feature of Wine, not a bug. That goes beyond just mapping program launcher entries to .desktop files; it includes things like:
- mapping host devices to DOS drives,
- allowing unix paths to be used in file system functions,
- exposing the unix file system as a shell folder,
- making winebrowser the default browser (instead of explorer),
- exposing public Wine-specific exports from ntdll (those not prefixed
with a double underscore),
- making use of host credentials in advapi32 (on Mac, anyway),
- exposing host GStreamer and QuickTime codecs in DirectShow.
We extend host functionality to integrate with the system, and to make using Wine easier. Using host codecs from mfplat does both.
I'm unsure why anyone would want to use a windows media player over something like VLC.
I'm unsure as well, but that's not really our place to judge. We just make the software work where we can. If I had to guess, though, I'd say that native media players offer a UI that the user prefers, includes some feature that native players don't, is more familiar to the user (who may have recently migrated from Windows)...
But as I mentioned earlier, it is possible to add a hack using decodebin with minimal effort, and we could possibly only use this hack as a fallback if the container have a registered byte stream handler. I think we would get the best of both worlds with this solution.
In a sense I'm kind of proposing exactly that, except that we rely on decodebin first, and only add other sources if it turns out that decodebin doesn't work for something.
Call me crazy, but I think the accurate solution should be the default, not the fallback :P
Instead
of writing yet another source for vorbis,
You don't "write another source", you just expose a new source object and link it with a new source_desc structure, which specifies the mime type of the container format: https://github.com/Guy1524/wine/blob/mfplat_rebase/dlls/winegstreamer/media_...
and for each other obscure
format, we just write one generic decodebin wrapper.
Not to mention, you'd have to perform this step with a decodebin wrapper anyway.
The amount of abstraction, and the amount of actual code you have to add, is beside the point, but it's also not quite as simple as you make out there:
- First and foremost, we also need to add caps conversion functions,
since vorbisparse doesn't output raw video, and we need to be able to feed it through theoradec afterwards.
You need that anyway, chromium manually creates H.264 encoder and decoder instances and uses them without anything from the control layer. Because of this, we will at-least need to keep the mediatype->caps conversion function for compressed types.
It creates the h264 decoder transform manually, and doesn't use the rest of mfplat, or do I misunderstand you?
Yep, exactly, see https://github.com/chromium/chromium/blob/master/media/gpu/windows/dxva_vide... for the decoding code.
- Also, I'm guessing you haven't dealt with "always" pads yet;
vorbisparse doesn't send "no-more-pads".
That would be ever easier to support.
- In the case that elements get added, removed, or changed from upstream
GStreamer, we have to reflect that here.
Elaborate?
If GStreamer supports a new media type or removes support for one, we have to reflect that. If caps details change upstream, that's something we should pay attention to as well; it could affect our conversion.
Are cap details even allowed to change like that? I find this very unlikely.
By contrast, the amount of code we have to add to deal with a new format when using decodebin is *exactly zero*. We don't actually have to write "audio/x-vorbis" anywhere in our code.
Okay, adding that path as a fallback makes a lot of sense then, since we still have full ability to fix compatibility issues with types that are natively supported in windows.
After all, we don't write it anywhere in quartz, and yet Vorbis still works. (If an application were to ask what the stream type is—and I doubt any do—we report it as MEDIATYPE_Stream, MEDIASUBTYPE_Gstreamer).
Second of all, the most obvious benefit, at least while looking at these patches, is that you now don't need to write caps <-> IMFMediaType conversion for every type on the planet.
I don't see this as a problem, most games I've seen will use either H.264 of WMV, and adding new formats isn't that difficult. You look at the caps exposed by the gstreamer demuxer, find the equivalent attributes in media foundation, and fill in the gaps. In return you get correct behavior, and a source that can be paired with a correctly written MFT from outside of the wine source.
This is basically true until it isn't. And it already isn't true if we want to support host codecs. An "add it when we need it" approach is going to be hell on media players.
I also think you're kind of underestimating the cost here. I don't like making LoC arguments, but your code to deal with those caps is something like 370 LoC, maybe 350 LoC with some deduplication.
As mentioned earlier in the email, the IMFMediaType->caps path will always be necessary, to support the decoder transforms, which real applications do use by themselves.
Sure. But as I understand, we'd only need to do the conversion one way (i.e. Media Foundation -> GStreamer), and we'd only need to bother with it for transforms that are explicitly created.
If you know how to convert a media foundation type into caps, you've already figured out everything you need to know about the other way around.
There's also the developer cost of looking up what GStreamer caps values mean (which usually requires looking at the source), looking up the Media Foundation attributes, testing them to ensure that the conversion is correct, figuring out how to deal with caps that either GStreamer or Media Foundation can't handle...
Another benefit is that you let
all of the decoding happen within a single GStreamer pipeline, which is probably better for performance.
I have applications working right now with completely acceptable performance, and we are still copying every uncompressed sample an extra time, which we may be able to optimize away. Copying compressed samples, on the other hand, is not that big of a deal at all.
I don't doubt it works regardless. DirectShow did too, back before I got rid of the transforms. It's also not the main reason I'm proposing this.
On the other hand, decreasing CPU usage is also nice.
How would this reduce CPU usage?
It's only an armchair hypothesis, so feel free to just ignore, but it probably means less buffer copies.
True, but only compressed buffer copies, which shouldn't have any noticeable impact.
Another thing that occurred to me is, letting everything happen in one GStreamer pipeline is nice for debugging.
I disagree, decodebin adds complexity to the pipeline that isn't otherwise necessary, like typefind.
I mostly meant along the lines of keeping all of the decoders in the same pipeline as the demuxer, which in my experience debugging GStreamer is easier to read than when they were bouncing through quartz.
typefind is pretty much necessary, unless we reimplement it ourselves (which, I understand, you've taken as granted that you'll have to do, but I'm not so sure).
Yes, even with your solution, the source resolver, if we want to be at all correct, will fine media sources based on searching the registry for the entry matching the mime type or file extension.
I don't see how it's not nice for debugging either—sure, it takes up a lot of lines in the log figuring out the type, but in my experience I can always skip over that by searching for winegstreamer callbacks, or no-more-pads, or whatever it is I'm trying to debug.
True, it probably doesn't make much of a difference. Either way debugging gstreamer isn't very hard IMO, since their logging system is spectacular.
You also can simplify your
postprocessing step to adding a single videoconvert and audioconvert, instead of having to manually (or semi-manually) add e.g. an h264 parser element.
It isn't manual, we find a parser which corrects the caps. And as I mentioned in earlier email, we could also use caps negotiation for this, all the setup is in place.
Hence "semi-manually". You still have to manually fix the caps so that the element will be added.
As mentioned, we will need this regardless.
These are some of the benefits I had in mind when removing the
GStreamer quartz transforms.
Even in the case where the application manually creates e.g. an MPEG-4 source, my understanding is it's still the source's job to automatically append transforms to match the requested type.
It's not the source's job at all. On windows, where sources are purpose-built, they apply no transformations to the types they get, their goal is only to get raw sample data from a container / stream. It's the job of the media session, or source reader to apply transforms when needed.
I see, I confused the media source with the source reader. I guess that argument isn't valid, but I don't think it really affects my conclusion.
We'd just be moving that
from the mfplat level to the gstreamer level—i.e. let decodebin select the 'transforms' needed to convert to raw video and audio.
The media session and source reader shouldn't be affected by winegstreamer details. If a user/an application decides to install a third party decoder, we still need the infrastructure in place for this to function.
It obviously wouldn't match native structure, but it's not clear to me that it would fail to match native in a way that would cause problems. Judging from my experience with quartz, most applications aren't going to care how their media is decoded as long as they get raw samples out of it.
Most games, or most applications? Chromium uses media foundation in a much more granular way.
Yes, most applications.
What does Chromium do?
As mentioned earlier, uses decoders and encoders manually, so we'll have to fix up/parse the data we get anyway.
Only a select few build the graph manually because they don't
realize that they can autoplug, or make assumptions about which filters will be present once autoplugging is done, and some of those even fall back to autoplugging if their preferred method fails. Maybe the situation is different with mfplat, but given that there is a way to let mfplat figure out which sources and transforms to use, I'm gonna be really surprised if most applications aren't using it.
If you do come across an application that requires we mimic native's specific arrangement of sources and transforms, it seems to me it wouldn't require that much effort to swap a different parser in for decodebin, and to implement the necessary bits in the media type conversion functions. Ultimately I suspect it'd be less work to have a decodebin wrapper + specific sources for applications that require them, than to manually implement every source and transform.
The current solution isn't very manual, and, as I mentioned earlier in this email, you also can construct a decodebin wrapper source using the infrastructure which is available. And in general terms, I think it's more work to maintain a solution that doesn't match up to windows, as we now have to think of all these edge cases and how to work around them.
What edge cases do you mean?
Cases where applications expect compressed streams from the source.
The way I see it, you're essentially thinking of those "edge cases" now, except that you're not considering them edge cases.
I am considering them edge cases, but I do think it's important so I'm implementing the source accurately, it's not like my implementation is somehow less desirable for the common case.
If we use decodebin, they become more obviously edge cases. But it's not a lot of thought it takes, from my view. We just need to ask, "what happens if an application depends on getting compressed samples?" and answer, "well, then we create a new media source, probably reusing most of the same infrastructure
There is no need to create a new media source implementation, just a new configuration of the current one which uses a different tool for demuxing.
, that utilises the parts of gstreamer that output compressed samples." We don't actually have to do that work until we find such an application.
The work is already there 🐸.
On 3/26/20 8:07 PM, Zebediah Figura wrote:
While I await your more complete response, I figure I might as well clarify some things.
I don't think that "doing the incorrect thing", i.e. failing to exactly emulate Windows, should necessarily be considered bad in itself, or at least not nearly as bad as all that.
My view, and my understanding of the Wine project's view in general as informed by its maintainers, is that emulating Windows is desirable for public documented behaviour (obviously), for undocumented behaviour that applications rely on (also obviously), for undocumented or semi-documented behaviour where there's no difference otherwise and where the native thing to do is obvious (e.g. the name of an internal registry key).
In my view, when completely incorrect behavior is only a few function calls away, that's not acceptable. The media source is a well documented public interface, and doing something different instead is just asking for trouble.
The media source is a documented public interface, but *which* media source is returned from IMFSourceResolver is not documented or guaranteed, and which transforms are returned from the source reader is also not guaranteed.
Using decodebin is not "completely incorrect", and emulating Windows' specific arrangement of sources and transforms is not "a few function calls away".
Finding out the media type of a source is one function call away.
I don't understand what you mean. Which function call?
GetNativeMediaType
It's several hundred lines of code to do caps conversion, the entire transform object (which, to be sure, we might need *anyway*,
We will.
but also might not), and it means more work every time we have to deal with a new codec.
Unless we implement the decodebin solution as a fallback for unknown types. Taking the fallback approach means we will only have to go through this process for every type natively supported by windows.
But there's not really a reason to emulate Windows otherwise. And in a case like this, where there's a significant benefit to not emulating Windows exactly, the only reason I see is "an application we don't know yet *might* depend on it". When faced with such a risk, I weigh the probability of that happening—and on the evidence of DirectShow applications, I see that as low—with the cost of having to change design—which also seems low to me; I can say from experience (c.f. 5de712b5d) that swapping out a specific demuxer for decodebin isn't very difficult.
The converse of this is also true, if you want to quickly experiment with some gstreamer codec that we don't support yet, you just perform the hack I mentioned earlier, and then after you get it working you make it correct by adding the necessary gstreamer caps. Another hack we could use is to serialize the compressed caps, and throw them in a MF_MT_USER_DATA attribute, and hope that an application never looks.
Sure. But I'm willing to assert that one of these things is more likely than the other. I'm prepared to eat my words if proven wrong.
What do you mean, that in most cases applications won't care how they get their samples? That may be true, but I still think the edge cases are big enough to warrant the accurate approach. Unity3D, a pretty important user of this work, gets native media types of the source for instance. What they use it for, I'm not sure, but I wouldn't take any chances.
I mean it's more likely that an application wants uncompressed samples than that it wants compressed samples. As I see it, the latter case is (1) still hypothetical, (2) wouldn't be very difficult to implement either.
Yes, of course that's the case most applications want uncompressed samples, which is why they use the source reader or a session. And no, the latter is not very difficult, I've already done it. The point is, it matches windows, and it's easy to make a fallback path for any such case where it doesn't suffice. You just add a new source_desc that somehow specifies it is the hack source, and instead of searching for a demuxer, we just use use decodebin as the demuxer. Then, you either register this source with whichever container types you want to support, or add a hack in the source resolver which creates an instance of this source if it can't find a byte stream handler.
If we are going to have two paths anyway, the one which diverges from windows should be the one which takes the back seat, at-least in terms of code presence.
Based on my survey of GitHub above, I have to wonder what aspects of the native media type Unity3D actually cares about. What attributes does it ask for? Does it actually set the decoder to use a compressed media type? Even if the answer is yes, does it break if we return an uncompressed media type?
Yeah, I haven't tested that, but it just makes me feel very nervous about this.
But as I mentioned earlier, I don't think the amount of work required for adding a new media type is excessive. Microsoft only ships a limited amount of sources and decoders, they fit on a single page: https://docs.microsoft.com/en-us/windows/win32/medfound/supported-media-form... , so it's not like we'll be adding new types for years to come.
That's seven demuxers and sixteen transforms, which is still kind of a lot. It also, unsurprisingly, isn't every format that Windows supports; just looking at my Windows 7 VM I see also NSC and LPCM, and a much longer list of transforms.
And it doesn't take into account host codecs.
Insert fallback argument here :P
Not to mention that what we're doing is barely "incorrect". Media Foundation is an API that's specifically meant to be extended in this way.
I don't think Microsoft ever meant for an application to make a media source that decodes compressed content, the source reader and media session exist for a reason.
I don't think they specifically meant for an application *not* to do that. It fits within the design of Media Foundation. The reason that transforms exist—in any media API—is because different containers can hold the same video or audio codec. GStreamer can already deal with that.
For that matter, some application could easily register its own
codec libraries on Windows with a higher priority than the native ones (this happened with DirectShow); that's essentially no different than what I'm suggesting.
Yes, but even in that case, I assume they will still follow the basic concept of what a source is and is not.
I wouldn't necessarily assert that. A codec library—like GStreamer—might have its own set of transforms and autoplugging code. Easier to reuse that internally than to try to integrate it with every new decoding API that Microsoft releases.
That could potentially break other applications though, and I don't think codec libraries are comparable to gstreamer, they usually just handle a specific task and plug into the relevant part of the media API, whether it be dshow, media foundation, or gstreamer.
GStreamer *is* a codec library. That's exactly what it is.
"GStreamer is a pipeline-based multimedia framework that links together a wide variety of media processing systems to complete complex workflows."
I think I would consider something like ffmpeg a codec library, but either way, I think anyone adding transforms / sources are doing it because the functionality doesn't exist natively. And to maximize cohesion, they would probably use just use an external library to perform the desired action, and the rest of the code would be for hooking up to media framework it is operating within. A good example of this would be the libav gstreamer plugins. And I think there's a reason the opposite of this, libfluffgst, isn't very well known.
We don't yet know that any other applications would be broken; that's still hypothetical. It's not unheard of for applications to mess with Windows internals in ways that break other applications, to be sure. But it's also not a good idea.
I think the linked commit misses the point somewhat. That's partially because I don't think it makes sense to measure simplicity as an absolute metric simply using line count,
It's not just line count, the code itself is very simple, all we are doing is registering the supported input and output types of the decoder, setting the mime type of the container format for the source, and and registering both objects.
and partially because it's
missing the cost of adding other media types to the conversion functions
You can use the MF_MT_USER_DATA serialization hack if you're worried about that.
Unless you're proposing we use that in Wine, that doesn't affect anything.
You're right, the decodebin fallback is a much cleaner solution than that.
(which is one of the reasons, though not the only reason, I thought to write this mail). But it's mostly because the cost of using decodebin, where it works, is essentially zero:
Except in the cases where an application does something unexpected.
In which case the cost is still no more than the cost of not using decodebin.
we write one media source, and it
works for everything; no extension for ASF required.
There already is only one real implementation of the media source, the only "extension" is adding the mime type instead of using typefind. We will register the necessary byte stream handlers no matter which path we take.
Well, ideally we'd do what quartz does, and register a handler that catches every file, and returns a subtype that essentially identifies GStreamer.
If it never becomes
necessary to write a source that outputs compressed samples, then we also don't have the cost of abstraction (which is always worth taking seriously!), and if it does, we come out even—we can still use your generic media source, or something like it.
Ultimately, I think that a decodebin wrapper is something we want to have anyway, for the sake of host codecs like Theora,
Where would we use support for Theora, if no windows applications are able to use it.
Anything which wants to be able to play back an arbitrary media file, i.e. generic media players, mostly. I see all sorts of bug reports for these with Quartz, so people are definitely using them.
Heh.
and once we have
it, I see zero cost in using it wherever else we can.
On 3/27/20 4:26 PM, Derek Lesho wrote:
On 3/27/20 3:11 PM, Zebediah Figura wrote:
On 3/27/20 1:08 PM, Derek Lesho wrote:
On 3/27/20 11:32 AM, Zebediah Figura wrote:
On 3/27/20 10:05 AM, Derek Lesho wrote:
On 3/26/20 4:56 PM, Zebediah Figura wrote:
There's another broad question I have with this approach, actually, which is fundamental enough that I have to assume it's had some thought put into it, but it would be nice if that discussion happened in a more public place, and was justified in the patches sent.
Essentially, the question is: what if we were to use decodebin directly?
As I understand (and admittedly Media Foundation is far more complex than I could hope to understand) an application which just calls IMFSourceResolver methods just needs to get back a working IMFMediaSource, and we could wrap decodebin with one of those, similar to the quartz wrapper.
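(To make the proposal concrete: below is a minimal, self-contained sketch of such a wrapper, using only public GStreamer API. The element choices here, filesrc, videoconvert/audioconvert, and fakesink, are illustrative; a real media source would hand the decoded samples to Media Foundation instead.)

    #include <gst/gst.h>

    static void pad_added(GstElement *decodebin, GstPad *pad, gpointer user)
    {
        GstElement *pipeline = GST_ELEMENT(user);
        GstCaps *caps = gst_pad_get_current_caps(pad);
        const gchar *name;
        GstElement *conv, *sink;
        GstPad *sinkpad;

        if (!caps) caps = gst_pad_query_caps(pad, NULL);
        name = gst_structure_get_name(gst_caps_get_structure(caps, 0));
        /* decodebin only exposes raw pads, so a converter is all we need. */
        conv = gst_element_factory_make(
                g_str_has_prefix(name, "video/") ? "videoconvert" : "audioconvert", NULL);
        sink = gst_element_factory_make("fakesink", NULL);

        gst_bin_add_many(GST_BIN(pipeline), conv, sink, NULL);
        gst_element_link(conv, sink);
        gst_element_sync_state_with_parent(conv);
        gst_element_sync_state_with_parent(sink);

        sinkpad = gst_element_get_static_pad(conv, "sink");
        gst_pad_link(pad, sinkpad);
        gst_object_unref(sinkpad);
        gst_caps_unref(caps);
    }

    int main(int argc, char **argv)
    {
        GstElement *pipeline, *src, *decode;
        GstBus *bus;

        gst_init(&argc, &argv);
        if (argc < 2) return 1;

        pipeline = gst_pipeline_new(NULL);
        src = gst_element_factory_make("filesrc", NULL);
        decode = gst_element_factory_make("decodebin", NULL);

        g_object_set(src, "location", argv[1], NULL);
        gst_bin_add_many(GST_BIN(pipeline), src, decode, NULL);
        gst_element_link(src, decode);
        /* Every container and codec GStreamer can handle goes through here,
         * without naming any of them. */
        g_signal_connect(decode, "pad-added", G_CALLBACK(pad_added), pipeline);

        gst_element_set_state(pipeline, GST_STATE_PLAYING);
        bus = gst_element_get_bus(pipeline);
        gst_message_unref(gst_bus_timed_pop_filtered(bus, GST_CLOCK_TIME_NONE,
                GST_MESSAGE_EOS | GST_MESSAGE_ERROR));
        gst_object_unref(bus);
        gst_element_set_state(pipeline, GST_STATE_NULL);
        gst_object_unref(pipeline);
        return 0;
    }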
The most basic applications (games) seem to either use a source reader or a simple sample grabber media session to get their raw samples. If you want to add a hack for using decodebin, you can easily add a special source type, and for the media source of that type, just make a decodebin element instead of searching for a demuxer. In this case, the source reader wouldn't search for a decoder since the output type set by the application would be natively supported by the source. Then, as part of the hack, just always yield that source type in the source resolver. This is completely incorrect and probably shouldn't make its way into mainline, IMO. Also, I have reason to believe it may break Unity3D, as they do look at the native media types supported by the source, and getting around this would require adding some hackery in the source reader.
My assertion is this isn't really a "hack".
I think that if you have to modify media foundation code to work around shortcuts in winegstreamer, it can be classified as a hack. It is probable that most games will work with it, but I think it makes more sense as a staging enhancement.
There's nothing we have to modify in core Media Foundation code (though modifying bytestream_get_url_hint() would help). I disagree with your assertion that it's a hack, though. Or, more saliently, that differing from Windows' implementation details inherently means it's bad and wrong.
I'd also point out that "enhancements that aren't suitable for upstream" isn't a purpose of Staging. "Patches that aren't good enough for upstream yet" is.
This is something that's reasonable to do, and that fits within the design of Media Foundation.
I have a hard time subscribing to the idea that this is within the design of media foundation. I took a look on github, and a good number of applications find desired streams using the subtype from the source reader's GetNativeMediaType. If we were to output uncompressed types, this would break. To work around this, we'd either have to expose incorrect media types on our streams and add an exception to the decoder-finding behavior in the source reader and topology loader, or expose some private interface for getting the true native types. And in either case, we'd still have to convert caps for a compressed media type.
So, I went to see what exactly these programs were doing with GetNativeMediaType(). I figured I'd check the first ten unique ones, skipping anything that looks like a binding or wrapper, and here's what I came up with:
https://github.com/KennethEvans/VS-Audio uses it in one place to test whether a stream is present, in another place just to dump the type to stdout, and in a third place to get the type from a capture device.
https://github.com/Csineneo/Vivaldi uses it in one place just to retrieve the major type, and in another place to get the type from a capture device.
https://github.com/Hanumanthu2020/HanuWork uses it for capture devices.
https://github.com/clarkezone/audiovisualizer uses it in one place to test the major type and subtype; it checks if the subtype is mp3 but doesn't do anything with that information. It uses it in another place passed through to its own API.
https://github.com/nickluo/camaro-sdk uses it for video capture.
https://github.com/mrojkov/Citrus uses it to get the major type, width, and height.
https://github.com/ms-iot/ros_win_camera uses it for image/video capture.
https://github.com/daramkun/SamplePlay uses it to get the frame rate, width, and height of a video stream, and the number of channels, sample rate, and bit depth of an audio stream. It outputs to uncompressed samples, and in the case of the latter it uses those parameters to determine a PCM type. (Even though not all audio types have a "bit depth"...)
https://github.com/vipoo/SuperMFLib uses it in one place to check the major type. It uses it in another place to get the frame rate, PAR, width, and height of a video stream, and the number of channels and sample rate of an audio stream. It outputs to uncompressed samples.
https://github.com/Brhsoftco/PlexDL-MetroSet_UI uses it for video capture.
The conclusion I draw from this is:
- most applications which call GetNativeMediaType() are doing so on capture sources [which, it goes without saying, are outside the scope of gstreamer];
- the rest only care about details that wouldn't change from decoding: major type, frame rate, width, height, PAR, channel count, sample rate;
- none of the applications concerned with decoding audio actually set the media type to be the native media type.
The conclusion I draw from this is that incorrect behavior is always one attribute retrieval away, with no easy/straightforward fix.
The point is that if we're looking at what applications actually do in practice, the evidence supports that they rarely if ever depend on the native media type.
In the case that they do, I don't think the fix is as difficult as all that. Moreover, the work has already been done, and would need only be adapted.
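(For concreteness, the benign pattern the surveyed applications follow looks roughly like the sketch below. Error handling is omitted, and "reader" is assumed to come from MFCreateSourceReaderFromURL() or similar; every attribute queried here would be the same whether the source reported a compressed or an uncompressed subtype.)

    #define COBJMACROS
    #include <windows.h>
    #include <mfapi.h>
    #include <mfreadwrite.h>

    static void dump_native_type(IMFSourceReader *reader)
    {
        IMFMediaType *type;
        UINT32 width, height, fps_num, fps_den;
        GUID major;

        IMFSourceReader_GetNativeMediaType(reader,
                MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, &type);

        /* Stream metadata only; none of this depends on the subtype. */
        IMFMediaType_GetGUID(type, &MF_MT_MAJOR_TYPE, &major);
        MFGetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, &width, &height);
        MFGetAttributeRatio((IMFAttributes *)type, &MF_MT_FRAME_RATE, &fps_num, &fps_den);

        IMFMediaType_Release(type);
    }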
It's changing the implementation details, not the API contract. We have the freedom to do that.
First of all, this is something I think we want to do anyway. Microsoft has no demuxer for, say, Vorbis (at least, there's not one registered on my Windows 10 machine), but I think that we want to be able to play back Vorbis files anyway (in, say, a Win32 media player application).
I'm pretty sure our goal is not to extend windows functionality.
Actually, I'd assert the opposite. Host integration has always been a feature of Wine, not a bug. That goes beyond just mapping program launcher entries to .desktop files; it includes things like:
- mapping host devices to DOS drives,
- allowing unix paths to be used in file system functions,
- exposing the unix file system as a shell folder,
- making winebrowser the default browser (instead of explorer),
- exposing public Wine-specific exports from ntdll (those not prefixed with a double underscore),
- making use of host credentials in advapi32 (on Mac, anyway),
- exposing host GStreamer and QuickTime codecs in DirectShow.
We extend host functionality to integrate with the system, and to make using Wine easier. Using host codecs from mfplat does both.
I'm unsure why anyone would want to use a windows media player over something like VLC.
I'm unsure as well, but that's not really our place to judge. We just make the software work where we can. If I had to guess, though, I'd say that a Windows media player might offer a UI that the user prefers, include some feature that host players don't, or simply be more familiar to the user (who may have recently migrated from Windows)...
But as I mentioned earlier, it is possible to add a hack using decodebin with minimal effort, and we could use this hack only as a fallback when the container doesn't have a registered byte stream handler. I think we would get the best of both worlds with this solution.
In a sense I'm kind of proposing exactly that, except that we rely on decodebin first, and only add other sources if it turns out that decodebin doesn't work for something.
Call me crazy, but I think the accurate solution should be the default, not the fallback :P
In a vacuum, that kind of maxim would be true. But there's a lot more to consider here: how inaccurate our implementation details actually are, how likely an application is to care, and how much simpler or clearer it makes our code. The answers, as I see them, strongly support using decodebin as the default. That there's precedent in quartz also helps, I think.
Instead of writing yet another source for vorbis,
You don't "write another source", you just expose a new source object and link it with a new source_desc structure, which specifies the mime type of the container format: https://github.com/Guy1524/wine/blob/mfplat_rebase/dlls/winegstreamer/media_...
and for each other obscure format, we just write one generic decodebin wrapper.
Not to mention, you'd have to perform this step with a decodebin wrapper anyway.
The amount of abstraction, and the amount of actual code you have to add, is beside the point, but it's also not quite as simple as you make out there:
- First and foremost, we also need to add caps conversion functions, since vorbisparse doesn't output raw video, and we need to be able to feed it through theoradec afterwards.
You need that anyway; chromium manually creates H.264 encoder and decoder instances and uses them without anything from the control layer. Because of this, we will at least need to keep the media type -> caps conversion function for compressed types.
It creates the h264 decoder transform manually, and doesn't use the rest of mfplat, or do I misunderstand you?
Yep, exactly, see https://github.com/chromium/chromium/blob/master/media/gpu/windows/dxva_vide... for the decoding code.
Okay, thanks. So yes, clearly we will need the transform anyway. Of course, we don't necessarily need anything other than the h264 transform.
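(A sketch of that usage pattern, illustrative rather than lifted from Chromium: create the Windows H.264 decoder MFT directly and hand it a compressed input type. Error handling is mostly omitted, and COM/Media Foundation are assumed to be initialized.)

    #define COBJMACROS
    #include <windows.h>
    #include <initguid.h>
    #include <mfapi.h>
    #include <mftransform.h>
    #include <wmcodecdsp.h>  /* CLSID_CMSH264DecoderMFT */

    static HRESULT create_h264_decoder(IMFTransform **out)
    {
        IMFMediaType *input;
        IMFTransform *decoder;
        HRESULT hr;

        hr = CoCreateInstance(&CLSID_CMSH264DecoderMFT, NULL, CLSCTX_INPROC_SERVER,
                &IID_IMFTransform, (void **)&decoder);
        if (FAILED(hr)) return hr;

        /* A compressed input type like this is exactly why the media type ->
         * caps conversion for H.264 is needed no matter how the source works. */
        MFCreateMediaType(&input);
        IMFMediaType_SetGUID(input, &MF_MT_MAJOR_TYPE, &MFMediaType_Video);
        IMFMediaType_SetGUID(input, &MF_MT_SUBTYPE, &MFVideoFormat_H264);
        hr = IMFTransform_SetInputType(decoder, 0, input, 0);
        IMFMediaType_Release(input);

        *out = decoder;
        return hr;
    }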
- Also, I'm guessing you haven't dealt with "always" pads yet; vorbisparse doesn't send "no-more-pads".
That would be even easier to support.
- In the case that elements get added, removed, or changed from upstream GStreamer, we have to reflect that here.
Elaborate?
If GStreamer supports a new media type or removes support for one, we have to reflect that. If caps details change upstream, that's something we should pay attention to as well; it could affect our conversion.
Are caps details even allowed to change like that? I find this very unlikely.
Sure. As I understand it, caps are only meant to connect together elements that know what those caps mean, which is partly why they're not always documented. I would presume that they'll try to preserve backwards-compatibility, but we also want to keep ahead of other changes that they make.
By contrast, the amount of code we have to add to deal with a new format when using decodebin is *exactly zero*. We don't actually have to write "audio/x-vorbis" anywhere in our code.
Okay, adding that path as a fallback makes a lot of sense then, since we still have full ability to fix compatibility issues with types that are natively supported in windows.
After all, we don't write it anywhere in quartz, and yet Vorbis still works. (If an application were to ask what the stream type is—and I doubt any do—we report it as MEDIATYPE_Stream, MEDIASUBTYPE_Gstreamer).
Second of all, the most obvious benefit, at least while looking at these patches, is that you now don't need to write caps <-> IMFMediaType conversion for every type on the planet.
I don't see this as a problem; most games I've seen will use either H.264 or WMV, and adding new formats isn't that difficult. You look at the caps exposed by the gstreamer demuxer, find the equivalent attributes in media foundation, and fill in the gaps. In return you get correct behavior, and a source that can be paired with a correctly written MFT from outside of the wine source.
This is basically true until it isn't. And it already isn't true if we want to support host codecs. An "add it when we need it" approach is going to be hell on media players.
I also think you're kind of underestimating the cost here. I don't like making LoC arguments, but your code to deal with those caps is something like 370 LoC, maybe 350 LoC with some deduplication.
As mentioned earlier in the email, the IMFMediaType->caps path will always be necessary, to support the decoder transforms, which real applications do use by themselves.
Sure. But as I understand, we'd only need to do the conversion one way (i.e. Media Foundation -> GStreamer), and we'd only need to bother with it for transforms that are explicitly created.
If you know how to convert a media foundation type into caps, you've already figured out everything you need to know about the other way around.
Well, mostly everything, because the conversions are never actually bijective, but regardless we don't actually have to write that code.
There's also the developer cost of looking up what GStreamer caps values mean (which usually requires looking at the source), looking up the Media Foundation attributes, testing them to ensure that the conversion is correct, figuring out how to deal with caps that either GStreamer or Media Foundation can't handle...
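(For illustration: roughly what the H.264 branch of a caps_from_media_type() might look like, assuming the attributes the forward conversion sets and the headers mfplat.c already includes. A sketch only; profile/level and codec data handling are omitted, and, as noted, the conversion isn't truly bijective.)

    /* Sketch: build H.264 caps from a media type, assuming MF_MT_FRAME_SIZE
     * and MF_MT_FRAME_RATE were set. Error handling omitted. */
    static GstCaps *h264_caps_from_media_type(IMFMediaType *type)
    {
        UINT32 width = 0, height = 0, fps_num = 0, fps_den = 1;

        MFGetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, &width, &height);
        MFGetAttributeRatio((IMFAttributes *)type, &MF_MT_FRAME_RATE, &fps_num, &fps_den);

        return gst_caps_new_simple("video/x-h264",
                "width", G_TYPE_INT, (gint)width,
                "height", G_TYPE_INT, (gint)height,
                "framerate", GST_TYPE_FRACTION, (gint)fps_num, (gint)fps_den,
                NULL);
    }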
Another benefit is that you let all of the decoding happen within a single GStreamer pipeline, which is probably better for performance.
I have applications working right now with completely acceptable performance, and we are still copying every uncompressed sample an extra time, which we may be able to optimize away. Copying compressed samples, on the other hand, is not that big of a deal at all.
I don't doubt it works regardless. DirectShow did too, back before I got rid of the transforms. It's also not the main reason I'm proposing this.
On the other hand, decreasing CPU usage is also nice.
How would this reduce CPU usage?
It's only an armchair hypothesis, so feel free to just ignore, but it probably means less buffer copies.
True, but only compressed buffer copies, which shouldn't have any noticeable impact.
Another thing that occurred to me is, letting everything happen in one GStreamer pipeline is nice for debugging.
I disagree, decodebin adds complexity to the pipeline that isn't otherwise necessary, like typefind.
I mostly meant along the lines of keeping all of the decoders in the same pipeline as the demuxer, which in my experience debugging GStreamer is easier to read than when they were bouncing through quartz.
typefind is pretty much necessary, unless we reimplement it ourselves (which, I understand, you've taken as granted that you'll have to do, but I'm not so sure).
Yes, even with your solution, the source resolver, if we want to be at all correct, will find media sources by searching the registry for the entry matching the mime type or file extension.
Sure, but if we use decodebin for "anything" or "anything else", we don't actually need to add such entries. Unfortunately it's not clear to me that mfplat allows that to be done through registry entries (unlike quartz), but adding code in resolver_get_bytestream_handler() seems unobtrusive enough to me.
I don't see how it's not nice for debugging either—sure, it takes up a lot of lines in the log figuring out the type, but in my experience I can always skip over that by searching for winegstreamer callbacks, or no-more-pads, or whatever it is I'm trying to debug.
True, it probably doesn't make much of a difference. Either way debugging gstreamer isn't very hard IMO, since their logging system is spectacular.
You also can simplify your postprocessing step to adding a single videoconvert and audioconvert, instead of having to manually (or semi-manually) add e.g. an h264 parser element.
It isn't manual; we find a parser which corrects the caps. And as I mentioned in an earlier email, we could also use caps negotiation for this; all the setup is in place.
Hence "semi-manually". You still have to manually fix the caps so that the element will be added.
As mentioned, we will need this regardless.
These are some of the benefits I had in mind when removing the GStreamer quartz transforms.
Even in the case where the application manually creates e.g. an MPEG-4 source, my understanding is it's still the source's job to automatically append transforms to match the requested type.
It's not the source's job at all. On windows, where sources are purpose-built, they apply no transformations to the types they get; their goal is only to get raw sample data from a container / stream. It's the job of the media session or source reader to apply transforms when needed.
I see, I confused the media source with the source reader. I guess that argument isn't valid, but I don't think it really affects my conclusion.
We'd just be moving that from the mfplat level to the gstreamer level—i.e. let decodebin select the 'transforms' needed to convert to raw video and audio.
The media session and source reader shouldn't be affected by winegstreamer details. If a user/an application decides to install a third party decoder, we still need the infrastructure in place for this to function.
It obviously wouldn't match native structure, but it's not clear to me that it would fail to match native in a way that would cause problems. Judging from my experience with quartz, most applications aren't going to care how their media is decoded as long as they get raw samples out of it.
Most games, or most applications? Chromium uses media foundation in a much more granular way.
Yes, most applications.
What does Chromium do?
As mentioned earlier, it uses decoders and encoders manually, so we'll have to fix up/parse the data we get anyway.
Only a select few build the graph manually because they don't realize that they can autoplug, or make assumptions about which filters will be present once autoplugging is done, and some of those even fall back to autoplugging if their preferred method fails. Maybe the situation is different with mfplat, but given that there is a way to let mfplat figure out which sources and transforms to use, I'm gonna be really surprised if most applications aren't using it.
If you do come across an application that requires we mimic native's specific arrangement of sources and transforms, it seems to me it wouldn't require that much effort to swap a different parser in for decodebin, and to implement the necessary bits in the media type conversion functions. Ultimately I suspect it'd be less work to have a decodebin wrapper + specific sources for applications that require them, than to manually implement every source and transform.
The current solution isn't very manual, and, as I mentioned earlier in this email, you also can construct a decodebin wrapper source using the infrastructure which is available. And in general terms, I think it's more work to maintain a solution that doesn't match up to windows, as we now have to think of all these edge cases and how to work around them.
What edge cases do you mean?
Cases where applications expect compressed streams from the source.
The way I see it, you're essentially thinking of those "edge cases" now, except that you're not considering them edge cases.
I am considering them edge cases, but I do think it's important, so I'm implementing the source accurately; it's not like my implementation is somehow less desirable for the common case.
Well, my point is that it kind of is, from the perspective of code quality and simplicity.
If we use decodebin, they become more obviously edge cases. But it doesn't take a lot of thought, in my view. We just need to ask, "what happens if an application depends on getting compressed samples?" and answer, "well, then we create a new media source, probably reusing most of the same infrastructure
There is no need to create a new media source implementation, just a new configuration of the current one which uses a different tool for demuxing.
Sure, most things can be shared. I just mean it's another (COM) object.
, that utilises the parts of gstreamer that output compressed samples." We don't actually have to do that work until we find such an application.
The work is already there 🐸.
Sure, but it doesn't have to be reviewed or committed to the tree.
On 3/26/20 8:07 PM, Zebediah Figura wrote:
While I await your more complete response, I figure I might as well clarify some things.
I don't think that "doing the incorrect thing", i.e. failing to exactly emulate Windows, should necessarily be considered bad in itself, or at least not nearly as bad as all that.
My view, and my understanding of the Wine project's view in general as informed by its maintainers, is that emulating Windows is desirable for public documented behaviour (obviously), for undocumented behaviour that applications rely on (also obviously), for undocumented or semi-documented behaviour where there's no difference otherwise and where the native thing to do is obvious (e.g. the name of an internal registry key).
In my view, when completely incorrect behavior is only a few function calls away, that's not acceptable. The media source is a well documented public interface, and doing something different instead is just asking for trouble.
The media source is a documented public interface, but *which* media source is returned from IMFSourceResolver is not documented or guaranteed, and which transforms are returned from the source reader is also not guaranteed.
Using decodebin is not "completely incorrect", and emulating Windows' specific arrangement of sources and transforms is not "a few function calls away".
Finding out the media type of a source is one function call away.
I don't understand what you mean. Which function call?
GetNativeMediaType
It's several hundred lines of code to do caps conversion, the entire transform object (which, to be sure, we might need *anyway*,
We will.
but also might not), and it means more work every time we have to deal with a new codec.
Unless we implement the decodebin solution as a fallback for unknown types. Taking the fallback approach means we will only have to go through this process for every type natively supported by windows.
But there's not really a reason to emulate Windows otherwise. And in a case like this, where there's a significant benefit to not emulating Windows exactly, the only reason I see is "an application we don't know yet *might* depend on it". When faced with such a risk, I weigh the probability of that happening—and on the evidence of DirectShow applications, I see that as low—against the cost of having to change design—which also seems low to me; I can say from experience (cf. 5de712b5d) that swapping out a specific demuxer for decodebin isn't very difficult.
The converse of this is also true, if you want to quickly experiment with some gstreamer codec that we don't support yet, you just perform the hack I mentioned earlier, and then after you get it working you make it correct by adding the necessary gstreamer caps. Another hack we could use is to serialize the compressed caps, and throw them in a MF_MT_USER_DATA attribute, and hope that an application never looks.
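(For what it's worth, the serialization hack being described is only a few lines; a sketch, assuming the headers mfplat.c already includes plus <string.h>:)

    /* Sketch of the MF_MT_USER_DATA hack: stash the compressed caps as a
     * string blob so they can be recovered later. Purely illustrative. */
    static void stash_caps(IMFMediaType *type, GstCaps *caps)
    {
        gchar *str = gst_caps_to_string(caps);
        IMFMediaType_SetBlob(type, &MF_MT_USER_DATA, (UINT8 *)str, strlen(str) + 1);
        g_free(str);
    }

    static GstCaps *unstash_caps(IMFMediaType *type)
    {
        UINT8 *blob;
        UINT32 size;
        GstCaps *caps = NULL;

        if (SUCCEEDED(IMFMediaType_GetAllocatedBlob(type, &MF_MT_USER_DATA, &blob, &size)))
        {
            caps = gst_caps_from_string((char *)blob);
            CoTaskMemFree(blob);
        }
        return caps;
    }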
Sure. But I'm willing to assert that one of these things is more likely than the other. I'm prepared to eat my words if proven wrong.
What do you mean, that in most cases applications won't care how they get their samples? That may be true, but I still think the edge cases are big enough to warrant the accurate approach. Unity3D, a pretty important user of this work, gets the native media types of the source, for instance. What they use it for, I'm not sure, but I wouldn't take any chances.
I mean it's more likely that an application wants uncompressed samples than that it wants compressed samples. As I see it, the latter case is (1) still hypothetical, (2) wouldn't be very difficult to implement either.
Yes, of course most applications want uncompressed samples, which is why they use the source reader or a session. And no, the latter is not very difficult; I've already done it. The point is, it matches windows, and it's easy to make a fallback path for any such case where it doesn't suffice. You just add a new source_desc that somehow specifies it is the hack source, and instead of searching for a demuxer, we just use decodebin as the demuxer. Then, you either register this source with whichever container types you want to support, or add a hack in the source resolver which creates an instance of this source if it can't find a byte stream handler.
If we are going to have two paths anyway, the one which diverges from windows should be the one which takes the back seat, at least in terms of code presence.
Based on my survey of GitHub above, I have to wonder what aspects of the native media type Unity3D actually cares about. What attributes does it ask for? Does it actually set the decoder to use a compressed media type? Even if the answer is yes, does it break if we return an uncompressed media type?
Yeah, I haven't tested that, but it just makes me feel very nervous about this.
But as I mentioned earlier, I don't think the amount of work required for adding a new media type is excessive. Microsoft only ships a limited number of sources and decoders; they fit on a single page: https://docs.microsoft.com/en-us/windows/win32/medfound/supported-media-form... , so it's not like we'll be adding new types for years to come.
That's seven demuxers and sixteen transforms, which is still kind of a lot. It also, unsurprisingly, isn't every format that Windows supports; just looking at my Windows 7 VM I also see NSC and LPCM, and a much longer list of transforms.
And it doesn't take into account host codecs.
Insert fallback argument here :P
Not to mention that what we're doing is barely "incorrect". Media Foundation is an API that's specifically meant to be extended in this way.
I don't think Microsoft ever meant for an application to make a media source that decodes compressed content; the source reader and media session exist for a reason.
I don't think they specifically meant for an application *not* to do that. It fits within the design of Media Foundation. The reason that transforms exist, in any media API, is that different containers can hold the same video or audio codec. GStreamer can already deal with that.
For that matter, some application could easily register its own codec libraries on Windows with a higher priority than the native ones (this happened with DirectShow); that's essentially no different than what I'm suggesting.
Yes, but even in that case, I assume they will still follow the basic concept of what a source is and is not.
I wouldn't necessarily assert that. A codec library—like GStreamer—might have its own set of transforms and autoplugging code. Easier to reuse that internally than to try to integrate it with every new decoding API that Microsoft releases.
That could potentially break other applications, though. And I don't think codec libraries are comparable to GStreamer; they usually just handle a specific task and plug into the relevant part of the media API, whether it be DirectShow, Media Foundation, or GStreamer.
GStreamer *is* a codec library. That's exactly what it is.
"GStreamer is a pipeline-based multimedia framework that links together a wide variety of media processing systems to complete complex workflows."
I think I would consider something like ffmpeg a codec library, but either way, I think anyone adding transforms / sources is doing it because the functionality doesn't exist natively. And to maximize cohesion, they would probably just use an external library to perform the desired action, and the rest of the code would be for hooking up to the media framework it is operating within. A good example of this would be the libav GStreamer plugins. And I think there's a reason the opposite of this, libfluffgst, isn't very well known.
I think you're splitting hairs, but the point remains: libav has its own autoplugging mechanism. Any codec library that wants to be used directly is going to.
We don't yet know that any other applications would be broken; that's still hypothetical. It's not unheard of for applications to mess with Windows internals in ways that break other applications, to be sure. But it's also not a good idea.
I think the linked commit misses the point somewhat. That's partially because I don't think it makes sense to measure simplicity as an absolute metric simply using line count,
It's not just line count; the code itself is very simple. All we are doing is registering the supported input and output types of the decoder, setting the MIME type of the container format for the source, and registering both objects.
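For the decoder half, the registration amounts to something like this (a sketch: the CLSID and friendly name are placeholders, and the type lists are abbreviated):

#include <mfapi.h>
#include <mftransform.h>

static const CLSID CLSID_gst_h264_decoder = {0x12345678,0,0,{0,0,0,0,0,0,0,0}}; /* placeholder */

static const MFT_REGISTER_TYPE_INFO h264_input[] =
{
    {MFMediaType_Video, MFVideoFormat_H264},
};
static const MFT_REGISTER_TYPE_INFO h264_output[] =
{
    {MFMediaType_Video, MFVideoFormat_NV12},
    {MFMediaType_Video, MFVideoFormat_I420},
};

static HRESULT register_h264_decoder(void)
{
    WCHAR name[] = L"GStreamer H.264 decoder";

    return MFTRegister(CLSID_gst_h264_decoder, MFT_CATEGORY_VIDEO_DECODER, name, 0,
            1, (MFT_REGISTER_TYPE_INFO *)h264_input,
            2, (MFT_REGISTER_TYPE_INFO *)h264_output, NULL);
}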
and partially because it's missing the cost of adding other media types to the conversion functions
You can use the MF_MT_USER_DATA serialization hack if you're worried about that.
Unless you're proposing we use that in Wine, that doesn't affect anything.
You're right, the decodebin fallback is a much cleaner solution than that.
(which is one of the reasons, though not the only reason, I thought to write this mail). But it's mostly because the cost of using decodebin, where it works, is essentially zero:
Except in the cases where an application does something unexpected.
In which case the cost is still no more than the cost of not using decodebin.
we write one media source, and it works for everything; no extension for ASF required.
There already is only one real implementation of the media source; the only "extension" is adding the MIME type instead of using typefind. We will register the necessary byte stream handlers no matter which path we take.
Well, ideally we'd do what quartz does, and register a handler that catches every file and returns a subtype that essentially identifies GStreamer.
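Byte stream handlers are looked up per extension or MIME type in the registry, so as far as I know "catching every file" means repeating the registration for each subkey; roughly (the CLSID below is a placeholder):

#include <windows.h>

static LONG register_handler(const WCHAR *ext_or_mime)
{
    static const WCHAR clsid[] = L"{deadbeef-0000-0000-0000-000000000000}"; /* placeholder */
    static const WCHAR desc[] = L"GStreamer byte stream handler";
    WCHAR key[256] = L"Software\\Microsoft\\Windows Media Foundation\\ByteStreamHandlers\\";
    HKEY hkey;
    LONG ret;

    lstrcatW(key, ext_or_mime);
    if ((ret = RegCreateKeyExW(HKEY_LOCAL_MACHINE, key, 0, NULL, 0,
            KEY_SET_VALUE, NULL, &hkey, NULL)))
        return ret;
    /* The value name is the handler's CLSID; the data is a friendly name. */
    ret = RegSetValueExW(hkey, clsid, 0, REG_SZ, (const BYTE *)desc, sizeof(desc));
    RegCloseKey(hkey);
    return ret;
}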
If it never becomes necessary to write a source that outputs compressed samples, then we also don't have the cost of abstraction (which is always worth taking seriously!), and if it does, we come out even: we can still use your generic media source, or something like it.
Ultimately, I think that a decodebin wrapper is something we want to have anyway, for the sake of host codecs like Theora,
Where would we use support for Theora, if no Windows applications are able to use it?
Anything which wants to be able to play back an arbitrary media file, i.e. generic media players, mostly. I see all sorts of bug reports for these with Quartz, so people are definitely using them.
Heh.
and once we have it, I see zero cost in using it wherever else we can.
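And to be clear about that cost: the GStreamer side of such a wrapper is basically a factory call and a pad callback (a sketch; the names are mine):

#include <gst/gst.h>

/* decodebin picks the demuxer and decoders itself; by the time "pad-added"
 * fires we are handed raw pads, which is where something like
 * media_type_from_caps() takes over. */
static void pad_added_cb(GstElement *decodebin, GstPad *pad, gpointer user)
{
    GstCaps *caps = gst_pad_get_current_caps(pad);

    if (caps) /* e.g. video/x-raw or audio/x-raw */
        gst_caps_unref(caps);
}

static GstElement *create_decodebin(void)
{
    GstElement *element = gst_element_factory_make("decodebin", NULL);

    g_signal_connect(element, "pad-added", G_CALLBACK(pad_added_cb), NULL);
    return element;
}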