GStreamer uses _SC_NPROCESSORS_CONF to determine 'max-threads'. On the Steam Deck, this is configured to be 16 (which is double its number of logical cores).
_SC_NPROCESSORS_CONF also disregards a process's CPU affinity, thus it can create more threads than is useful, which ultimately wastes memory resources.
Using affinity to set 'max-threads' addresses both these problems.
From: Brendan McGrath <bmcgrath@codeweavers.com>
GStreamer uses _SC_NPROCESSORS_CONF to determine 'max-threads'. On the Steam Deck, this is configured to be 16 (which is double its number of logical cores).
_SC_NPROCESSORS_CONF also disregards a process's CPU affinity, thus it can create more threads than is useful, which ultimately wastes memory resources.
Using affinity to set 'max-threads' addresses both these problems.
---
 dlls/winegstreamer/wg_parser.c | 35 ++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
diff --git a/dlls/winegstreamer/wg_parser.c b/dlls/winegstreamer/wg_parser.c
index 7253013b6a3..dc820df2d24 100644
--- a/dlls/winegstreamer/wg_parser.c
+++ b/dlls/winegstreamer/wg_parser.c
@@ -567,6 +567,40 @@ static void no_more_pads_cb(GstElement *element, gpointer user)
     pthread_cond_signal(&parser->init_cond);
 }
 
+static void deep_element_added_cb(GstBin *self, GstBin *sub_bin, GstElement *element, gpointer user)
+{
+    GstElementFactory *factory = NULL;
+    const char *name = NULL;
+
+    if (element)
+        factory = gst_element_get_factory(element);
+
+    if (factory)
+        name = gst_element_factory_get_longname(factory);
+
+    if (name && strstr(name, "libav H.264"))
+    {
+        const gint32 MAX_THREADS = 16;
+        gint32 max_threads;
+
+#ifdef HAVE_SCHED_SETAFFINITY
+        gint32 thread_count;
+        cpu_set_t p_aff;
+        memset( &p_aff, 0, sizeof(p_aff) );
+        if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
+            thread_count = 0;
+        else
+            thread_count = CPU_COUNT(&p_aff);
+#else
+        /* A thread_count of 0 is the default value for gstreamer and means 'auto' */
+        gint32 thread_count = 0;
+#endif
+        max_threads = MIN(thread_count, MAX_THREADS);
+        GST_DEBUG("%s found, setting max-threads to %d.", name, max_threads);
+        g_object_set(element, "max-threads", max_threads, NULL);
+    }
+}
+
 static gboolean sink_event_cb(GstPad *pad, GstObject *parent, GstEvent *event)
 {
     struct wg_parser_stream *stream = gst_pad_get_element_private(pad);
@@ -1797,6 +1831,7 @@ static BOOL decodebin_parser_init_gst(struct wg_parser *parser)
     g_signal_connect(element, "autoplug-continue", G_CALLBACK(autoplug_continue_cb), parser);
     g_signal_connect(element, "autoplug-select", G_CALLBACK(autoplug_select_cb), parser);
     g_signal_connect(element, "no-more-pads", G_CALLBACK(no_more_pads_cb), parser);
+    g_signal_connect(element, "deep-element-added", G_CALLBACK(deep_element_added_cb), parser);
 
     pthread_mutex_lock(&parser->mutex);
     parser->no_more_pads = false;
From: Brendan McGrath <bmcgrath@codeweavers.com>
The avdec_h264 element can use 32MB per thread when working with 4K video.
With 16 threads, this is 512MB, which is a quarter of the RAM available to a 32-bit application. Setting MAX_THREADS to 4 can save 384MB.
---
 dlls/winegstreamer/wg_parser.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/dlls/winegstreamer/wg_parser.c b/dlls/winegstreamer/wg_parser.c
index dc820df2d24..a058ab08715 100644
--- a/dlls/winegstreamer/wg_parser.c
+++ b/dlls/winegstreamer/wg_parser.c
@@ -580,7 +580,11 @@ static void deep_element_added_cb(GstBin *self, GstBin *sub_bin, GstElement *ele
 
     if (name && strstr(name, "libav H.264"))
     {
+#if defined(__i386__)
+        const gint32 MAX_THREADS = 4;
+#else
         const gint32 MAX_THREADS = 16;
+#endif
         gint32 max_threads;
 
 #ifdef HAVE_SCHED_SETAFFINITY
Rémi Bernon (@rbernon) commented about dlls/winegstreamer/wg_parser.c:
> +    if (factory)
> +        name = gst_element_factory_get_longname(factory);
> +
> +    if (name && strstr(name, "libav H.264"))
> +    {
> +        const gint32 MAX_THREADS = 16;
> +        gint32 max_threads;
> +
> +#ifdef HAVE_SCHED_SETAFFINITY
> +        gint32 thread_count;
> +        cpu_set_t p_aff;
> +        memset( &p_aff, 0, sizeof(p_aff) );
> +        if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
> +            thread_count = 0;
> +        else
> +            thread_count = CPU_COUNT(&p_aff);
What about using Nt functions instead? NtQueryInformationThread( ThreadBasicInformation ), and counting the bits in `info.AffinityMask`?
It would probably be better to do this in `wg_init_gstreamer`, as this callback may be called in a non-wine thread.
Rémi Bernon (@rbernon) commented about dlls/winegstreamer/wg_parser.c:
>      pthread_cond_signal(&parser->init_cond);
>  }
>
> +static void deep_element_added_cb(GstBin *self, GstBin *sub_bin, GstElement *element, gpointer user)
> +{
> +    GstElementFactory *factory = NULL;
> +    const char *name = NULL;
> +
> +    if (element)
> +        factory = gst_element_get_factory(element);
> +
> +    if (factory)
> +        name = gst_element_factory_get_longname(factory);
> +
> +    if (name && strstr(name, "libav H.264"))
Do we need to check the name? Could we just set the property unconditionally? If we need to check the name I would rather check that the element short name starts with `avdec_`, as is the case for every libav decoder.
Note that this will not change the behavior of the standalone H264 decoder, you will need to do a similar thing in `wg_transform`.