GStreamer uses _SC_NPROCESSORS_CONF to determine 'max-threads'. On the Steam Deck, this is configured to be 16 (which is double its number of logical cores).
_SC_NPROCESSORS_CONF also disregards a process's CPU affinity, so GStreamer can create more threads than is useful, which ultimately wastes memory.
Using affinity to set 'max-threads' addresses both these problems.
-- v6: winegstreamer: Set MAX_THREADS to 4 for i386. winegstreamer: Use thread_count to determine 'max-threads' value. winegstreamer: Use process affinity to calculate thread_count.
From: Brendan McGrath bmcgrath@codeweavers.com
--- dlls/winegstreamer/unixlib.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)
diff --git a/dlls/winegstreamer/unixlib.c b/dlls/winegstreamer/unixlib.c index 175ab92ecdc..9124103cd46 100644 --- a/dlls/winegstreamer/unixlib.c +++ b/dlls/winegstreamer/unixlib.c @@ -47,6 +47,8 @@
GST_DEBUG_CATEGORY(wine);
+static UINT16 thread_count; + GstStreamType stream_type_from_caps(GstCaps *caps) { const gchar *media_type; @@ -244,6 +246,17 @@ bool push_event(GstPad *pad, GstEvent *event) return true; }
+static ULONG popcount(ULONG val) +{ +#if HAVE___BUILTIN_POPCOUNT + return __builtin_popcount(val); +#else + val -= val >> 1 & 0x55555555; + val = (val & 0x33333333) + (val >> 2 & 0x33333333); + return ((val + (val >> 4)) & 0x0f0f0f0f) * 0x01010101 >> 24; +#endif +} + NTSTATUS wg_init_gstreamer(void *arg) { struct wg_init_gstreamer_params *params = arg; @@ -253,6 +266,7 @@ NTSTATUS wg_init_gstreamer(void *arg) int argc = ARRAY_SIZE(args) - 1; char **argv = args; GError *err; + DWORD_PTR process_mask;
if (params->trace_on) setenv("GST_DEBUG", "WINE:9,4", FALSE); @@ -276,6 +290,12 @@ NTSTATUS wg_init_gstreamer(void *arg) return STATUS_UNSUCCESSFUL; }
+ if (!NtQueryInformationProcess(GetCurrentProcess(), + ProcessAffinityMask, &process_mask, sizeof(process_mask), NULL)) + thread_count = popcount(process_mask); + else + thread_count = 0; + GST_DEBUG_CATEGORY_INIT(wine, "WINE", GST_DEBUG_FG_RED, "Wine GStreamer support");
GST_INFO("GStreamer library version %s; wine built with %d.%d.%d.",
From: Brendan McGrath bmcgrath@codeweavers.com
GStreamer uses _SC_NPROCESSORS_CONF to determine 'max-threads'. On the Steam Deck, this is configured to be 16 (which is double its number of logical cores).
_SC_NPROCESSORS_CONF also disregards a process's CPU affinity, so GStreamer can create more threads than is useful, which ultimately wastes memory.
Using thread_count to set 'max-threads' addresses both these problems. --- dlls/winegstreamer/unix_private.h | 4 ++++ dlls/winegstreamer/unixlib.c | 17 +++++++++++++++++ dlls/winegstreamer/wg_parser.c | 7 +++++++ dlls/winegstreamer/wg_transform.c | 2 ++ 4 files changed, 30 insertions(+)
diff --git a/dlls/winegstreamer/unix_private.h b/dlls/winegstreamer/unix_private.h index 6f01b3a5a69..f81eca8ff90 100644 --- a/dlls/winegstreamer/unix_private.h +++ b/dlls/winegstreamer/unix_private.h @@ -31,6 +31,9 @@ GST_DEBUG_CATEGORY_EXTERN(wine); #define GST_CAT_DEFAULT wine
+#define ELEMENT_HAS_PROPERTY(element, property) \ + (!!g_object_class_find_property(G_OBJECT_CLASS(GST_ELEMENT_GET_CLASS((element))), (property))) + extern NTSTATUS wg_init_gstreamer(void *args);
extern GstStreamType stream_type_from_caps(GstCaps *caps); @@ -45,6 +48,7 @@ extern bool link_src_to_sink(GstPad *src_pad, GstPad *sink_pad); extern bool link_src_to_element(GstPad *src_pad, GstElement *element); extern bool link_element_to_sink(GstElement *element, GstPad *sink_pad); extern bool push_event(GstPad *pad, GstEvent *event); +extern void set_max_threads(GstElement *element);
/* wg_format.c */
diff --git a/dlls/winegstreamer/unixlib.c b/dlls/winegstreamer/unixlib.c index 9124103cd46..84a946ec395 100644 --- a/dlls/winegstreamer/unixlib.c +++ b/dlls/winegstreamer/unixlib.c @@ -302,3 +302,20 @@ NTSTATUS wg_init_gstreamer(void *arg) gst_version_string(), GST_VERSION_MAJOR, GST_VERSION_MINOR, GST_VERSION_MICRO); return STATUS_SUCCESS; } + +void set_max_threads(GstElement *element) +{ + const char *shortname = NULL; + GstElementFactory *factory = gst_element_get_factory(element); + + if (factory) + shortname = gst_plugin_feature_get_name(GST_PLUGIN_FEATURE(factory)); + + if (shortname && strstr(shortname, "avdec_") && ELEMENT_HAS_PROPERTY(element, "max-threads")) + { + const gint32 MAX_THREADS = 16; + gint32 max_threads = MIN(thread_count, MAX_THREADS); + GST_DEBUG("%s found, setting max-threads to %d.", shortname, max_threads); + g_object_set(element, "max-threads", max_threads, NULL); + } +} diff --git a/dlls/winegstreamer/wg_parser.c b/dlls/winegstreamer/wg_parser.c index 7253013b6a3..b4237731b29 100644 --- a/dlls/winegstreamer/wg_parser.c +++ b/dlls/winegstreamer/wg_parser.c @@ -567,6 +567,12 @@ static void no_more_pads_cb(GstElement *element, gpointer user) pthread_cond_signal(&parser->init_cond); }
+static void deep_element_added_cb(GstBin *self, GstBin *sub_bin, GstElement *element, gpointer user) +{ + if (element) + set_max_threads(element); +} + static gboolean sink_event_cb(GstPad *pad, GstObject *parent, GstEvent *event) { struct wg_parser_stream *stream = gst_pad_get_element_private(pad); @@ -1797,6 +1803,7 @@ static BOOL decodebin_parser_init_gst(struct wg_parser *parser) g_signal_connect(element, "autoplug-continue", G_CALLBACK(autoplug_continue_cb), parser); g_signal_connect(element, "autoplug-select", G_CALLBACK(autoplug_select_cb), parser); g_signal_connect(element, "no-more-pads", G_CALLBACK(no_more_pads_cb), parser); + g_signal_connect(element, "deep-element-added", G_CALLBACK(deep_element_added_cb), parser);
pthread_mutex_lock(&parser->mutex); parser->no_more_pads = false; diff --git a/dlls/winegstreamer/wg_transform.c b/dlls/winegstreamer/wg_transform.c index 08c2c678024..1ca1906e6f6 100644 --- a/dlls/winegstreamer/wg_transform.c +++ b/dlls/winegstreamer/wg_transform.c @@ -486,6 +486,8 @@ NTSTATUS wg_transform_create(void *args) if (!(element = find_element(GST_ELEMENT_FACTORY_TYPE_DECODER, parsed_caps, sink_caps)) || !append_element(transform->container, element, &first, &last)) goto out; + + set_max_threads(element); break;
case WG_MAJOR_TYPE_AUDIO:
From: Brendan McGrath bmcgrath@codeweavers.com
The avdec_h264 element can use 32MB per thread when working with 4K video.
With 16 threads, this is 512MB, which is a quarter of the 2GB address space available to a 32-bit application by default. Setting MAX_THREADS to 4 can save 384MB. --- dlls/winegstreamer/unixlib.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/dlls/winegstreamer/unixlib.c b/dlls/winegstreamer/unixlib.c index 84a946ec395..822deb21949 100644 --- a/dlls/winegstreamer/unixlib.c +++ b/dlls/winegstreamer/unixlib.c @@ -313,7 +313,11 @@ void set_max_threads(GstElement *element)
if (shortname && strstr(shortname, "avdec_") && ELEMENT_HAS_PROPERTY(element, "max-threads")) { +#if defined(__i386__) + const gint32 MAX_THREADS = 4; +#else const gint32 MAX_THREADS = 16; +#endif gint32 max_threads = MIN(thread_count, MAX_THREADS); GST_DEBUG("%s found, setting max-threads to %d.", shortname, max_threads); g_object_set(element, "max-threads", max_threads, NULL);
OK - I think I finally got it. Thanks again @rbernon
This merge request was approved by Rémi Bernon.