GStreamer uses _SC_NPROCESSORS_CONF to determine 'max-threads'. On the Steam Deck, this is configured to be 16 (which is double its number of logical cores).
_SC_NPROCESSORS_CONF also disregards a process's CPU affinity, so GStreamer can create more threads than is useful, which ultimately wastes memory.
Using affinity to set 'max-threads' addresses both these problems.
-- v3: winegstreamer: Set MAX_THREADS to 4 for i386. winegstreamer: Use thread_count to determine 'max-threads' value. winegstreamer: Use process affinity to calculate thread_count.
From: Brendan McGrath bmcgrath@codeweavers.com
--- dlls/winegstreamer/unixlib.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)
diff --git a/dlls/winegstreamer/unixlib.c b/dlls/winegstreamer/unixlib.c index 175ab92ecdc..354d723d5a3 100644 --- a/dlls/winegstreamer/unixlib.c +++ b/dlls/winegstreamer/unixlib.c @@ -47,6 +47,8 @@
GST_DEBUG_CATEGORY(wine);
+static UINT16 thread_count; + GstStreamType stream_type_from_caps(GstCaps *caps) { const gchar *media_type; @@ -244,6 +246,17 @@ bool push_event(GstPad *pad, GstEvent *event) return true; }
+static ULONG popcount( ULONG val ) +{ +#if defined(__MINGW32__) + return __builtin_popcount( val ); +#else + val -= val >> 1 & 0x55555555; + val = (val & 0x33333333) + (val >> 2 & 0x33333333); + return ((val + (val >> 4)) & 0x0f0f0f0f) * 0x01010101 >> 24; +#endif +} + NTSTATUS wg_init_gstreamer(void *arg) { struct wg_init_gstreamer_params *params = arg; @@ -253,6 +266,7 @@ NTSTATUS wg_init_gstreamer(void *arg) int argc = ARRAY_SIZE(args) - 1; char **argv = args; GError *err; + DWORD_PTR process_mask;
if (params->trace_on) setenv("GST_DEBUG", "WINE:9,4", FALSE); @@ -276,6 +290,12 @@ NTSTATUS wg_init_gstreamer(void *arg) return STATUS_UNSUCCESSFUL; }
+ if (SUCCEEDED(NtQueryInformationProcess( GetCurrentProcess(), + ProcessAffinityMask, &process_mask, sizeof(process_mask), NULL ))) + thread_count = popcount(process_mask); + else + thread_count = 0; + GST_DEBUG_CATEGORY_INIT(wine, "WINE", GST_DEBUG_FG_RED, "Wine GStreamer support");
GST_INFO("GStreamer library version %s; wine built with %d.%d.%d.",
From: Brendan McGrath bmcgrath@codeweavers.com
GStreamer uses _SC_NPROCESSORS_CONF to determine 'max-threads'. On the Steam Deck, this is configured to be 16 (which is double its number of logical cores).
_SC_NPROCESSORS_CONF also disregards a process's CPU affinity, so GStreamer can create more threads than is useful, which ultimately wastes memory.
Using thread_count to set 'max-threads' addresses both these problems. --- dlls/winegstreamer/unix_private.h | 3 +++ dlls/winegstreamer/unixlib.c | 17 +++++++++++++++++ dlls/winegstreamer/wg_parser.c | 7 +++++++ dlls/winegstreamer/wg_transform.c | 2 ++ 4 files changed, 29 insertions(+)
diff --git a/dlls/winegstreamer/unix_private.h b/dlls/winegstreamer/unix_private.h index 6f01b3a5a69..51b418eb513 100644 --- a/dlls/winegstreamer/unix_private.h +++ b/dlls/winegstreamer/unix_private.h @@ -31,6 +31,9 @@ GST_DEBUG_CATEGORY_EXTERN(wine); #define GST_CAT_DEFAULT wine
+#define ELEMENT_HAS_PROPERTY(element, property) \ + (!!g_object_class_find_property(G_OBJECT_CLASS(GST_ELEMENT_GET_CLASS((element))), (property))) + extern NTSTATUS wg_init_gstreamer(void *args);
extern GstStreamType stream_type_from_caps(GstCaps *caps); diff --git a/dlls/winegstreamer/unixlib.c b/dlls/winegstreamer/unixlib.c index 354d723d5a3..9b069c1f310 100644 --- a/dlls/winegstreamer/unixlib.c +++ b/dlls/winegstreamer/unixlib.c @@ -302,3 +302,20 @@ NTSTATUS wg_init_gstreamer(void *arg) gst_version_string(), GST_VERSION_MAJOR, GST_VERSION_MINOR, GST_VERSION_MICRO); return STATUS_SUCCESS; } + +void set_max_threads(GstElement *element) +{ + const char *shortname = NULL; + GstElementFactory *factory = gst_element_get_factory(element); + + if (factory) + shortname = gst_plugin_feature_get_name(GST_PLUGIN_FEATURE(factory)); + + if (shortname && strstr(shortname, "avdec_") && ELEMENT_HAS_PROPERTY(element, "max-threads")) + { + const gint32 MAX_THREADS = 16; + gint32 max_threads = MIN(thread_count, MAX_THREADS); + GST_DEBUG("%s found, setting max-threads to %d.", shortname, max_threads); + g_object_set(element, "max-threads", max_threads, NULL); + } +} diff --git a/dlls/winegstreamer/wg_parser.c b/dlls/winegstreamer/wg_parser.c index 7253013b6a3..b4237731b29 100644 --- a/dlls/winegstreamer/wg_parser.c +++ b/dlls/winegstreamer/wg_parser.c @@ -567,6 +567,12 @@ static void no_more_pads_cb(GstElement *element, gpointer user) pthread_cond_signal(&parser->init_cond); }
+static void deep_element_added_cb(GstBin *self, GstBin *sub_bin, GstElement *element, gpointer user) +{ + if (element) + set_max_threads(element); +} + static gboolean sink_event_cb(GstPad *pad, GstObject *parent, GstEvent *event) { struct wg_parser_stream *stream = gst_pad_get_element_private(pad); @@ -1797,6 +1803,7 @@ static BOOL decodebin_parser_init_gst(struct wg_parser *parser) g_signal_connect(element, "autoplug-continue", G_CALLBACK(autoplug_continue_cb), parser); g_signal_connect(element, "autoplug-select", G_CALLBACK(autoplug_select_cb), parser); g_signal_connect(element, "no-more-pads", G_CALLBACK(no_more_pads_cb), parser); + g_signal_connect(element, "deep-element-added", G_CALLBACK(deep_element_added_cb), parser);
pthread_mutex_lock(&parser->mutex); parser->no_more_pads = false; diff --git a/dlls/winegstreamer/wg_transform.c b/dlls/winegstreamer/wg_transform.c index 08c2c678024..1ca1906e6f6 100644 --- a/dlls/winegstreamer/wg_transform.c +++ b/dlls/winegstreamer/wg_transform.c @@ -486,6 +486,8 @@ NTSTATUS wg_transform_create(void *args) if (!(element = find_element(GST_ELEMENT_FACTORY_TYPE_DECODER, parsed_caps, sink_caps)) || !append_element(transform->container, element, &first, &last)) goto out; + + set_max_threads(element); break;
case WG_MAJOR_TYPE_AUDIO:
From: Brendan McGrath bmcgrath@codeweavers.com
The avdec_h264 element can use 32MB per thread when working with 4K video.
With 16 threads, this is 512MB, which is a quarter of the RAM available to a 32-bit application. Setting MAX_THREADS to 4 can save 384MB. --- dlls/winegstreamer/unixlib.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/dlls/winegstreamer/unixlib.c b/dlls/winegstreamer/unixlib.c index 9b069c1f310..6102580c419 100644 --- a/dlls/winegstreamer/unixlib.c +++ b/dlls/winegstreamer/unixlib.c @@ -313,7 +313,11 @@ void set_max_threads(GstElement *element)
if (shortname && strstr(shortname, "avdec_") && ELEMENT_HAS_PROPERTY(element, "max-threads")) { +#if defined(__i386__) + const gint32 MAX_THREADS = 4; +#else const gint32 MAX_THREADS = 16; +#endif gint32 max_threads = MIN(thread_count, MAX_THREADS); GST_DEBUG("%s found, setting max-threads to %d.", shortname, max_threads); g_object_set(element, "max-threads", max_threads, NULL);
On Wed Jun 26 22:50:15 2024 +0000, Brendan McGrath wrote:
changed this line in [version 3 of the diff](/wine/wine/-/merge_requests/5923/diffs?diff_id=119633&start_sha=8cff15183dfb67f3e136ab67f5124fcbadb7fc40#066e40048e94a2f91e87cefd01bb0dc7bc6ab892_924_912)
Sorry, I've changed this to now call `NtQueryInformationProcess` from within `wg_init_gstreamer`.
I had incorrectly assumed that `NtQueryInformationProcess` could only be called from the PE side, and as a result I read your original suggestion as putting this in `wg_init_gstreamer` and its PE counterpart. I prefer having it in `wg_init_gstreamer`, though.
On Wed Jun 26 22:50:16 2024 +0000, Brendan McGrath wrote:
changed this line in [version 3 of the diff](/wine/wine/-/merge_requests/5923/diffs?diff_id=119633&start_sha=8cff15183dfb67f3e136ab67f5124fcbadb7fc40#60c57c4ae493c61ac248f463f5b5971603bd4e53_51_51)
I'm only using `thread_count` from within `unixlib.c`, so I've now removed this function (and left `thread_count` static).
On Wed Jun 26 08:05:49 2024 +0000, Rémi Bernon wrote:
shortname = gst_plugin_feature_get_name(GST_PLUGIN_FEATURE(factory));
:thumbsup:
On Wed Jun 26 08:05:49 2024 +0000, Rémi Bernon wrote:
Any reason for this to be a macro? Why not inline it in the if?
I factored this out when I was using the same code in two places. After returning to using it in only one place, I decided `ELEMENT_HAS_PROPERTY` read better than "not not class find property", so I left it. But I can change it back to inline if that's the preference.
On Wed Jun 26 23:02:17 2024 +0000, Brendan McGrath wrote:
I factored this out when I was using the same code in two places. After returning to using it in only one place, I decided `ELEMENT_HAS_PROPERTY` read better than "not not class find property", so I left it. But I can change it back to inline if that's the preference.
I guess I don't need it in `unix_private.h` anymore, though, so I could move it to `unixlib.c`.