GStreamer uses _SC_NPROCESSORS_CONF to determine 'max-threads'. On the Steam Deck, this is configured to be 16 (which is double its number of logical cores).
_SC_NPROCESSORS_CONF also disregards a process's CPU affinity, thus it can create more threads than is useful, which ultimately wastes memory resources.
Using affinity to set 'max-threads' addresses both these problems.
-- v2: winegstreamer: Set MAX_THREADS to 4 for i386. winegstreamer: Use thread_count to determine 'max-threads' value. winegstreamer: Provide thread_count to init_gstreamer.
From: Brendan McGrath bmcgrath@codeweavers.com
This value is then made available through get_thread_count(). --- dlls/winegstreamer/main.c | 13 +++++++++++++ dlls/winegstreamer/unix_private.h | 1 + dlls/winegstreamer/unixlib.c | 9 +++++++++ dlls/winegstreamer/unixlib.h | 1 + 4 files changed, 24 insertions(+)
diff --git a/dlls/winegstreamer/main.c b/dlls/winegstreamer/main.c index 747479daf40..596d8442630 100644 --- a/dlls/winegstreamer/main.c +++ b/dlls/winegstreamer/main.c @@ -907,8 +907,21 @@ static BOOL CALLBACK init_gstreamer_proc(INIT_ONCE *once, void *param, void **ct .trace_on = TRACE_ON(mfplat) || TRACE_ON(quartz) || TRACE_ON(wmvcore), .warn_on = WARN_ON(mfplat) || WARN_ON(quartz) || WARN_ON(wmvcore), .err_on = ERR_ON(mfplat) || ERR_ON(quartz) || ERR_ON(wmvcore), + .thread_count = 0, }; HINSTANCE handle; + DWORD_PTR process_mask; + int i; + + if (SUCCEEDED(NtQueryInformationProcess( GetCurrentProcess(), + ProcessAffinityMask, &process_mask, sizeof(process_mask), NULL ))) + { + for (i = 0; i < 8 * sizeof(process_mask); i++) + { + if (process_mask & 1 << i) + params.thread_count++; + } + }
if (WINE_UNIX_CALL(unix_wg_init_gstreamer, ¶ms)) return FALSE; diff --git a/dlls/winegstreamer/unix_private.h b/dlls/winegstreamer/unix_private.h index 6f01b3a5a69..b9b974972f4 100644 --- a/dlls/winegstreamer/unix_private.h +++ b/dlls/winegstreamer/unix_private.h @@ -45,6 +45,7 @@ extern bool link_src_to_sink(GstPad *src_pad, GstPad *sink_pad); extern bool link_src_to_element(GstPad *src_pad, GstElement *element); extern bool link_element_to_sink(GstElement *element, GstPad *sink_pad); extern bool push_event(GstPad *pad, GstEvent *event); +extern UINT16 get_thread_count(void);
/* wg_format.c */
diff --git a/dlls/winegstreamer/unixlib.c b/dlls/winegstreamer/unixlib.c index 175ab92ecdc..b34f8593b94 100644 --- a/dlls/winegstreamer/unixlib.c +++ b/dlls/winegstreamer/unixlib.c @@ -47,6 +47,8 @@
GST_DEBUG_CATEGORY(wine);
+static UINT16 thread_count; + GstStreamType stream_type_from_caps(GstCaps *caps) { const gchar *media_type; @@ -262,6 +264,8 @@ NTSTATUS wg_init_gstreamer(void *arg) setenv("GST_DEBUG", "1", FALSE); setenv("GST_DEBUG_NO_COLOR", "1", FALSE);
+ thread_count = params->thread_count; + /* GStreamer installs a temporary SEGV handler when it loads plugins * to initialize its registry calling exit(-1) when any fault is caught. * We need to make sure any signal reaches our signal handlers to catch @@ -282,3 +286,8 @@ NTSTATUS wg_init_gstreamer(void *arg) gst_version_string(), GST_VERSION_MAJOR, GST_VERSION_MINOR, GST_VERSION_MICRO); return STATUS_SUCCESS; } + +UINT16 get_thread_count(void) +{ + return thread_count; +} diff --git a/dlls/winegstreamer/unixlib.h b/dlls/winegstreamer/unixlib.h index 153b6c91306..69aed595efd 100644 --- a/dlls/winegstreamer/unixlib.h +++ b/dlls/winegstreamer/unixlib.h @@ -182,6 +182,7 @@ struct wg_init_gstreamer_params UINT8 trace_on; UINT8 warn_on; UINT8 err_on; + UINT16 thread_count; };
struct wg_parser_create_params
From: Brendan McGrath bmcgrath@codeweavers.com
GStreamer uses _SC_NPROCESSORS_CONF to determine 'max-threads'. On the Steam Deck, this is configured to be 16 (which is double its number of logical cores).
_SC_NPROCESSORS_CONF also disregards a process's CPU affinity, thus it can create more threads than is useful, which ultimately wastes memory resources.
Using thread_count to set 'max-threads' addresses both these problems. --- dlls/winegstreamer/unix_private.h | 4 ++++ dlls/winegstreamer/unixlib.c | 17 +++++++++++++++++ dlls/winegstreamer/wg_parser.c | 7 +++++++ dlls/winegstreamer/wg_transform.c | 2 ++ 4 files changed, 30 insertions(+)
diff --git a/dlls/winegstreamer/unix_private.h b/dlls/winegstreamer/unix_private.h index b9b974972f4..88bca3f44c6 100644 --- a/dlls/winegstreamer/unix_private.h +++ b/dlls/winegstreamer/unix_private.h @@ -31,6 +31,9 @@ GST_DEBUG_CATEGORY_EXTERN(wine); #define GST_CAT_DEFAULT wine
+#define ELEMENT_HAS_PROPERTY(element, property) \ + (!!g_object_class_find_property(G_OBJECT_CLASS(GST_ELEMENT_GET_CLASS((element))), (property))) + extern NTSTATUS wg_init_gstreamer(void *args);
extern GstStreamType stream_type_from_caps(GstCaps *caps); @@ -46,6 +49,7 @@ extern bool link_src_to_element(GstPad *src_pad, GstElement *element); extern bool link_element_to_sink(GstElement *element, GstPad *sink_pad); extern bool push_event(GstPad *pad, GstEvent *event); extern UINT16 get_thread_count(void); +extern void set_max_threads(GstElement *element);
/* wg_format.c */
diff --git a/dlls/winegstreamer/unixlib.c b/dlls/winegstreamer/unixlib.c index b34f8593b94..a091fdc187a 100644 --- a/dlls/winegstreamer/unixlib.c +++ b/dlls/winegstreamer/unixlib.c @@ -291,3 +291,20 @@ UINT16 get_thread_count(void) { return thread_count; } + +void set_max_threads(GstElement *element) +{ + const char *shortname = NULL; + GstElementFactory *factory = gst_element_get_factory(element); + + if (factory) + shortname = gst_plugin_feature_get_name (GST_PLUGIN_FEATURE (factory)); + + if (shortname && strstr(shortname, "avdec_") && ELEMENT_HAS_PROPERTY(element, "max-threads")) + { + const gint32 MAX_THREADS = 16; + gint32 max_threads = MIN(get_thread_count(), MAX_THREADS); + GST_DEBUG("%s found, setting max-threads to %d.", shortname, max_threads); + g_object_set(element, "max-threads", max_threads, NULL); + } +} diff --git a/dlls/winegstreamer/wg_parser.c b/dlls/winegstreamer/wg_parser.c index 7253013b6a3..b4237731b29 100644 --- a/dlls/winegstreamer/wg_parser.c +++ b/dlls/winegstreamer/wg_parser.c @@ -567,6 +567,12 @@ static void no_more_pads_cb(GstElement *element, gpointer user) pthread_cond_signal(&parser->init_cond); }
+static void deep_element_added_cb(GstBin *self, GstBin *sub_bin, GstElement *element, gpointer user) +{ + if (element) + set_max_threads(element); +} + static gboolean sink_event_cb(GstPad *pad, GstObject *parent, GstEvent *event) { struct wg_parser_stream *stream = gst_pad_get_element_private(pad); @@ -1797,6 +1803,7 @@ static BOOL decodebin_parser_init_gst(struct wg_parser *parser) g_signal_connect(element, "autoplug-continue", G_CALLBACK(autoplug_continue_cb), parser); g_signal_connect(element, "autoplug-select", G_CALLBACK(autoplug_select_cb), parser); g_signal_connect(element, "no-more-pads", G_CALLBACK(no_more_pads_cb), parser); + g_signal_connect(element, "deep-element-added", G_CALLBACK(deep_element_added_cb), parser);
pthread_mutex_lock(&parser->mutex); parser->no_more_pads = false; diff --git a/dlls/winegstreamer/wg_transform.c b/dlls/winegstreamer/wg_transform.c index 08c2c678024..1ca1906e6f6 100644 --- a/dlls/winegstreamer/wg_transform.c +++ b/dlls/winegstreamer/wg_transform.c @@ -486,6 +486,8 @@ NTSTATUS wg_transform_create(void *args) if (!(element = find_element(GST_ELEMENT_FACTORY_TYPE_DECODER, parsed_caps, sink_caps)) || !append_element(transform->container, element, &first, &last)) goto out; + + set_max_threads(element); break;
case WG_MAJOR_TYPE_AUDIO:
From: Brendan McGrath bmcgrath@codeweavers.com
The avdec_h264 element can use 32MB per thread when working with 4K video.
With 16 threads, this is 512MB, which is a quarter of the RAM available to a 32-bit application. Setting MAX_THREADS to 4 can save 384MB. --- dlls/winegstreamer/unixlib.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/dlls/winegstreamer/unixlib.c b/dlls/winegstreamer/unixlib.c index a091fdc187a..546785918e8 100644 --- a/dlls/winegstreamer/unixlib.c +++ b/dlls/winegstreamer/unixlib.c @@ -302,7 +302,11 @@ void set_max_threads(GstElement *element)
if (shortname && strstr(shortname, "avdec_") && ELEMENT_HAS_PROPERTY(element, "max-threads")) { +#if defined(__i386__) + const gint32 MAX_THREADS = 4; +#else const gint32 MAX_THREADS = 16; +#endif gint32 max_threads = MIN(get_thread_count(), MAX_THREADS); GST_DEBUG("%s found, setting max-threads to %d.", shortname, max_threads); g_object_set(element, "max-threads", max_threads, NULL);
Hi,
It looks like your patch introduced the new failures shown below. Please investigate and fix them before resubmitting your patch. If they are not new, fixing them anyway would help a lot. Otherwise please ask for the known failures list to be updated.
The tests also ran into some preexisting test failures. If you know how to fix them that would be helpful. See the TestBot job for the details:
The full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=146570
Your paranoid android.
=== debian11b (64 bit WoW report) ===
kernel32: comm.c:1574: Test failed: AbortWaitCts hComPortEvent failed comm.c:1586: Test failed: Unexpected time 1002, expected around 500
On Wed Jun 26 02:25:49 2024 +0000, Brendan McGrath wrote:
changed this line in [version 2 of the diff](/wine/wine/-/merge_requests/5923/diffs?diff_id=119523&start_sha=0b13d8f7c93d01315f1a32524118ae2cbf149494#d19138de09a66cf3bcf4e698b2f5c81e9daf14a2_597_573)
Good idea, then if we need different APIs for different platforms, it can all be done in `server/thread.c`.
I've changed it so `init_gstreamer_proc` now calls `NtQueryInformationProcess` with `ProcessAffinityMask` to calculate the thread_count (and pass it to `wg_init_gstreamer`).
On Wed Jun 26 02:25:50 2024 +0000, Brendan McGrath wrote:
changed this line in [version 2 of the diff](/wine/wine/-/merge_requests/5923/diffs?diff_id=119523&start_sha=0b13d8f7c93d01315f1a32524118ae2cbf149494#d19138de09a66cf3bcf4e698b2f5c81e9daf14a2_581_573)
Probably don't need to name check, but I kept it just in case another plugin uses `max-thread` with a different meaning.
But I found some of the `avdec_` elements don't have `max-thread` (for example, `avdec_aac`) and trying to set it on those would result in a "CRITICAL" error being logged. So I now check for the presence of that property in addition to the prefix of `avdec_`.
On Wed Jun 26 02:46:08 2024 +0000, Rémi Bernon wrote:
Note that this will not change the behavior of the standalone H264 decoder, you will need to do a similar thing in `wg_transform`.
Good pick-up. I've now added this to `wg_transform` as well.
Thanks @rbernon.
Rémi Bernon (@rbernon) commented about dlls/winegstreamer/main.c:
.err_on = ERR_ON(mfplat) || ERR_ON(quartz) || ERR_ON(wmvcore),
}; HINSTANCE handle;.thread_count = 0,
- DWORD_PTR process_mask;
- int i;
- if (SUCCEEDED(NtQueryInformationProcess( GetCurrentProcess(),
ProcessAffinityMask, &process_mask, sizeof(process_mask), NULL )))
- {
for (i = 0; i < 8 * sizeof(process_mask); i++)
{
if (process_mask & 1 << i)
params.thread_count++;
}
- }
```suggestion:-8+0 if (GetProcessAffinityMask(GetCurrentProcess(), &process_mask, NULL)) { for (i = 0; i < 8 * sizeof(process_mask); i++) { if (process_mask & 1 << i) params.thread_count++; } } ```
As you're doing this on the PE side, you don't have to use Nt APIs.
You could maybe copy `popcount` from `ntdll/rtlbitmap.c`. I think it may be slightly better to make it explicit that it's what is computed, rather than open-coding it.
Rémi Bernon (@rbernon) commented about dlls/winegstreamer/unix_private.h:
extern bool link_src_to_element(GstPad *src_pad, GstElement *element); extern bool link_element_to_sink(GstElement *element, GstPad *sink_pad); extern bool push_event(GstPad *pad, GstEvent *event); +extern UINT16 get_thread_count(void);
You could probably just make the global variable extern.
Rémi Bernon (@rbernon) commented about dlls/winegstreamer/unixlib.c:
{ return thread_count; }
+void set_max_threads(GstElement *element) +{
- const char *shortname = NULL;
- GstElementFactory *factory = gst_element_get_factory(element);
- if (factory)
shortname = gst_plugin_feature_get_name (GST_PLUGIN_FEATURE (factory));
```suggestion:-0+0 shortname = gst_plugin_feature_get_name(GST_PLUGIN_FEATURE(factory)); ```
Rémi Bernon (@rbernon) commented about dlls/winegstreamer/unix_private.h:
GST_DEBUG_CATEGORY_EXTERN(wine); #define GST_CAT_DEFAULT wine
+#define ELEMENT_HAS_PROPERTY(element, property) \
(!!g_object_class_find_property(G_OBJECT_CLASS(GST_ELEMENT_GET_CLASS((element))), (property)))
Any reason for this to be a macro? Why not inline it in the if?