On my Steam Deck, this reduces the time it takes to initialize the wg_parser radically (by around 1 second). This helps in the game WILD HEARTS, which doesn't present while loading help videos during gameplay, causing large stutters.
Because GStreamer can randomly access the file with no known pattern on our side, I opted to implement this by buffering 4 chunks so that interleaved reads to different areas of the file don't cause us to discard and reload cached data more than we need to.
-- v3: winegstreamer: Cache wg_parser input data.
From: Derek Lesho <dlesho@codeweavers.com>
In order to reduce wg_parser initialization time by skipping the round-trip to the PE thread.
Signed-off-by: Derek Lesho <dlesho@codeweavers.com> --- dlls/winegstreamer/wg_parser.c | 144 ++++++++++++++++++++++++++++----- 1 file changed, 123 insertions(+), 21 deletions(-)
diff --git a/dlls/winegstreamer/wg_parser.c b/dlls/winegstreamer/wg_parser.c index 5bb824f4399..5221dcba544 100644 --- a/dlls/winegstreamer/wg_parser.c +++ b/dlls/winegstreamer/wg_parser.c @@ -59,6 +59,12 @@ GST_DEBUG_CATEGORY(wine);
typedef BOOL (*init_gst_cb)(struct wg_parser *parser);
+struct input_cache_chunk +{ + guint64 offset; + uint8_t *data; +}; + struct wg_parser { init_gst_cb init_gst; @@ -96,7 +102,10 @@ struct wg_parser bool unlimited_buffering;
gchar *sink_caps; + + struct input_cache_chunk input_cache_chunks[4]; }; +static unsigned int wg_parser_chunk_size = 512 << 10;
struct wg_parser_stream { @@ -967,30 +976,10 @@ static void pad_removed_cb(GstElement *element, GstPad *pad, gpointer user) g_free(name); }
-static GstFlowReturn src_getrange_cb(GstPad *pad, GstObject *parent, - guint64 offset, guint size, GstBuffer **buffer) +static GstFlowReturn issue_read_request(struct wg_parser *parser, guint64 offset, guint size, GstBuffer **buffer) { - struct wg_parser *parser = gst_pad_get_element_private(pad); GstFlowReturn ret;
- GST_LOG("pad %p, offset %" G_GINT64_MODIFIER "u, size %u, buffer %p.", pad, offset, size, *buffer); - - if (offset == GST_BUFFER_OFFSET_NONE) - offset = parser->next_pull_offset; - parser->next_pull_offset = offset + size; - - if (!size) - { - /* asfreader occasionally asks for zero bytes. gst_buffer_map() will - * return NULL in this case. Avoid confusing the read thread by asking - * it for zero bytes. */ - if (!*buffer) - *buffer = gst_buffer_new_and_alloc(0); - gst_buffer_set_size(*buffer, 0); - GST_LOG("Returning empty buffer."); - return GST_FLOW_OK; - } - pthread_mutex_lock(&parser->mutex);
assert(!parser->read_request.size); @@ -1017,6 +1006,110 @@ static GstFlowReturn src_getrange_cb(GstPad *pad, GstObject *parent, return ret; }
+static GstFlowReturn read_input_cache(struct wg_parser *parser, guint64 offset, guint size, GstBuffer **buffer) +{ + GstBuffer *chunk_buffer, *working_buffer; + unsigned int read_size, bytes_read = 0; + struct input_cache_chunk chunk; + GstFlowReturn ret; + int i; + + working_buffer = *buffer; + if (!working_buffer) + working_buffer = gst_buffer_new_and_alloc(size); + + while (size) + { + for (i = 0; i < ARRAY_SIZE(parser->input_cache_chunks); i++) + { + chunk = parser->input_cache_chunks[i]; + + if (chunk.data && offset - chunk.offset < wg_parser_chunk_size) + { + read_size = min(size, wg_parser_chunk_size - (offset - chunk.offset)); + gst_buffer_fill(working_buffer, bytes_read, chunk.data + (offset - chunk.offset), read_size); + + if (i != 0) + { + memmove(&parser->input_cache_chunks[1], &parser->input_cache_chunks[0], i * sizeof(chunk)); + parser->input_cache_chunks[0] = chunk; + } + + size -= read_size; + offset += read_size; + bytes_read += read_size; + + break; + } + } + + if (i != ARRAY_SIZE(parser->input_cache_chunks)) + continue; + + chunk = parser->input_cache_chunks[ ARRAY_SIZE(parser->input_cache_chunks) - 1 ]; + + chunk.offset = offset - (offset % wg_parser_chunk_size); + if (!chunk.data) + chunk.data = malloc(wg_parser_chunk_size); + + chunk_buffer = gst_buffer_new_wrapped_full(0, chunk.data, wg_parser_chunk_size, 0, wg_parser_chunk_size, NULL, NULL); + + ret = issue_read_request(parser, chunk.offset, wg_parser_chunk_size, &chunk_buffer); + + gst_buffer_unref(chunk_buffer); + + if (ret != GST_FLOW_OK) + { + if (!parser->input_cache_chunks[ ARRAY_SIZE(parser->input_cache_chunks) - 1 ].data) + free(chunk.data); + if (!*buffer) + gst_buffer_unref(working_buffer); + return ret; + } + + memmove(&parser->input_cache_chunks[1], &parser->input_cache_chunks[0], (ARRAY_SIZE(parser->input_cache_chunks) - 1) * sizeof(chunk)); + parser->input_cache_chunks[0] = chunk; + } + + *buffer = working_buffer; + return GST_FLOW_OK; +} + +static GstFlowReturn 
src_getrange_cb(GstPad *pad, GstObject *parent, + guint64 offset, guint size, GstBuffer **buffer) +{ + struct wg_parser *parser = gst_pad_get_element_private(pad); + + GST_LOG("pad %p, offset %" G_GINT64_MODIFIER "u, size %u, buffer %p.", pad, offset, size, *buffer); + + if (offset == GST_BUFFER_OFFSET_NONE) + offset = parser->next_pull_offset; + parser->next_pull_offset = offset + size; + + if (!size) + { + /* asfreader occasionally asks for zero bytes. gst_buffer_map() will + * return NULL in this case. Avoid confusing the read thread by asking + * it for zero bytes. */ + if (!*buffer) + *buffer = gst_buffer_new_and_alloc(0); + gst_buffer_set_size(*buffer, 0); + GST_LOG("Returning empty buffer."); + return GST_FLOW_OK; + } + + if (size >= wg_parser_chunk_size || sizeof(void*) == 4) + return issue_read_request(parser, offset, size, buffer); + + if (offset >= parser->file_size) + return GST_FLOW_EOS; + + if ((offset + size) >= parser->file_size) + size = parser->file_size - offset; + + return read_input_cache(parser, offset, size, buffer); +} + static gboolean src_query_cb(GstPad *pad, GstObject *parent, GstQuery *query) { struct wg_parser *parser = gst_pad_get_element_private(pad); @@ -1562,6 +1655,15 @@ static NTSTATUS wg_parser_disconnect(void *args) g_free(parser->sink_caps); parser->sink_caps = NULL;
+ for (i = 0; i < ARRAY_SIZE(parser->input_cache_chunks); i++) + { + if (parser->input_cache_chunks[i].data) + { + free(parser->input_cache_chunks[i].data); + parser->input_cache_chunks[i].data = NULL; + } + } + return S_OK; }
Also in this push is a fix for an error introduced in the previous push with the array-index-based recency ordering: the memmove that shifts the cache chunk structs was not multiplying the element count by the size of the chunk struct.
Taking all that into consideration, I'll keep the fallback path out of caution, keep the chunk size at 512 KB, and disable the path entirely on 32-bit, unless there are any objections.
I think that makes sense, yeah.
I still don't like the way that read_input_cache() is arranged, it doesn't strike me as a very idiomatic way to do a cache. In general I expect something like
bool get_cache_entry(...) { for (each entry in cache) { if (entry matches) return cache entry (or fill and return true) }
return false; }
read_cached_chunk(...) { if (get_cache_entry(...)) return (or fill and return)
issue_read_request(...)
insert new cache entry }
read_input_cache(...) { for (each aligned chunk) read_cached_chunk(...) }
Where the cache is filled can vary while still being idiomatic; it's often best to e.g. return a const struct input_cache_chunk pointer from "read_cached_chunk()", allowing the GstBuffer to be filled in exactly one place. (The function names in that pseudocode aren't very consistent either but should hopefully get the point across well enough.)
We could even further simplify read_input_cache() in this case, since the loop can be at most two iterations and hence doesn't really need to be a loop.
Ugh, I hate markdown. Here's the pseudocode I was trying to give:
```
bool get_cache_entry(...)
{
    for (each entry in cache)
    {
        if (entry matches)
            return cache entry (or fill and return true);
    }
    return false;
}

read_cached_chunk(...)
{
    if (get_cache_entry(...))
        return (or fill and return);

    issue_read_request(...);

    insert new cache entry;
}

read_input_cache(...)
{
    for (each aligned chunk)
        read_cached_chunk(...);
}
```