Module: wine
Branch: master
Commit: cb723c6dd3baf13583b0bde2fc71191b86100229
URL: https://gitlab.winehq.org/wine/wine/-/commit/cb723c6dd3baf13583b0bde2fc7119…
Author: Zebediah Figura <zfigura(a)codeweavers.com>
Date: Wed Apr 24 18:30:08 2024 -0500
wined3d: Submit command buffers after 512 draw or dispatch commands.
This improves performance for the game "Grounded", on an AMD Radeon RX 6700 XT,
with radv from Mesa 22.3.6. Testing was done with the "cb_access_map_w" option
enabled, which also improves performance with the game by itself.
Grounded generally makes about 4000 draw calls per frame, which seems not
atypical. This change makes it submit at most an extra 8 times per frame, but in
practice, due to WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS, it submits fewer times (usually
only 2-3).
The most demanding game I've seen made about 20,000 draw calls per frame, at
which point the overhead of adapter_vk_draw_primitive() itself becomes a serious
bottleneck. For such a game we would submit 40 more times per frame with these
settings, although WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS means we would likely
submit fewer times than that. In any case, if submission itself becomes a bottleneck, we
should offload it to a separate thread.
Credit goes to Philip Rebohle and his work on DXVK for helping me to notice that
periodic submission might make a difference.
---
dlls/wined3d/adapter_vk.c | 4 ++++
dlls/wined3d/context_vk.c | 35 ++++++++++++++++++++++++++++++++++-
dlls/wined3d/wined3d_vk.h | 3 +++
3 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c
index 418b67de8b6..39b263872ed 100644
--- a/dlls/wined3d/adapter_vk.c
+++ b/dlls/wined3d/adapter_vk.c
@@ -1807,6 +1807,8 @@ static void adapter_vk_draw_primitive(struct wined3d_device *device,
context_vk->c.transform_feedback_active = 0;
}
+ ++context_vk->command_buffer_work_count;
+
context_release(&context_vk->c);
}
@@ -1851,6 +1853,8 @@ static void adapter_vk_dispatch_compute(struct wined3d_device *device,
VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, NULL, 0, NULL, 0, NULL));
+ ++context_vk->command_buffer_work_count;
+
context_release(&context_vk->c);
}
diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c
index 377c437ee09..200dcb57b7f 100644
--- a/dlls/wined3d/context_vk.c
+++ b/dlls/wined3d/context_vk.c
@@ -1771,6 +1771,37 @@ void wined3d_context_vk_cleanup(struct wined3d_context_vk *context_vk)
wined3d_context_cleanup(&context_vk->c);
}
+/* In general we only submit when necessary or when a frame ends. However,
+ * applications which do a lot of work per frame can end up with the GPU idle
+ * for long periods of time while the CPU is building commands, and drivers may
+ * choose to reclock the GPU to a lower power level if they detect it being idle
+ * for that long.
+ *
+ * This may also help performance simply by virtue of allowing more parallelism
+ * between the GPU and CPU, although no clear evidence of that has been seen
+ * yet. */
+
+#define WINED3D_PERIODIC_SUBMIT_WORK_COUNT 512
+#define WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS 3
+
+static bool should_periodic_submit(struct wined3d_context_vk *context_vk)
+{
+ uint64_t busy_count;
+
+ if (context_vk->command_buffer_work_count < WINED3D_PERIODIC_SUBMIT_WORK_COUNT)
+ return false;
+
+ /* The point of periodic submit is to keep the GPU busy, so if it's already
+ * busy with 4 or more command buffers, don't submit another one now. */
+ busy_count = context_vk->current_command_buffer.id - context_vk->completed_command_buffer_id - 1;
+ if (busy_count > WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS)
+ return false;
+
+ TRACE("Periodically submitting command buffer, %u draw/dispatch commands since last buffer, %I64u currently busy.\n",
+ context_vk->command_buffer_work_count, busy_count);
+ return true;
+}
+
VkCommandBuffer wined3d_context_vk_get_command_buffer(struct wined3d_context_vk *context_vk)
{
struct wined3d_device_vk *device_vk = wined3d_device_vk(context_vk->c.device);
@@ -1785,7 +1816,7 @@ VkCommandBuffer wined3d_context_vk_get_command_buffer(struct wined3d_context_vk
buffer = &context_vk->current_command_buffer;
if (buffer->vk_command_buffer)
{
- if (context_vk->retired_bo_size > WINED3D_RETIRED_BO_SIZE_THRESHOLD)
+ if (context_vk->retired_bo_size > WINED3D_RETIRED_BO_SIZE_THRESHOLD || should_periodic_submit(context_vk))
wined3d_context_vk_submit_command_buffer(context_vk, 0, NULL, NULL, 0, NULL);
else
{
@@ -1854,6 +1885,8 @@ VkCommandBuffer wined3d_context_vk_get_command_buffer(struct wined3d_context_vk
wined3d_query_vk_resume(query_vk, context_vk);
}
+ context_vk->command_buffer_work_count = 0;
+
TRACE("Created new command buffer %p with id 0x%s.\n",
buffer->vk_command_buffer, wine_dbgstr_longlong(buffer->id));
diff --git a/dlls/wined3d/wined3d_vk.h b/dlls/wined3d/wined3d_vk.h
index 94a6b6c0c5e..ad8eb2453f5 100644
--- a/dlls/wined3d/wined3d_vk.h
+++ b/dlls/wined3d/wined3d_vk.h
@@ -614,6 +614,9 @@ struct wined3d_context_vk
struct wined3d_command_buffer_vk current_command_buffer;
uint64_t completed_command_buffer_id;
VkDeviceSize retired_bo_size;
+ /* Number of draw or dispatch calls that have been recorded into the
+ * current command buffer. */
+ unsigned int command_buffer_work_count;
struct
{