Which is the SM5 limit.
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=43845 Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- It also should fix a crash some ways into the game in Hellblade: Senua's Sacrifice.
As it turns out, GL implementations in practice support only 16 attributes, while Vulkan requires at least 32.
At least in the case of both ABZU and Hellblade: Senua's Sacrifice, though, the problematic shaders don't actually make use of vertex attributes with index > 15; they just include an SV_InstanceID system value for attribute 16 in the shader input signature but don't declare it in the shader code at all. This patch is enough to make ABZU work on GL (I haven't retested Hellblade but I expect the same there). FWIW, both games are built around Unreal Engine 4.
v2: Just raise MAX_ATTRIBS and fix up (hopefully) all the fallout. --- dlls/wined3d/context_gl.c | 13 +++++++++++-- dlls/wined3d/wined3d_private.h | 31 ++++++++++++++++--------------- 2 files changed, 27 insertions(+), 17 deletions(-)
diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 45c1062b3ec..2e953fc9c78 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -4818,7 +4818,8 @@ static void draw_primitive_immediate_mode(struct wined3d_context_gl *context_gl, unsigned int element_idx;
stride_idx = get_stride_idx(idx_data, idx_size, base_vertex_idx, start_idx, vertex_idx); - for (element_idx = MAX_ATTRIBS - 1; use_map; use_map &= ~(1u << element_idx), --element_idx) + for (element_idx = gl_info->limits.vertex_attribs - 1; use_map; + use_map &= ~(1u << element_idx), --element_idx) { if (!(use_map & 1u << element_idx)) continue; @@ -5657,7 +5658,15 @@ static void wined3d_context_gl_load_numbered_arrays(struct wined3d_context_gl *c context->instance_count = 0; current_bo = gl_info->supported[ARB_VERTEX_BUFFER_OBJECT] ? ~0u : 0;
- for (i = 0; i < MAX_ATTRIBS; ++i) + if (stream_info->use_map & ~wined3d_mask_from_size(gl_info->limits.vertex_attribs)) + { + static unsigned int once; + + if (!once++) + FIXME("More than the supported %u vertex attributes are in use.\n", gl_info->limits.vertex_attribs); + } + + for (i = 0; i < gl_info->limits.vertex_attribs; ++i) { const struct wined3d_stream_info_element *element = &stream_info->elements[i]; const void *offset = get_vertex_attrib_pointer(element, state); diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 35925b04cd1..3210c628de3 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -737,8 +737,7 @@ enum wined3d_shader_conditional_op
/* Shader backends */
-/* TODO: Make this dynamic, based on shader limits ? */ -#define MAX_ATTRIBS 16 +#define MAX_ATTRIBS 32 #define MAX_REG_ADDR 1 #define MAX_REG_TEXCRD 8 #define MAX_REG_INPUT 32 @@ -1453,6 +1452,9 @@ enum fog_src_type
struct vs_compile_args { + DWORD swizzle_map; /* MAX_ATTRIBS, 32 */ + unsigned int next_shader_input_count; + DWORD interpolation_mode[WINED3D_PACKED_INTERPOLATION_SIZE]; BYTE fog_src; BYTE clip_enabled : 1; BYTE point_size : 1; @@ -1460,9 +1462,6 @@ struct vs_compile_args BYTE flatshading : 1; BYTE next_shader_type : 3; BYTE padding : 1; - WORD swizzle_map; /* MAX_ATTRIBS, 16 */ - unsigned int next_shader_input_count; - DWORD interpolation_mode[WINED3D_PACKED_INTERPOLATION_SIZE]; };
struct ds_compile_args @@ -1699,8 +1698,8 @@ struct wined3d_stream_info struct wined3d_stream_info_element elements[MAX_ATTRIBS]; DWORD position_transformed : 1; DWORD all_vbo : 1; - WORD swizzle_map; /* MAX_ATTRIBS, 16 */ - WORD use_map; /* MAX_ATTRIBS, 16 */ + DWORD swizzle_map; /* MAX_ATTRIBS, 32 */ + DWORD use_map; /* MAX_ATTRIBS, 32 */ };
void wined3d_stream_info_from_declaration(struct wined3d_stream_info *stream_info, @@ -2149,28 +2148,28 @@ struct wined3d_context } current_rt;
/* Stores some information about the context state for optimization */ + DWORD last_swizzle_map; /* MAX_ATTRIBS, 32 */ + DWORD shader_update_mask : 6; /* WINED3D_SHADER_TYPE_COUNT, 6 */ DWORD update_shader_resource_bindings : 1; DWORD update_compute_shader_resource_bindings : 1; DWORD update_unordered_access_view_bindings : 1; DWORD update_compute_unordered_access_view_bindings : 1; - DWORD last_swizzle_map : 16; /* MAX_ATTRIBS, 16 */ DWORD last_was_rhw : 1; /* True iff last draw_primitive was in xyzrhw mode. */ DWORD last_was_pshader : 1; DWORD last_was_vshader : 1; DWORD last_was_diffuse : 1; DWORD last_was_specular : 1; DWORD last_was_normal : 1; - DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; DWORD last_was_dual_source_blend : 1; DWORD texShaderBumpMap : 8; /* WINED3D_MAX_TEXTURES, 8 */ - DWORD lastWasPow2Texture : 8; /* WINED3D_MAX_TEXTURES, 8 */ - DWORD fixed_function_usage_map : 8; /* WINED3D_MAX_TEXTURES, 8 */ DWORD lowest_disabled_stage : 4; /* Max WINED3D_MAX_TEXTURES, 8 */
+ DWORD lastWasPow2Texture : 8; /* WINED3D_MAX_TEXTURES, 8 */ + DWORD fixed_function_usage_map : 8; /* WINED3D_MAX_TEXTURES, 8 */ DWORD use_immediate_mode_draw : 1; DWORD uses_uavs : 1; DWORD uses_fbo_attached_resources : 1; @@ -2181,9 +2180,10 @@ struct wined3d_context DWORD current : 1; DWORD destroyed : 1; DWORD destroy_delayed : 1; - DWORD clip_distance_mask : 8; /* WINED3D_MAX_CLIP_DISTANCES, 8 */ DWORD namedArraysLoaded : 1; - DWORD padding : 13; + DWORD padding : 5; + + DWORD clip_distance_mask : 8; /* WINED3D_MAX_CLIP_DISTANCES, 8 */
DWORD constant_update_mask; DWORD numbered_array_mask; @@ -3653,8 +3653,9 @@ struct wined3d_ffp_vs_settings DWORD texcoords : 8; /* WINED3D_MAX_TEXTURES */ DWORD ortho_fog : 1; DWORD flatshading : 1; - DWORD swizzle_map : 16; /* MAX_ATTRIBS, 16 */ - DWORD padding : 2; + DWORD padding : 18; + + DWORD swizzle_map; /* MAX_ATTRIBS, 32 */
DWORD texgen[WINED3D_MAX_TEXTURES]; };
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- v2: Patch moved forward in the series.
dlls/wined3d/adapter_gl.c | 1 + dlls/wined3d/adapter_vk.c | 1 + dlls/wined3d/context_gl.c | 9 ++++++--- dlls/wined3d/query.c | 5 ----- dlls/wined3d/swapchain.c | 3 ++- dlls/wined3d/wined3d_private.h | 6 ++++++ 6 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/dlls/wined3d/adapter_gl.c b/dlls/wined3d/adapter_gl.c index 5f3e3538950..d72c43e48b8 100644 --- a/dlls/wined3d/adapter_gl.c +++ b/dlls/wined3d/adapter_gl.c @@ -5228,6 +5228,7 @@ static void wined3d_adapter_gl_init_d3d_info(struct wined3d_adapter_gl *adapter_ d3d_info->scaled_resolve = !!gl_info->supported[EXT_FRAMEBUFFER_MULTISAMPLE_BLIT_SCALED]; d3d_info->pbo = !!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT]; d3d_info->subpixel_viewport = gl_info->limits.viewport_subpixel_bits >= 8; + d3d_info->fences = wined3d_fence_supported(gl_info); d3d_info->feature_level = feature_level_from_caps(gl_info, &shader_caps, &fragment_caps); d3d_info->filling_convention_offset = gl_info->filling_convention_offset;
diff --git a/dlls/wined3d/adapter_vk.c b/dlls/wined3d/adapter_vk.c index 2752a0ac0b3..cd1748afe90 100644 --- a/dlls/wined3d/adapter_vk.c +++ b/dlls/wined3d/adapter_vk.c @@ -2350,6 +2350,7 @@ static void wined3d_adapter_vk_init_d3d_info(struct wined3d_adapter_vk *adapter_ d3d_info->pbo = true; d3d_info->feature_level = feature_level_from_caps(&shader_caps); d3d_info->subpixel_viewport = true; + d3d_info->fences = true;
/* Like GL, Vulkan doesn't explicitly specify a filling convention and only mandates that a * shared edge of two adjacent triangles generate a fragment for exactly one of the triangles. diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index 2e953fc9c78..bb4157c0501 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -2865,9 +2865,12 @@ static void *wined3d_bo_gl_map(struct wined3d_bo_gl *bo, struct wined3d_context_ ERR("Failed to create new buffer object.\n"); }
- if (bo->command_fence_id == device_gl->current_fence_id) - wined3d_context_gl_submit_command_fence(context_gl); - wined3d_context_gl_wait_command_fence(context_gl, bo->command_fence_id); + if (context_gl->c.d3d_info->fences) + { + if (bo->command_fence_id == device_gl->current_fence_id) + wined3d_context_gl_submit_command_fence(context_gl); + wined3d_context_gl_wait_command_fence(context_gl, bo->command_fence_id); + }
map: if (bo->b.map_ptr) diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c index 4b94cb8daea..34ee56d5d11 100644 --- a/dlls/wined3d/query.c +++ b/dlls/wined3d/query.c @@ -173,11 +173,6 @@ static struct wined3d_pipeline_statistics_query *wined3d_pipeline_statistics_que return CONTAINING_RECORD(query, struct wined3d_pipeline_statistics_query, query); }
-static BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info) -{ - return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE]; -} - enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, struct wined3d_device *device, DWORD flags) { diff --git a/dlls/wined3d/swapchain.c b/dlls/wined3d/swapchain.c index eef7d46ba39..9f2d41b757a 100644 --- a/dlls/wined3d/swapchain.c +++ b/dlls/wined3d/swapchain.c @@ -633,7 +633,8 @@ static void swapchain_gl_present(struct wined3d_swapchain *swapchain, gl_info->gl_ops.wgl.p_wglSwapBuffers(context_gl->dc); }
- wined3d_context_gl_submit_command_fence(context_gl); + if (context->d3d_info->fences) + wined3d_context_gl_submit_command_fence(context_gl);
wined3d_swapchain_gl_rotate(swapchain, context);
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 3210c628de3..11da88aff8a 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -242,6 +242,7 @@ struct wined3d_d3d_info uint32_t scaled_resolve : 1; uint32_t pbo : 1; uint32_t subpixel_viewport : 1; + uint32_t fences : 1; enum wined3d_feature_level feature_level;
DWORD multisample_draw_location; @@ -3282,6 +3283,11 @@ struct wined3d_gl_info void (WINE_GLAPI *p_glEnableWINE)(GLenum cap); };
+static inline BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info) +{ + return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE]; +} + /* The driver names reflect the lowest GPU supported * by a certain driver, so DRIVER_AMD_R300 supports * R3xx, R4xx and R5xx GPUs. */
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- v2: Also handle the fence usage in wined3d_device_gl_delete_opengl_contexts_cs().
dlls/wined3d/context_gl.c | 9 ++++++--- dlls/wined3d/device.c | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/dlls/wined3d/context_gl.c b/dlls/wined3d/context_gl.c index bb4157c0501..0a731dc85f0 100644 --- a/dlls/wined3d/context_gl.c +++ b/dlls/wined3d/context_gl.c @@ -1393,13 +1393,16 @@ static void wined3d_context_gl_cleanup(struct wined3d_context_gl *context_gl) { /* If we're here because we're switching away from a previously * destroyed context, acquiring a context in order to submit a fence - * is problematic. (In particular, we'd end up back here again in the - * process of switching to the newly acquired context.) */ + * is problematic. In particular, we'd end up back here again in the + * process of switching to the newly acquired context. + * + * If fences aren't supported there should be nothing to wait for + * anyway, so just do nothing in that case. */ if (context_gl->c.destroyed) { gl_info->gl_ops.gl.p_glFinish(); } - else + else if (context_gl->c.d3d_info->fences) { wined3d_context_gl_submit_command_fence(context_gl); wined3d_context_gl_wait_command_fence(context_gl, diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index d14b40d8e3e..8937c7fc0fc 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -1201,9 +1201,12 @@ void wined3d_device_gl_delete_opengl_contexts_cs(void *object) device->shader_backend->shader_free_private(device, context); wined3d_device_gl_destroy_dummy_textures(device_gl, context_gl);
- wined3d_context_gl_submit_command_fence(context_gl); - wined3d_context_gl_wait_command_fence(context_gl, - wined3d_device_gl(context_gl->c.device)->current_fence_id - 1); + if (context_gl->c.d3d_info->fences) + { + wined3d_context_gl_submit_command_fence(context_gl); + wined3d_context_gl_wait_command_fence(context_gl, + wined3d_device_gl(context_gl->c.device)->current_fence_id - 1); + } wined3d_allocator_cleanup(&device_gl->allocator);
context_release(context);
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- dlls/wined3d/cs.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index cf7debe4256..6cbc9e2e2a6 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -3421,6 +3421,15 @@ struct wined3d_cs *wined3d_cs_create(struct wined3d_device *device, if (!(cs->data = heap_alloc(cs->data_size))) goto fail;
+ if (wined3d_settings.cs_multithreaded & WINED3D_CSMT_ENABLE) + { + if (!d3d_info->fences) + { + WARN("Disabling CSMT, adapter doesn't support fences.\n"); + wined3d_settings.cs_multithreaded &= ~WINED3D_CSMT_ENABLE; + } + } + if (wined3d_settings.cs_multithreaded & WINED3D_CSMT_ENABLE && !RtlIsCriticalSectionLockedByThread(NtCurrentTeb()->Peb->LoaderLock)) {
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- dlls/wined3d/wined3d_private.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 11da88aff8a..3a12f5ef261 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -6744,7 +6744,8 @@ static inline void wined3d_context_gl_reference_bo(struct wined3d_context_gl *co static inline void wined3d_context_gl_reference_buffer(struct wined3d_context_gl *context_gl, struct wined3d_buffer *buffer) { - wined3d_context_gl_reference_bo(context_gl, wined3d_bo_gl(buffer->buffer_object)); + if (buffer->buffer_object) + wined3d_context_gl_reference_bo(context_gl, wined3d_bo_gl(buffer->buffer_object)); }
static inline bool wined3d_map_persistent(void)
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com