My hardware is a Ryzen 7 4700U with integrated graphics.
- Flatout (Direct3D 9): 20 fps (renders correctly)
- Unigine Heaven (OpenGL): ~40-60 fps (renders correctly)
- Unigine Heaven (Direct3D 9): 20-30 fps (renders correctly)
- Unigine Heaven (Direct3D 11): 20-30 fps as well (renders correctly)
- Elder Scrolls IV (Direct3D 9): 20 fps (renders correctly)
- BeamNG Tech Demo 0.3 (Direct3D 9): 2 fps (renders correctly, but still runs poorly)
Massive step up from getting 2 fps across many wined3d games, but it's still pretty bad, ~~and sometimes runs worse than the original code~~. Now with a combination of using the new and old code dynamically you can get the best of both worlds!
Unfortunately, we lose the ability to get lucky with the mapping just happening to be in the 32 bit range.
-- v7: opengl32: speed up wow64 mapping.
From: Etaash Mathamsetty 45927311+Etaash-mathamsetty@users.noreply.github.com
--- dlls/opengl32/unix_wgl.c | 121 ++++++++++++++++++++++++++++----------- 1 file changed, 88 insertions(+), 33 deletions(-)
diff --git a/dlls/opengl32/unix_wgl.c b/dlls/opengl32/unix_wgl.c index 453a9824046..b1e0e961e2e 100644 --- a/dlls/opengl32/unix_wgl.c +++ b/dlls/opengl32/unix_wgl.c @@ -1852,28 +1852,70 @@ static void unmap_named_buffer( TEB *teb, GLint buffer ) if (func) func( buffer ); }
-static NTSTATUS wow64_map_buffer( TEB *teb, GLint buffer, GLenum target, void *ptr, SIZE_T size, - GLbitfield access, PTR32 *ret ) +static void gl_buffer_sub_data( TEB *teb, GLenum target, GLintptr offset, GLsizeiptr size, const void *data ) +{ + const struct opengl_funcs *funcs = teb->glTable; + typeof(*funcs->ext.p_glBufferSubData) *func; + if (!(func = funcs->ext.p_glBufferSubData)) func = (void *)funcs->wgl.p_wglGetProcAddress( "glBufferSubData" ); + if (func) func( target, offset, size, data ); +} + +static void gl_named_buffer_sub_data( TEB *teb, GLint buffer, GLintptr offset, GLsizeiptr size, const void *data ) +{ + const struct opengl_funcs *funcs = teb->glTable; + typeof(*funcs->ext.p_glNamedBufferSubData) *func; + if (!(func = funcs->ext.p_glNamedBufferSubData)) func = (void *)funcs->wgl.p_wglGetProcAddress( "glNamedBufferSubData" ); + if (func) func( buffer, offset, size, data ); +} + +static void gl_get_buffer_sub_data( TEB *teb, GLenum target, GLintptr offset, GLsizeiptr size, void *data ) +{ + const struct opengl_funcs *funcs = teb->glTable; + typeof(*funcs->ext.p_glGetBufferSubData) *func; + if (!(func = funcs->ext.p_glGetBufferSubData)) func = (void *)funcs->wgl.p_wglGetProcAddress( "glGetBufferSubData" ); + if (func) func( target, offset, size, data ); +} + +static void gl_get_named_buffer_sub_data( TEB *teb, GLint buffer, GLintptr offset, GLsizeiptr size, void *data ) +{ + const struct opengl_funcs *funcs = teb->glTable; + typeof(*funcs->ext.p_glGetNamedBufferSubData) *func; + if (!(func = funcs->ext.p_glGetNamedBufferSubData)) func = (void *)funcs->wgl.p_wglGetProcAddress( "glGetNamedBufferSubData" ); + if (func) func( buffer, offset, size, data ); +} + +static NTSTATUS wow64_map_buffer( TEB *teb, GLint buffer, GLenum target, void **ptr, GLintptr offset, SIZE_T size, + GLbitfield access, PTR32 *ret, NTSTATUS (*gl_map_buffer64)(void *), void *params ) { static unsigned int once;
if (*ret) /* wow64 pointer provided, map buffer to it */ { - if (!(access & (GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT))) - { - if (!once++) - FIXME( "Doing a copy of a mapped buffer (expect performance issues)\n" ); + /* getting sub buffer data for smaller buffers produces more overhead when size is small */ + const BOOL do_memcpy = size < 0x8000; + + if (!once++) + FIXME( "Doing a copy of a mapped buffer (expect performance issues)\n" );
- TRACE( "Copying %#zx from buffer at %p to wow64 buffer %p\n", size, ptr, UlongToPtr(*ret) ); - memcpy( UlongToPtr(*ret), ptr, size ); + TRACE( "Copying %#zx from buffer at %p to wow64 buffer %p\n", size, ptr, UlongToPtr(*ret) ); + + if (!do_memcpy && !(access & (GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT))) + { + if (buffer) + gl_get_named_buffer_sub_data( teb, buffer, offset, size, UlongToPtr(*ret) ); + else + gl_get_buffer_sub_data( teb, target, offset, size, UlongToPtr(*ret) ); }
+ gl_map_buffer64(params); + + if (do_memcpy) memcpy( UlongToPtr(*ret), *ptr, size ); + /* save the wow64 pointer in the buffer data, we'll overwrite it on unmap */ - *(PTR32 *)ptr = (UINT_PTR)*ret; + *(PTR32 *)(*ptr) = (UINT_PTR)*ret; return STATUS_SUCCESS; }
- if (ULongToPtr(*ret = PtrToUlong(ptr)) == ptr) return STATUS_SUCCESS; /* we're lucky */ if (access & GL_MAP_PERSISTENT_BIT) { FIXME( "GL_MAP_PERSISTENT_BIT not supported!\n" ); @@ -1905,17 +1947,23 @@ static GLbitfield map_range_flags_from_map_flags( GLenum flags ) } }
-static NTSTATUS wow64_unmap_buffer( void *ptr, SIZE_T size, GLbitfield access ) +static NTSTATUS wow64_unmap_buffer( TEB *teb, void *ptr, GLenum target, GLint buffer, GLintptr offset, + SIZE_T size, GLbitfield access, NTSTATUS (*gl_unmap_buffer64)(void *), void *params) { - void *wow_ptr; + void *wow_ptr = UlongToPtr(*(PTR32 *)ptr); + + /* overwrite the stored pointer with the correct data */ + memcpy( ptr, wow_ptr, sizeof(PTR32) );
- if (ULongToPtr(PtrToUlong(ptr)) == ptr) return STATUS_SUCCESS; /* we're lucky */ + gl_unmap_buffer64( params );
- wow_ptr = UlongToPtr(*(PTR32 *)ptr); if (access & GL_MAP_WRITE_BIT) { TRACE( "Copying %#zx from wow64 buffer %p to buffer %p\n", size, wow_ptr, ptr ); - memcpy( ptr, wow_ptr, size ); + if (buffer) + gl_named_buffer_sub_data( teb, buffer, offset, size, wow_ptr ); + else + gl_buffer_sub_data( teb, target, offset, size, wow_ptr ); }
return STATUS_INVALID_ADDRESS; @@ -1943,7 +1991,7 @@ static NTSTATUS wow64_gl_get_buffer_pointer_v( void *args, NTSTATUS (*get_buffer
if ((status = get_buffer_pointer_v64( &params ))) return status; if (params.pname != GL_BUFFER_MAP_POINTER) return STATUS_NOT_IMPLEMENTED; - if (ULongToPtr(*wow_ptr = PtrToUlong(ptr)) == ptr) return STATUS_SUCCESS; /* we're lucky */ + *wow_ptr = ptr[0]; return STATUS_SUCCESS; } @@ -1980,7 +2028,7 @@ static NTSTATUS wow64_gl_get_named_buffer_pointer_v( void *args, NTSTATUS (*gl_g
if ((status = gl_get_named_buffer_pointer_v64( &params ))) return status; if (params.pname != GL_BUFFER_MAP_POINTER) return STATUS_NOT_IMPLEMENTED; - if (ULongToPtr(*wow_ptr = PtrToUlong(ptr)) == ptr) return STATUS_SUCCESS; /* we're lucky */ + *wow_ptr = ptr[0]; return STATUS_SUCCESS; } @@ -2014,10 +2062,10 @@ static NTSTATUS wow64_gl_map_buffer( void *args, NTSTATUS (*gl_map_buffer64)(voi
/* already mapped, we're being called again with a wow64 pointer */ if (params32->ret) params.ret = get_buffer_pointer( params.teb, params.target ); - else if ((status = gl_map_buffer64( &params ))) return status;
- status = wow64_map_buffer( params.teb, 0, params.target, params.ret, 0, - map_range_flags_from_map_flags( params.access ), &params32->ret ); + status = wow64_map_buffer( params.teb, 0, params.target, &params.ret, 0, 0, + map_range_flags_from_map_flags( params.access ), &params32->ret, + gl_map_buffer64, &params ); if (!status || status == STATUS_INVALID_ADDRESS) return status;
unmap_buffer( params.teb, params.target ); @@ -2057,9 +2105,10 @@ NTSTATUS wow64_ext_glMapBufferRange( void *args )
/* already mapped, we're being called again with a wow64 pointer */ if (params32->ret) params.ret = (char *)get_buffer_pointer( params.teb, params.target ); - else if ((status = ext_glMapBufferRange( &params ))) return status;
- status = wow64_map_buffer( params.teb, 0, params.target, params.ret, params.length, params.access, &params32->ret ); + status = wow64_map_buffer( params.teb, 0, params.target, &params.ret, params.offset, + params.length, params.access, &params32->ret, + ext_glMapBufferRange, &params ); if (!status || status == STATUS_INVALID_ADDRESS) return status;
unmap_buffer( params.teb, params.target ); @@ -2085,10 +2134,10 @@ static NTSTATUS wow64_gl_map_named_buffer( void *args, NTSTATUS (*gl_map_named_b
/* already mapped, we're being called again with a wow64 pointer */ if (params32->ret) params.ret = get_named_buffer_pointer( params.teb, params.buffer ); - else if ((status = gl_map_named_buffer64( &params ))) return status;
- status = wow64_map_buffer( params.teb, params.buffer, 0, params.ret, 0, - map_range_flags_from_map_flags( params.access ), &params32->ret ); + status = wow64_map_buffer( params.teb, params.buffer, 0, &params.ret, 0, 0, + map_range_flags_from_map_flags( params.access ), &params32->ret, + gl_map_named_buffer64, &params ); if (!status || status == STATUS_INVALID_ADDRESS) return status;
unmap_named_buffer( params.teb, params.buffer ); @@ -2128,9 +2177,10 @@ static NTSTATUS wow64_gl_map_named_buffer_range( void *args, NTSTATUS (*gl_map_n
/* already mapped, we're being called again with a wow64 pointer */ if (params32->ret) params.ret = get_named_buffer_pointer( params.teb, params.buffer ); - else if ((status = gl_map_named_buffer_range64( &params ))) return status;
- status = wow64_map_buffer( params.teb, params.buffer, 0, params.ret, params.length, params.access, &params32->ret ); + status = wow64_map_buffer( params.teb, params.buffer, 0, &params.ret, params.offset, + params.length, params.access, &params32->ret, + gl_map_named_buffer_range64, &params ); if (!status || status == STATUS_INVALID_ADDRESS) return status;
unmap_named_buffer( params.teb, params.buffer ); @@ -2166,9 +2216,12 @@ static NTSTATUS wow64_gl_unmap_buffer( void *args, NTSTATUS (*gl_unmap_buffer64)
if (!(ptr = get_buffer_pointer( params.teb, params.target ))) return STATUS_SUCCESS;
- status = wow64_unmap_buffer( ptr, get_buffer_param( params.teb, params.target, GL_BUFFER_MAP_LENGTH ), - get_buffer_param( params.teb, params.target, GL_BUFFER_ACCESS_FLAGS ) ); - gl_unmap_buffer64( &params ); + status = wow64_unmap_buffer( params.teb, ptr, params.target, 0, + get_buffer_param( params.teb, params.target, GL_BUFFER_MAP_OFFSET ), + get_buffer_param( params.teb, params.target, GL_BUFFER_MAP_LENGTH ), + get_buffer_param( params.teb, params.target, GL_BUFFER_ACCESS_FLAGS ), + gl_unmap_buffer64, &params ); + params32->ret = params.ret;
return status; @@ -2203,9 +2256,11 @@ static NTSTATUS wow64_gl_unmap_named_buffer( void *args, NTSTATUS (*gl_unmap_nam
if (!(ptr = get_named_buffer_pointer( params.teb, params.buffer ))) return STATUS_SUCCESS;
- status = wow64_unmap_buffer( ptr, get_named_buffer_param( params.teb, params.buffer, GL_BUFFER_MAP_LENGTH ), - get_named_buffer_param( params.teb, params.buffer, GL_BUFFER_ACCESS_FLAGS ) ); - gl_unmap_named_buffer64( &params ); + status = wow64_unmap_buffer( params.teb, ptr, 0, params.buffer, + get_named_buffer_param( params.teb, params.buffer, GL_BUFFER_MAP_OFFSET ), + get_named_buffer_param( params.teb, params.buffer, GL_BUFFER_MAP_LENGTH ), + get_named_buffer_param( params.teb, params.buffer, GL_BUFFER_ACCESS_FLAGS ), + gl_unmap_named_buffer64, &params ); params32->ret = params.ret;
return status;
Hi there, Did you make any more progress on this patch? Pi 5 using Wine WOW64 + 32BIT applications really struggles with OpenGL apps, and the hardware isn't all that fast to begin with. Call of Duty 2 runs at 1fps, but on 32bit averages 20-30fps.
On Sat Aug 10 19:31:37 2024 +0000, Chris “eating mcdonalds” Fraser wrote:
Hi there, Did you make any more progress on this patch? Pi 5 using Wine WOW64 + 32BIT applications really struggles with OpenGL apps, and the hardware isn't all that fast to begin with. Call of Duty 2 runs at 1fps, but on 32bit averages 20-30fps.
Just FYI (and sorry for jumping-in) but regarding D3D9 based stuff on Broadcom hardware there might exist at some point an alternative option, see Mesa [MR 7494](https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7494). Yeah, it hasn't landed yet, however it works quite well on [panfrost](https://gitlab.freedesktop.org/mesa/mesa/-/issues/8293) and ARM based hardware. :wink:
On Sat Aug 10 19:31:37 2024 +0000, C. Leu wrote:
Just FYI (and sorry for jumping-in) but regarding D3D9 based stuff on Broadcom hardware there might exist at some point an alternative option, see Mesa [MR 7494](https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7494). Yeah, it hasn't landed yet, however it works quite well on [panfrost](https://gitlab.freedesktop.org/mesa/mesa/-/issues/8293) and ARM based hardware. :wink:
yes actually, before this I made a cursed version (somehow I still have the code and have fixed it up slightly today). Give it a try!
https://gitlab.winehq.org/etaash.mathamsetty/wine/-/commits/cursed-opengl
note: it doesn't work with everything because it's not conformant like this one is
On Sun Oct 27 19:38:53 2024 +0000, Etaash Mathamsetty wrote:
yes actually, before this I made a cursed version (somehow I still have the code and have fixed it up slightly today). Give it a try! https://gitlab.winehq.org/etaash.mathamsetty/wine/-/commits/cursed-opengl note: it doesn't work with everything because it's not conformant like this one is
This version also "supports" OpenGL 4.6, but it doesn't support coherent maps properly (yet), so not really.